From e28b754d23440d2c433f880d296926363441bd78 Mon Sep 17 00:00:00 2001 From: GitLab Bot Date: Wed, 2 Sep 2020 03:10:30 +0000 Subject: [PATCH] Add latest changes from gitlab-org/gitlab@master --- ...d-gzip-compression-to-discussion-diffs.yml | 5 ++ doc/administration/object_storage.md | 57 +++++++++++++++++- doc/administration/raketasks/doctor.md | 2 - doc/ci/migration/circleci.md | 8 +-- doc/ci/migration/jenkins.md | 18 +++--- doc/development/database_debugging.md | 2 +- doc/development/documentation/index.md | 10 ++-- doc/development/documentation/styleguide.md | 9 ++- doc/user/packages/package_registry/index.md | 2 +- lib/gitlab/diff/highlight_cache.rb | 28 +-------- .../discussions_diff/highlight_cache.rb | 6 +- lib/gitlab/utils/gzip.rb | 30 ++++++++++ .../discussions_diff/highlight_cache_spec.rb | 4 +- spec/lib/gitlab/utils/gzip_spec.rb | 58 +++++++++++++++++++ 14 files changed, 184 insertions(+), 55 deletions(-) create mode 100644 changelogs/unreleased/225600-add-gzip-compression-to-discussion-diffs.yml create mode 100644 lib/gitlab/utils/gzip.rb create mode 100644 spec/lib/gitlab/utils/gzip_spec.rb diff --git a/changelogs/unreleased/225600-add-gzip-compression-to-discussion-diffs.yml b/changelogs/unreleased/225600-add-gzip-compression-to-discussion-diffs.yml new file mode 100644 index 00000000000..1bffc63e47f --- /dev/null +++ b/changelogs/unreleased/225600-add-gzip-compression-to-discussion-diffs.yml @@ -0,0 +1,5 @@ +--- +title: Apply GZip compression to discussion diffs +merge_request: 40778 +author: +type: performance diff --git a/doc/administration/object_storage.md b/doc/administration/object_storage.md index 8668200ce44..e34182676d6 100644 --- a/doc/administration/object_storage.md +++ b/doc/administration/object_storage.md @@ -18,6 +18,7 @@ GitLab has been tested on a number of object storage providers: - [Digital Ocean Spaces](https://www.digitalocean.com/products/spaces/) - [Oracle Cloud Infrastructure](https://docs.cloud.oracle.com/en-us/iaas/Content/Object/Tasks/s3compatibleapi.htm) - [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) +- [Azure Blob storage](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) - On-premises hardware and appliances from various storage vendors. - MinIO. We have [a guide to deploying this](https://docs.gitlab.com/charts/advanced/external-object-storage/minio.html) within our Helm Chart documentation. @@ -158,7 +159,6 @@ See the section on [ETag mismatch errors](#etag-mismatch) for more details. ```toml [object_storage] - enabled = true provider = "AWS" [object_storage.s3] @@ -272,6 +272,61 @@ gitlab_rails['object_store']['connection'] = { } ``` +#### Azure Blob storage + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/25877) in GitLab 13.4. + +Although Azure uses the word `container` to denote a collection of +blobs, GitLab standardizes on the term `bucket`. Be sure to configure +Azure container names in the `bucket` settings. + +The following are the valid connection parameters for Azure. Read the +[Azure Blob storage documentation](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) +to learn more. + +| Setting | Description | Example | +|---------|-------------|---------| +| `provider` | Provider name | `AzureRM` | +| `azure_storage_account_name` | Name of the Azure Blob Storage account used to access the storage | `azuretest` | +| `azure_storage_access_key` | Storage account access key used to access the container. This is typically a secret, 512-bit encryption key encoded in base64. | `"czV2OHkvQj9FKEgrTWJRZVRoV21ZcTN0Nnc5eiRDJkYpSkBOY1JmVWpYbjJy\nNHU3eCFBJUQqRy1LYVBkU2dWaw==\n"` | +| `azure_storage_domain` | Domain name used to contact the Azure Blob Storage API (optional). Defaults to `blob.core.windows.net`. Set this if you are using Azure China, Azure Germany, Azure US Government, or some other custom Azure domain. | `blob.core.windows.net` | + +##### Azure example (consolidated form) + +For Omnibus installations, this is an example of the `connection` setting: + +```ruby +gitlab_rails['object_store']['connection'] = { + 'provider' => 'AzureRM', + 'azure_storage_account_name' => '', + 'azure_storage_access_key' => '', + 'azure_storage_domain' => '', +} +``` + +###### Azure Workhorse settings (source installs only) + +NOTE: **Note:** +For source installations, Workhorse needs to be configured with the +Azure credentials as well. This is not needed in Omnibus installs because +the Workhorse settings are populated from the settings above. + +1. Edit `/home/git/gitlab-workhorse/config.toml` and add or amend the following lines: + + ```toml + [object_storage] + provider = "AzureRM" + + [object_storage.azurerm] + azure_storage_account_name = "" + azure_storage_access_key = "" + ``` + +If you are using a custom Azure storage domain, note that +`azure_storage_domain` does **not** have to be set in the Workhorse +configuration. This information is exchanged in an API call between +GitLab Rails and Workhorse. + #### OpenStack-compatible connection settings NOTE: **Note:** diff --git a/doc/administration/raketasks/doctor.md b/doc/administration/raketasks/doctor.md index 2c1b6928663..62d0af70706 100644 --- a/doc/administration/raketasks/doctor.md +++ b/doc/administration/raketasks/doctor.md @@ -33,7 +33,6 @@ bundle exec rake gitlab:doctor:secrets RAILS_ENV=production **Example output** - ```plaintext I, [2020-06-11T17:17:54.951815 #27148] INFO -- : Checking encrypted values in the database I, [2020-06-11T17:18:12.677708 #27148] INFO -- : - ApplicationSetting failures: 0 @@ -45,7 +44,6 @@ I, [2020-06-11T17:18:15.575533 #27148] INFO -- : - ScimOauthAccessToken failure I, [2020-06-11T17:18:15.575678 #27148] INFO -- : Total: 1 row(s) affected I, [2020-06-11T17:18:15.575711 #27148] INFO -- : Done! ``` - ### Verbose mode diff --git a/doc/ci/migration/circleci.md b/doc/ci/migration/circleci.md index 78705815c24..c08fdf297b4 100644 --- a/doc/ci/migration/circleci.md +++ b/doc/ci/migration/circleci.md @@ -276,17 +276,17 @@ There are two GitLab issues open addressing CircleCI Orbs and how GitLab can ach ## Build environments -CircleCI offers `executors` as the underlying technology to run a specific job. In GitLab, this is done by [Runners](https://docs.gitlab.com/runner/). +CircleCI offers `executors` as the underlying technology to run a specific job. In GitLab, this is done by [runners](https://docs.gitlab.com/runner/). The following environments are supported: -Self-Managed Runners: +Self-managed runners: - Linux - Windows - macOS -GitLab.com Shared Runners: +GitLab.com shared runners: - Linux - Windows @@ -294,7 +294,7 @@ GitLab.com Shared Runners: ### Machine and specific build environments -[Tags](../yaml/README.md#tags) can be used to run jobs on different platforms, by telling GitLab which Runners should run the jobs. +[Tags](../yaml/README.md#tags) can be used to run jobs on different platforms, by telling GitLab which runners should run the jobs. CircleCI example of a job running on a specific environment: diff --git a/doc/ci/migration/jenkins.md b/doc/ci/migration/jenkins.md index 1d029dcdd14..6857e736580 100644 --- a/doc/ci/migration/jenkins.md +++ b/doc/ci/migration/jenkins.md @@ -17,7 +17,7 @@ that were able to quickly complete this migration: 1. Start by reading the GitLab CI/CD [Quick Start Guide](../quick_start/README.md) and [important product differences](#important-product-differences). 1. Learn the importance of [managing the organizational transition](#managing-the-organizational-transition). -1. [Add Runners](../runners/README.md) to your GitLab instance. +1. [Add runners](../runners/README.md) to your GitLab instance. 1. Educate and enable your developers to independently perform the following steps in their projects: 1. Review the [Quick Start Guide](../quick_start/README.md) and [Pipeline Configuration Reference](../yaml/README.md). 1. Use the [Jenkins Wrapper](#jenkinsfile-wrapper) to temporarily maintain fragile Jenkins jobs. @@ -117,26 +117,26 @@ There are some high level differences between the products worth mentioning: or other manual jobs that function like utilities. Jenkins installations tend to have a few of these. -## Agents vs. Runners +## Agents vs. runners -Both Jenkins agents and GitLab Runners are the hosts that run jobs. To convert the +Both Jenkins agents and GitLab runners are the hosts that run jobs. To convert the Jenkins agent, simply uninstall it and then [install and register the runner](../runners/README.md). Runners do not require much overhead, so you can size them similarly to the Jenkins agents you were using. -There are some important differences in the way Runners work in comparison to agents: +There are some important differences in the way runners work in comparison to agents: - Runners can be set up as [shared across an instance, be added at the group level, or set up at the project level](../runners/README.md#types-of-runners). They will self-select jobs from the scopes you've defined automatically. - You can also [use tags](../runners/README.md#use-tags-to-limit-the-number-of-jobs-using-the-runner) for finer control, and associate runners with specific jobs. For example, you can use a tag for jobs that require dedicated, more powerful, or specific hardware. -- GitLab has [autoscaling for Runners](https://docs.gitlab.com/runner/configuration/autoscale.html) +- GitLab has [autoscaling for runners](https://docs.gitlab.com/runner/configuration/autoscale.html) which will let you configure them to be provisioned as needed, and scaled down when not. This is similar to ephemeral agents in Jenkins. -If you are using `gitlab.com`, you can take advantage of our [shared Runner fleet](../../user/gitlab_com/index.md#shared-runners) -to run jobs without provisioning your own Runners. We are investigating making them +If you are using `gitlab.com`, you can take advantage of our [shared runner fleet](../../user/gitlab_com/index.md#shared-runners) +to run jobs without provisioning your own runners. We are investigating making them [available for self-managed instances](https://gitlab.com/groups/gitlab-org/-/epics/835) as well. @@ -225,11 +225,11 @@ and is meant to be a mapping of concepts there to concepts in GitLab. #### `agent` -The agent section is used to define how a pipeline will be executed. For GitLab, we use the [GitLab Runner](../runners/README.md) +The agent section is used to define how a pipeline will be executed. For GitLab, we use [runners](../runners/README.md) to provide this capability. You can configure your own runners in Kubernetes or on any host, or take advantage of our shared runner fleet (note that the shared runner fleet is only available for GitLab.com users.) The link above will bring you to the documentation which will describe how to get up and running quickly. We also support using [tags](../runners/README.md#use-tags-to-limit-the-number-of-jobs-using-the-runner) to direct different jobs -to different Runners (execution agents). +to different runners (execution agents). The `agent` section also allows you to define which Docker images should be used for execution, for which we use the [`image`](../yaml/README.md#image) keyword. The `image` can be set on a single job or at the top level, in which diff --git a/doc/development/database_debugging.md b/doc/development/database_debugging.md index 25b62e0d693..61e8ac60bfe 100644 --- a/doc/development/database_debugging.md +++ b/doc/development/database_debugging.md @@ -72,7 +72,7 @@ Use these instructions for exploring the GitLab database while developing with t 1. **Port number to connect to**: `5432` (default). 1. **Use an ssl connection?** - This depends on your installation. Options are: + This depends on your installation. Options are: - **Use Secure Connection** - **Standard Connection** (default) 1. **(Optional) The database to connect to**: `gitlabhq_development`. diff --git a/doc/development/documentation/index.md b/doc/development/documentation/index.md index c864e59c898..5df04f2df58 100644 --- a/doc/development/documentation/index.md +++ b/doc/development/documentation/index.md @@ -701,9 +701,9 @@ To configure markdownlint within your editor, install one of the following as ap To configure Vale within your editor, install one of the following as appropriate: -- The Sublime Text [`SublimeLinter-contrib-vale` plugin](https://packagecontrol.io/packages/SublimeLinter-contrib-vale) -- The Visual Studio Code [`testthedocs.vale` extension](https://marketplace.visualstudio.com/items?itemName=testthedocs.vale) -- [Vim](https://github.com/dense-analysis/ale) +- The Sublime Text [`SublimeLinter-contrib-vale` plugin](https://packagecontrol.io/packages/SublimeLinter-contrib-vale). +- The Visual Studio Code [`errata-ai.vale-server` extension](https://marketplace.visualstudio.com/items?itemName=errata-ai.vale-server). You don't need Vale Server to use the plugin. +- [Vim](https://github.com/dense-analysis/ale). We don't use [Vale Server](https://errata-ai.github.io/vale/#using-vale-with-a-text-editor-or-another-third-party-application). @@ -736,9 +736,7 @@ document: - To disable all Vale linting rules, add a `` tag before the text, and a `` tag after the text. -Whenever possible, exclude only the problematic rule and line(s). In some cases, such as list items, -you may need to disable linting for the entire list until -[Vale issue #175](https://github.com/errata-ai/vale/issues/175) is resolved. +Whenever possible, exclude only the problematic rule and line(s). For more information, see [Vale's documentation](https://errata-ai.gitbook.io/vale/getting-started/markup#markup-based-configuration). diff --git a/doc/development/documentation/styleguide.md b/doc/development/documentation/styleguide.md index 0680256d222..ea13f6beb11 100644 --- a/doc/development/documentation/styleguide.md +++ b/doc/development/documentation/styleguide.md @@ -497,7 +497,7 @@ tenses, words, and phrases: - Instead of "e.g.," use "for example," "such as," "for instance," or "like." - Instead of "etc.," either use "and so on" or consider editing it out, since it can be vague. - + - Avoid using the word *currently* when talking about the product or its features. The documentation describes the product as it is, and not as it will be at some indeterminate point in the future. @@ -534,6 +534,9 @@ tenses, words, and phrases: [user interfaces](https://design.gitlab.com/content/punctuation/#contractions). (Tested in [`Contractions.yml`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/doc/.vale/gitlab/Contractions.yml).) + + + | Do | Don't | |----------|-----------| | it's | it is | @@ -582,7 +585,9 @@ tenses, words, and phrases: | Requests to localhost are not allowed | Requests to localhost aren't allowed | | Specified URL cannot be used | Specified URL can't be used | - + + + ## Text diff --git a/doc/user/packages/package_registry/index.md b/doc/user/packages/package_registry/index.md index fd250c9ac95..f7ee1a4808e 100644 --- a/doc/user/packages/package_registry/index.md +++ b/doc/user/packages/package_registry/index.md @@ -31,7 +31,7 @@ authenticate with GitLab by using the `CI_JOB_TOKEN`. CI/CD templates, which you can use to get started, are in [this repo](https://gitlab.com/gitlab-org/gitlab/-/tree/master/lib/gitlab/ci/templates). -Learn more about [using CI/CD to build Maven packages](../maven_repository/index.md#creating-maven-packages-with-gitlab-cicd), [NPM packages](../npm_registry/index.md#publishing-a-package-with-cicd) and [NuGet Packages](../nuget_repository/index.md#publishing-a-nuget-package-with-cicd). +Learn more about [using CI/CD to build Maven packages](../maven_repository/index.md#creating-maven-packages-with-gitlab-cicd), [NPM packages](../npm_registry/index.md#publishing-a-package-with-cicd), [Composer packages](../composer_repository/index.md#publishing-the-package-with-cicd), and [NuGet packages](../nuget_repository/index.md#publishing-a-nuget-package-with-cicd). If you use CI/CD to build a package, extended activity information is displayed when you view the package details: diff --git a/lib/gitlab/diff/highlight_cache.rb b/lib/gitlab/diff/highlight_cache.rb index 0c3b6b72313..0eb22e6b3cb 100644 --- a/lib/gitlab/diff/highlight_cache.rb +++ b/lib/gitlab/diff/highlight_cache.rb @@ -3,6 +3,7 @@ module Gitlab module Diff class HighlightCache + include Gitlab::Utils::Gzip include Gitlab::Utils::StrongMemoize EXPIRATION = 1.week @@ -83,7 +84,7 @@ module Gitlab redis.hset( key, diff_file_id, - compose_data(highlighted_diff_lines_hash.to_json) + gzip_compress(highlighted_diff_lines_hash.to_json) ) end @@ -145,35 +146,12 @@ module Gitlab end results.map! do |result| - Gitlab::Json.parse(extract_data(result), symbolize_names: true) unless result.nil? + Gitlab::Json.parse(gzip_decompress(result), symbolize_names: true) unless result.nil? end file_paths.zip(results).to_h end - def compose_data(json_data) - # #compress returns ASCII-8BIT, so we need to force the encoding to - # UTF-8 before caching it in redis, else we risk encoding mismatch - # errors. - # - ActiveSupport::Gzip.compress(json_data).force_encoding("UTF-8") - rescue Zlib::GzipFile::Error - json_data - end - - def extract_data(data) - # Since we could be dealing with an already populated cache full of data - # that isn't gzipped, we want to also check to see if the data is - # gzipped before we attempt to #decompress it, thus we check the first - # 2 bytes for "\x1F\x8B" to confirm it is a gzipped string. While a - # non-gzipped string will raise a Zlib::GzipFile::Error, which we're - # rescuing, we don't want to count on rescue for control flow. - # - data[0..1] == "\x1F\x8B" ? ActiveSupport::Gzip.decompress(data) : data - rescue Zlib::GzipFile::Error - data - end - def cacheable?(diff_file) diffable.present? && diff_file.text? && diff_file.diffable? end diff --git a/lib/gitlab/discussions_diff/highlight_cache.rb b/lib/gitlab/discussions_diff/highlight_cache.rb index 4bec6467c1a..3337aeb9262 100644 --- a/lib/gitlab/discussions_diff/highlight_cache.rb +++ b/lib/gitlab/discussions_diff/highlight_cache.rb @@ -3,6 +3,8 @@ module Gitlab module DiscussionsDiff class HighlightCache + extend Gitlab::Utils::Gzip + class << self VERSION = 1 EXPIRATION = 1.week @@ -17,7 +19,7 @@ module Gitlab mapping.each do |raw_key, value| key = cache_key_for(raw_key) - multi.set(key, value.to_json, ex: EXPIRATION) + multi.set(key, gzip_compress(value.to_json), ex: EXPIRATION) end end end @@ -44,7 +46,7 @@ module Gitlab content.map! do |lines| next unless lines - Gitlab::Json.parse(lines).map! do |line| + Gitlab::Json.parse(gzip_decompress(lines)).map! do |line| Gitlab::Diff::Line.safe_init_from_hash(line) end end diff --git a/lib/gitlab/utils/gzip.rb b/lib/gitlab/utils/gzip.rb new file mode 100644 index 00000000000..898be651554 --- /dev/null +++ b/lib/gitlab/utils/gzip.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Gitlab + module Utils + module Gzip + def gzip_compress(data) + # .compress returns ASCII-8BIT, so we need to force the encoding to + # UTF-8 before caching it in redis, else we risk encoding mismatch + # errors. + # + ActiveSupport::Gzip.compress(data).force_encoding("UTF-8") + rescue Zlib::GzipFile::Error + data + end + + def gzip_decompress(data) + # Since we could be dealing with an already populated cache full of data + # that isn't gzipped, we want to also check to see if the data is + # gzipped before we attempt to .decompress it, thus we check the first + # 2 bytes for "\x1F\x8B" to confirm it is a gzipped string. While a + # non-gzipped string will raise a Zlib::GzipFile::Error, which we're + # rescuing, we don't want to count on rescue for control flow. + # + data[0..1] == "\x1F\x8B" ? ActiveSupport::Gzip.decompress(data) : data + rescue Zlib::GzipFile::Error + data + end + end + end +end diff --git a/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb b/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb index 9f10811d765..30981e4bd7d 100644 --- a/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb +++ b/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb @@ -33,9 +33,9 @@ RSpec.describe Gitlab::DiscussionsDiff::HighlightCache, :clean_gitlab_redis_cach mapping.each do |key, value| full_key = described_class.cache_key_for(key) - found = Gitlab::Redis::Cache.with { |r| r.get(full_key) } + found_key = Gitlab::Redis::Cache.with { |r| r.get(full_key) } - expect(found).to eq(value.to_json) + expect(described_class.gzip_decompress(found_key)).to eq(value.to_json) end end end diff --git a/spec/lib/gitlab/utils/gzip_spec.rb b/spec/lib/gitlab/utils/gzip_spec.rb new file mode 100644 index 00000000000..5d1c62e03d3 --- /dev/null +++ b/spec/lib/gitlab/utils/gzip_spec.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require 'fast_spec_helper' + +RSpec.describe Gitlab::Utils::Gzip do + before do + example_class = Class.new do + include Gitlab::Utils::Gzip + + def lorem_ipsum + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod "\ + "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim "\ + "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea "\ + "commodo consequat. Duis aute irure dolor in reprehenderit in voluptate "\ + "velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat "\ + "cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id "\ + "est laborum." + end + end + + stub_const('ExampleClass', example_class) + end + + subject { ExampleClass.new } + + let(:sample_string) { subject.lorem_ipsum } + let(:compressed_string) { subject.gzip_compress(sample_string) } + + describe "#gzip_compress" do + it "compresses data passed to it" do + expect(compressed_string.length).to be < sample_string.length + end + + it "returns uncompressed data when encountering Zlib::GzipFile::Error" do + expect(ActiveSupport::Gzip).to receive(:compress).and_raise(Zlib::GzipFile::Error) + + expect(compressed_string.length).to eq sample_string.length + end + end + + describe "#gzip_decompress" do + let(:decompressed_string) { subject.gzip_decompress(compressed_string) } + + it "decompresses encoded data" do + expect(decompressed_string).to eq sample_string + end + + it "returns compressed data when encountering Zlib::GzipFile::Error" do + expect(ActiveSupport::Gzip).to receive(:decompress).and_raise(Zlib::GzipFile::Error) + + expect(decompressed_string).not_to eq sample_string.length + end + + it "returns unmodified data when it is determined to be uncompressed" do + expect(subject.gzip_decompress(sample_string)).to eq sample_string + end + end +end -- GitLab