From f9565e303916ca194ef63b5fd3de541bf1c2a170 Mon Sep 17 00:00:00 2001 From: Zeger-Jan van de Weg Date: Fri, 3 Nov 2017 14:16:43 +0100 Subject: [PATCH] Batchload blobs for diff generation After installing a new gem, batch-loader, a construct can be used to queue data to be fetched in bulk. The gem was also introduced in both gitlab-org/gitlab-ce!14680 and gitlab-org/gitlab-ce!14846, but those mrs are not merged yet. For the generation of diffs, both the old blob and the new blob need to be loaded. This for every file in the diff, too. Now we collect all these so we do 1 fetch. Three `.allow_n_plus_1_calls` have been removed, which I expect to be valid, but this needs to be confirmed by a full CI run. Possibly closes: - https://gitlab.com/gitlab-org/gitlab-ce/issues/37445 - https://gitlab.com/gitlab-org/gitlab-ce/issues/37599 - https://gitlab.com/gitlab-org/gitlab-ce/issues/37431 --- Gemfile | 2 ++ Gemfile.lock | 2 ++ app/controllers/projects/commit_controller.rb | 9 ++------ .../merge_requests/diffs_controller.rb | 5 +---- app/models/blob.rb | 17 +++++++++++++- app/models/commit.rb | 4 ++-- app/models/repository.rb | 5 +++++ changelogs/unreleased/zj-commit-show-n-1.yml | 5 +++++ config/initializers/batch_loader.rb | 1 + lib/gitlab/diff/file.rb | 18 +++++++-------- lib/gitlab/diff/file_collection/base.rb | 5 +---- lib/gitlab/git/blob.rb | 2 ++ lib/gitlab/git/repository.rb | 5 +++++ .../projects/commit_controller_spec.rb | 6 ++--- spec/lib/gitlab/diff/file_spec.rb | 8 ++----- spec/models/blob_spec.rb | 17 ++++++++++++++ spec/models/diff_viewer/base_spec.rb | 22 +++++++------------ spec/models/diff_viewer/server_side_spec.rb | 9 ++++---- 18 files changed, 86 insertions(+), 56 deletions(-) create mode 100644 changelogs/unreleased/zj-commit-show-n-1.yml create mode 100644 config/initializers/batch_loader.rb diff --git a/Gemfile b/Gemfile index 5630f91c054..6034323956c 100644 --- a/Gemfile +++ b/Gemfile @@ -263,6 +263,8 @@ gem 'gettext_i18n_rails', '~> 1.8.0' gem 'gettext_i18n_rails_js', '~> 1.2.0' gem 'gettext', '~> 3.2.2', require: false, group: :development +gem 'batch-loader' + # Perf bar gem 'peek', '~> 1.0.1' gem 'peek-gc', '~> 0.0.2' diff --git a/Gemfile.lock b/Gemfile.lock index 8f6ffa58e5d..4787be92365 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -73,6 +73,7 @@ GEM thread_safe (~> 0.3, >= 0.3.1) babosa (1.0.2) base32 (0.3.2) + batch-loader (1.1.1) bcrypt (3.1.11) bcrypt_pbkdf (1.0.0) benchmark-ips (2.3.0) @@ -982,6 +983,7 @@ DEPENDENCIES awesome_print (~> 1.2.0) babosa (~> 1.0.2) base32 (~> 0.3.0) + batch-loader bcrypt_pbkdf (~> 1.0) benchmark-ips (~> 2.3.0) better_errors (~> 2.1.0) diff --git a/app/controllers/projects/commit_controller.rb b/app/controllers/projects/commit_controller.rb index 494d412b532..6ff96a3f295 100644 --- a/app/controllers/projects/commit_controller.rb +++ b/app/controllers/projects/commit_controller.rb @@ -22,12 +22,7 @@ class Projects::CommitController < Projects::ApplicationController apply_diff_view_cookie! respond_to do |format| - format.html do - # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37599 - Gitlab::GitalyClient.allow_n_plus_1_calls do - render - end - end + format.html { render } format.diff { render text: @commit.to_diff } format.patch { render text: @commit.to_patch } end @@ -112,7 +107,7 @@ class Projects::CommitController < Projects::ApplicationController end def commit - @noteable = @commit ||= @project.commit(params[:id]) + @noteable = @commit ||= @project.commit_by(oid: params[:id]) end def define_commit_vars diff --git a/app/controllers/projects/merge_requests/diffs_controller.rb b/app/controllers/projects/merge_requests/diffs_controller.rb index 7d16e77ef66..d60a24d3f1d 100644 --- a/app/controllers/projects/merge_requests/diffs_controller.rb +++ b/app/controllers/projects/merge_requests/diffs_controller.rb @@ -10,10 +10,7 @@ class Projects::MergeRequests::DiffsController < Projects::MergeRequests::Applic def show @environment = @merge_request.environments_for(current_user).last - # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37431 - Gitlab::GitalyClient.allow_n_plus_1_calls do - render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") } - end + render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") } end def diff_for_path diff --git a/app/models/blob.rb b/app/models/blob.rb index ad0bc2e2ead..29e762724e3 100644 --- a/app/models/blob.rb +++ b/app/models/blob.rb @@ -76,12 +76,24 @@ class Blob < SimpleDelegator new(blob, project) end + def self.lazy(project, commit_id, path) + BatchLoader.for(commit_id: commit_id, path: path).batch do |items, loader| + project.repository.blobs_at(items.map(&:values)).each do |blob| + loader.call({ commit_id: blob.commit_id, path: blob.path }, blob) if blob + end + end + end + def initialize(blob, project = nil) @project = project super(blob) end + def inspect + "#<#{self.class.name} oid:#{id[0..8]} commit:#{commit_id[0..8]} path:#{path}>" + end + # Returns the data of the blob. # # If the blob is a text based blob the content is converted to UTF-8 and any @@ -95,7 +107,10 @@ class Blob < SimpleDelegator end def load_all_data! - super(project.repository) if project + # Endpoint needed: gitlab-org/gitaly#756 + Gitlab::GitalyClient.allow_n_plus_1_calls do + super(project.repository) if project + end end def no_highlighting? diff --git a/app/models/commit.rb b/app/models/commit.rb index a31ebe9cc87..8401d99a08f 100644 --- a/app/models/commit.rb +++ b/app/models/commit.rb @@ -84,7 +84,7 @@ class Commit end def id - @raw.id + raw.id end def ==(other) @@ -361,7 +361,7 @@ class Commit @deltas ||= raw.deltas end - def diffs(diff_options = nil) + def diffs(diff_options = {}) Gitlab::Diff::FileCollection::Commit.new(self, diff_options: diff_options) end diff --git a/app/models/repository.rb b/app/models/repository.rb index 8a6a8377de9..26d1bc12232 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -478,6 +478,11 @@ class Repository nil end + # items is an Array like: [[oid, path], [oid1, path1]] + def blobs_at(items) + raw_repository.batch_blobs(items).map { |blob| Blob.decorate(blob, project) } + end + def root_ref if raw_repository raw_repository.root_ref diff --git a/changelogs/unreleased/zj-commit-show-n-1.yml b/changelogs/unreleased/zj-commit-show-n-1.yml new file mode 100644 index 00000000000..e536434f74a --- /dev/null +++ b/changelogs/unreleased/zj-commit-show-n-1.yml @@ -0,0 +1,5 @@ +--- +title: Fetch blobs in bulk when generating diffs +merge_request: +author: +type: performance diff --git a/config/initializers/batch_loader.rb b/config/initializers/batch_loader.rb new file mode 100644 index 00000000000..2e2256b0eb9 --- /dev/null +++ b/config/initializers/batch_loader.rb @@ -0,0 +1 @@ +Rails.application.config.middleware.use(BatchLoader::Middleware) diff --git a/lib/gitlab/diff/file.rb b/lib/gitlab/diff/file.rb index ea5891a028a..d0cfe2386ca 100644 --- a/lib/gitlab/diff/file.rb +++ b/lib/gitlab/diff/file.rb @@ -25,6 +25,10 @@ module Gitlab @repository = repository @diff_refs = diff_refs @fallback_diff_refs = fallback_diff_refs + + # Ensure items are collected in the the batch + new_blob + old_blob end def position(position_marker, position_type: :text) @@ -95,21 +99,15 @@ module Gitlab end def new_blob - return @new_blob if defined?(@new_blob) - - sha = new_content_sha - return @new_blob = nil unless sha + return unless new_content_sha - @new_blob = repository.blob_at(sha, file_path) + Blob.lazy(repository.project, new_content_sha, file_path) end def old_blob - return @old_blob if defined?(@old_blob) - - sha = old_content_sha - return @old_blob = nil unless sha + return unless old_content_sha - @old_blob = repository.blob_at(sha, old_path) + Blob.lazy(repository.project, old_content_sha, old_path) end def content_sha diff --git a/lib/gitlab/diff/file_collection/base.rb b/lib/gitlab/diff/file_collection/base.rb index 88ae65cb468..a6007ebf531 100644 --- a/lib/gitlab/diff/file_collection/base.rb +++ b/lib/gitlab/diff/file_collection/base.rb @@ -22,10 +22,7 @@ module Gitlab end def diff_files - # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37445 - Gitlab::GitalyClient.allow_n_plus_1_calls do - @diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) } - end + @diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) } end def diff_file_with_old_path(old_path) diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index bd5039fb87e..ddd52136bc4 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -179,6 +179,8 @@ module Gitlab ) end end + rescue Rugged::ReferenceError + nil end def rugged_raw(repository, sha, limit:) diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 0c522deb6fa..3cb9b254e6e 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -1161,6 +1161,11 @@ module Gitlab Gitlab::Git::Blob.find(self, sha, path) unless Gitlab::Git.blank_ref?(sha) end + # Items should be of format [[commit_id, path], [commit_id1, path1]] + def batch_blobs(items, blob_size_limit: nil) + Gitlab::Git::Blob.batch(self, items, blob_size_limit: blob_size_limit) + end + def commit_index(user, branch_name, index, options) committer = user_to_committer(user) diff --git a/spec/controllers/projects/commit_controller_spec.rb b/spec/controllers/projects/commit_controller_spec.rb index 5dc27e2bbba..fd90c0d8bad 100644 --- a/spec/controllers/projects/commit_controller_spec.rb +++ b/spec/controllers/projects/commit_controller_spec.rb @@ -1,15 +1,15 @@ require 'spec_helper' describe Projects::CommitController do - let(:project) { create(:project, :repository) } - let(:user) { create(:user) } + set(:project) { create(:project, :repository) } + set(:user) { create(:user) } let(:commit) { project.commit("master") } let(:master_pickable_sha) { '7d3b0f7cff5f37573aea97cebfd5692ea1689924' } let(:master_pickable_commit) { project.commit(master_pickable_sha) } before do sign_in(user) - project.team << [user, :master] + project.add_master(user) end describe 'GET show' do diff --git a/spec/lib/gitlab/diff/file_spec.rb b/spec/lib/gitlab/diff/file_spec.rb index c91895cedc3..ff9acfd08b9 100644 --- a/spec/lib/gitlab/diff/file_spec.rb +++ b/spec/lib/gitlab/diff/file_spec.rb @@ -116,12 +116,8 @@ describe Gitlab::Diff::File do end context 'when renamed' do - let(:commit) { project.commit('6907208d755b60ebeacb2e9dfea74c92c3449a1f') } - let(:diff_file) { commit.diffs.diff_file_with_new_path('files/js/commit.coffee') } - - before do - allow(diff_file.new_blob).to receive(:id).and_return(diff_file.old_blob.id) - end + let(:commit) { project.commit('94bb47ca1297b7b3731ff2a36923640991e9236f') } + let(:diff_file) { commit.diffs.diff_file_with_new_path('CHANGELOG.md') } it 'returns false' do expect(diff_file.content_changed?).to be_falsey diff --git a/spec/models/blob_spec.rb b/spec/models/blob_spec.rb index 47342f98283..81e35e6c931 100644 --- a/spec/models/blob_spec.rb +++ b/spec/models/blob_spec.rb @@ -16,6 +16,23 @@ describe Blob do end end + describe '.lazy' do + let(:project) { create(:project, :repository) } + let(:commit) { project.commit_by(oid: 'e63f41fe459e62e1228fcef60d7189127aeba95a') } + + it 'fetches all blobs when the first is accessed' do + changelog = described_class.lazy(project, commit.id, 'CHANGELOG') + contributing = described_class.lazy(project, commit.id, 'CONTRIBUTING.md') + + expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original + expect(Gitlab::Git::Blob).not_to receive(:find) + + # Access property so the values are loaded + changelog.id + contributing.id + end + end + describe '#data' do context 'using a binary blob' do it 'returns the data as-is' do diff --git a/spec/models/diff_viewer/base_spec.rb b/spec/models/diff_viewer/base_spec.rb index b26de3f3b97..c90b32c5d77 100644 --- a/spec/models/diff_viewer/base_spec.rb +++ b/spec/models/diff_viewer/base_spec.rb @@ -32,10 +32,8 @@ describe DiffViewer::Base do end context 'when the binaryness does not match' do - before do - allow(diff_file.old_blob).to receive(:binary?).and_return(false) - allow(diff_file.new_blob).to receive(:binary?).and_return(false) - end + let(:commit) { project.commit_by(oid: 'ae73cb07c9eeaf35924a10f713b364d32b2dd34f') } + let(:diff_file) { commit.diffs.diff_file_with_new_path('Gemfile.zip') } it 'returns false' do expect(viewer_class.can_render?(diff_file)).to be_falsey @@ -60,8 +58,7 @@ describe DiffViewer::Base do context 'when the binaryness does not match' do before do - allow(diff_file.old_blob).to receive(:binary?).and_return(true) - allow(diff_file.new_blob).to receive(:binary?).and_return(true) + allow_any_instance_of(Blob).to receive(:binary?).and_return(true) end it 'returns false' do @@ -77,12 +74,12 @@ describe DiffViewer::Base do end context 'when the file was renamed and only the old blob is supported' do - let(:commit) { project.commit('2f63565e7aac07bcdadb654e253078b727143ec4') } + let(:commit) { project.commit_by(oid: '2f63565e7aac07bcdadb654e253078b727143ec4') } let(:diff_file) { commit.diffs.diff_file_with_new_path('files/images/6049019_460s.jpg') } before do allow(diff_file).to receive(:renamed_file?).and_return(true) - allow(diff_file.new_blob).to receive(:extension).and_return('jpeg') + viewer_class.extensions = %w(notjpg) end it 'returns false' do @@ -94,8 +91,7 @@ describe DiffViewer::Base do describe '#collapsed?' do context 'when the combined blob size is larger than the collapse limit' do before do - allow(diff_file.old_blob).to receive(:raw_size).and_return(512.kilobytes) - allow(diff_file.new_blob).to receive(:raw_size).and_return(513.kilobytes) + allow(diff_file).to receive(:raw_size).and_return(1025.kilobytes) end it 'returns true' do @@ -113,8 +109,7 @@ describe DiffViewer::Base do describe '#too_large?' do context 'when the combined blob size is larger than the size limit' do before do - allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes) - allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes) + allow(diff_file).to receive(:raw_size).and_return(6.megabytes) end it 'returns true' do @@ -132,8 +127,7 @@ describe DiffViewer::Base do describe '#render_error' do context 'when the combined blob size is larger than the size limit' do before do - allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes) - allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes) + allow(diff_file).to receive(:raw_size).and_return(6.megabytes) end it 'returns :too_large' do diff --git a/spec/models/diff_viewer/server_side_spec.rb b/spec/models/diff_viewer/server_side_spec.rb index 92e613f92de..98a8f6d4cc9 100644 --- a/spec/models/diff_viewer/server_side_spec.rb +++ b/spec/models/diff_viewer/server_side_spec.rb @@ -1,9 +1,9 @@ require 'spec_helper' describe DiffViewer::ServerSide do - let(:project) { create(:project, :repository) } - let(:commit) { project.commit('570e7b2abdd848b95f2f578043fc23bd6f6fd24d') } - let(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') } + set(:project) { create(:project, :repository) } + let(:commit) { project.commit_by(oid: '570e7b2abdd848b95f2f578043fc23bd6f6fd24d') } + let!(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') } let(:viewer_class) do Class.new(DiffViewer::Base) do @@ -15,8 +15,7 @@ describe DiffViewer::ServerSide do describe '#prepare!' do it 'loads all diff file data' do - expect(diff_file.old_blob).to receive(:load_all_data!) - expect(diff_file.new_blob).to receive(:load_all_data!) + expect(Blob).to receive(:lazy).at_least(:twice) subject.prepare! end -- GitLab