diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index ddeaf053225937907833f0c958d0d9a7acb5a9f2..a9f50cdda0e809f644db5a64d8c15542ba170d20 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -76,6 +76,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, return left_column.compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); } +/// Get first and last row from sorted block Block extractMinMax(const Block & block, const Block & keys) { if (block.rows() == 0) @@ -86,7 +87,7 @@ Block extractMinMax(const Block & block, const Block & keys) for (size_t i = 0; i < columns.size(); ++i) { - const auto & src_column = block.getByName(keys.getByPosition(i).name); + const auto & src_column = block.getByName(min_max.getByPosition(i).name); columns[i]->insertFrom(*src_column.column, 0); columns[i]->insertFrom(*src_column.column, block.rows() - 1); @@ -465,6 +466,7 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right table_join->splitAdditionalColumns(right_sample_block, right_table_keys, right_columns_to_add); JoinCommon::removeLowCardinalityInplace(right_table_keys); + JoinCommon::removeLowCardinalityInplace(right_sample_block, table_join->keyNamesRight()); const NameSet required_right_keys = table_join->requiredRightKeys(); for (const auto & column : right_table_keys) @@ -485,6 +487,7 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right left_blocks_buffer = std::make_shared(left_sort_description, max_bytes); } +/// Has to be called even if totals are empty void MergeJoin::setTotals(const Block & totals_block) { totals = totals_block; diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index a13d0304907993132d7a2e5316c561997fb6f546..d145a69ce9dcf51d7ae870623779d394c0833b9b 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -76,12 +76,15 @@ private: Block right_table_keys; Block right_columns_to_add; SortedBlocksWriter::Blocks right_blocks; + + /// Each block stores first and last row from corresponding sorted block on disk Blocks min_max_right_blocks; std::shared_ptr left_blocks_buffer; std::shared_ptr used_rows_bitmap; mutable std::unique_ptr cached_right_blocks; std::vector> loaded_right_blocks; std::unique_ptr disk_writer; + /// Set of files with sorted blocks SortedBlocksWriter::SortedFiles flushed_right_blocks; Block totals; std::atomic is_in_memory{true}; diff --git a/tests/queries/0_stateless/01785_pmj_lc_bug.reference b/tests/queries/0_stateless/01785_pmj_lc_bug.reference new file mode 100644 index 0000000000000000000000000000000000000000..98fb6a686563963b8f7e552d747158adbc1c2bd6 --- /dev/null +++ b/tests/queries/0_stateless/01785_pmj_lc_bug.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01785_pmj_lc_bug.sql b/tests/queries/0_stateless/01785_pmj_lc_bug.sql new file mode 100644 index 0000000000000000000000000000000000000000..722faa9b40d61cad7728cfd8de08a2e73e7a4281 --- /dev/null +++ b/tests/queries/0_stateless/01785_pmj_lc_bug.sql @@ -0,0 +1,14 @@ +SET join_algorithm = 'partial_merge'; +SET max_bytes_in_join = '100'; + +CREATE TABLE foo_lc (n LowCardinality(String)) ENGINE = Memory; +CREATE TABLE foo (n String) ENGINE = Memory; + +INSERT INTO foo SELECT toString(number) AS n FROM system.numbers LIMIT 1025; +INSERT INTO foo_lc SELECT toString(number) AS n FROM system.numbers LIMIT 1025; + +SELECT 1025 == count(n) FROM foo_lc AS t1 ANY LEFT JOIN foo_lc AS t2 ON t1.n == t2.n; +SELECT 1025 == count(n) FROM foo AS t1 ANY LEFT JOIN foo_lc AS t2 ON t1.n == t2.n; +SELECT 1025 == count(n) FROM foo_lc AS t1 ANY LEFT JOIN foo AS t2 ON t1.n == t2.n; + +SELECT 1025 == count(n) FROM foo_lc AS t1 ALL LEFT JOIN foo_lc AS t2 ON t1.n == t2.n;