This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 3b5f4ad198 [fix](unique-key-merge-on-write) fix that unique key with mow may loss some data in the query result with predicates (#14455) 3b5f4ad198 is described below commit 3b5f4ad1980afb1d5730724aa8e0e2bb14239ec5 Author: Xin Liao <liaoxin...@126.com> AuthorDate: Wed Nov 23 09:08:07 2022 +0800 [fix](unique-key-merge-on-write) fix that unique key with mow may loss some data in the query result with predicates (#14455) When a unique-key table with merge-on-write (MOW) has a sequence column, queries with predicates may return wrong results. There are two problems: (1) the sequence column needs to be removed from the primary key index entry when comparing keys; (2) the sequence column needs to be removed from the min/max keys. --- be/src/olap/primary_key_index.cpp | 6 +- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 28 ++++++++ .../primary_index/test_unique_mow_sequence.out | 25 +++++++ .../primary_index/test_unique_mow_sequence.groovy | 83 ++++++++++++++++++++++ 4 files changed, 139 insertions(+), 3 deletions(-) diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp index 79f6e782cd..6e2c3d954d 100644 --- a/be/src/olap/primary_key_index.cpp +++ b/be/src/olap/primary_key_index.cpp @@ -60,9 +60,9 @@ Status PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta) // finish primary key index RETURN_IF_ERROR(_primary_key_index_builder->finish(meta->mutable_primary_key_index())); - // set min_max key - meta->set_min_key(_min_key.ToString()); - meta->set_max_key(_max_key.ToString()); + // set min_max key, the sequence column should be removed + meta->set_min_key(min_key().to_string()); + meta->set_max_key(max_key().to_string()); // finish bloom filter index RETURN_IF_ERROR(_bloom_filter_index_builder->flush()); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index a4ca422e69..e6eff66685 100644 --- 
a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -525,6 +525,34 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool } *rowid = index_iterator->get_current_ordinal(); + // The sequence column needs to be removed from primary key index when comparing key + bool has_seq_col = _segment->_tablet_schema->has_sequence_col(); + if (has_seq_col) { + size_t seq_col_length = + _segment->_tablet_schema->column(_segment->_tablet_schema->sequence_col_idx()) + .length() + + 1; + MemPool pool; + size_t num_to_read = 1; + std::unique_ptr<ColumnVectorBatch> cvb; + RETURN_IF_ERROR(ColumnVectorBatch::create( + num_to_read, false, _segment->_pk_index_reader->type_info(), nullptr, &cvb)); + ColumnBlock block(cvb.get(), &pool); + ColumnBlockView column_block_view(&block); + size_t num_read = num_to_read; + RETURN_IF_ERROR(index_iterator->next_batch(&num_read, &column_block_view)); + DCHECK(num_to_read == num_read); + + const Slice* sought_key = reinterpret_cast<const Slice*>(cvb->cell_ptr(0)); + Slice sought_key_without_seq = + Slice(sought_key->get_data(), sought_key->get_size() - seq_col_length); + + // compare key + if (Slice(index_key).compare(sought_key_without_seq) == 0) { + exact_match = true; + } + } + // find the key in primary key index, and the is_include is false, so move // to the next row. if (exact_match && !is_include) { diff --git a/regression-test/data/primary_index/test_unique_mow_sequence.out b/regression-test/data/primary_index/test_unique_mow_sequence.out new file mode 100644 index 0000000000..65b68e4152 --- /dev/null +++ b/regression-test/data/primary_index/test_unique_mow_sequence.out @@ -0,0 +1,25 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +1 Customer#000000001 j5JsirBM9P MOROCCO 0 MOROCCO AFRICA 25-989-741-2988 BUILDING +2 Customer#000000002 487LW1dovn6Q4dMVym JORDAN 1 JORDAN MIDDLE EAST 23-768-687-3665 AUTOMOBILE +3 Customer#000000003 fkRGN8n ARGENTINA7 ARGENTINA AMERICA 11-719-748-3364 AUTOMOBILE +4 Customer#000000004 4u58h f EGYPT 4 EGYPT MIDDLE EAST 14-128-190-5944 MACHINERY +5 Customer#000000005 hwBtxkoBF qSW4KrI CANADA 5 CANADA AMERICA 13-750-942-6364 HOUSEHOLD + +-- !sql -- +2996 Customer#000002996 PFd,H,pC PERU 1 PERU AMERICA 27-412-836-3763 FURNITURE +2997 Customer#000002997 LiVKxN3lQHLunID ALGERIA 0 ALGERIA AFRICA 10-600-583-9608 FURNITURE +2998 Customer#000002998 waJRUwjblh3sJbglX9gS9w PERU 7 PERU AMERICA 27-747-219-4938 AUTOMOBILE +2999 Customer#000002999 HaPy4sQ MiANd0pR5uA7 VIETNAM 5 VIETNAM ASIA 31-297-683-9811 MACHINERY +3000 Customer#000003000 ,5Yw1O EGYPT 4 EGYPT MIDDLE EAST 14-645-615-5901 FURNITURE + +-- !sql -- +1 Customer#000000001 j5JsirBM9P MOROCCO 0 MOROCCO AFRICA 25-989-741-2988 BUILDING + +-- !sql -- +3000 Customer#000003000 ,5Yw1O EGYPT 4 EGYPT MIDDLE EAST 14-645-615-5901 FURNITURE + +-- !sql -- + +-- !sql -- + diff --git a/regression-test/suites/primary_index/test_unique_mow_sequence.groovy b/regression-test/suites/primary_index/test_unique_mow_sequence.groovy new file mode 100644 index 0000000000..2612712165 --- /dev/null +++ b/regression-test/suites/primary_index/test_unique_mow_sequence.groovy @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_unique_mow_sequence") { + def tableName = "test_mow_sequence" + sql """ DROP TABLE IF EXISTS $tableName """ + sql """ + CREATE TABLE `$tableName` ( + `c_custkey` int(11) NOT NULL COMMENT "", + `c_name` varchar(26) NOT NULL COMMENT "", + `c_address` varchar(41) NOT NULL COMMENT "", + `c_city` varchar(11) NOT NULL COMMENT "", + `c_nation` varchar(16) NOT NULL COMMENT "", + `c_region` varchar(13) NOT NULL COMMENT "", + `c_phone` varchar(16) NOT NULL COMMENT "", + `c_mktsegment` varchar(11) NOT NULL COMMENT "" + ) + UNIQUE KEY (`c_custkey`) + DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 10 + PROPERTIES ( + "function_column.sequence_type" = 'int', + "compression"="zstd", + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true" + ); + """ + + streamLoad { + table "${tableName}" + + set 'column_separator', '|' + set 'compress_type', 'GZ' + set 'columns', 'c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use' + set 'function_column.sequence_col', 'c_custkey' + + file """${context.sf1DataPath}/ssb/sf0.1/customer.tbl.gz""" + + time 10000 // limit inflight 10s + + // stream load action will check result, include Success status, and NumberTotalRows == NumberLoadedRows + + // if declared a check callback, the default check condition will ignore. 
+ // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + + sql "sync" + + order_qt_sql "select * from $tableName where c_custkey < 6;" + + order_qt_sql "select * from $tableName where c_custkey > 2995;" + + qt_sql "select * from $tableName where c_custkey = 1;" + + qt_sql "select * from $tableName where c_custkey = 3000;" + + qt_sql "select * from $tableName where c_custkey = 3001;" + + qt_sql "select * from $tableName where c_custkey = 0;" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org