This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit 0aa24e027af020d15030edc714ef41d579e61fdb Author: Xin Liao <liaoxin...@126.com> AuthorDate: Thu Jul 13 19:56:00 2023 +0800 [fix](merge-on-write) fix wrong result when query with prefix key predicate (#21770) --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +- be/src/util/key_util.h | 7 +- .../correctness_p0/test_select_with_prefix.out | 93 +++++++++++++ .../correctness_p0/test_select_with_prefix.groovy | 149 +++++++++++++++++++++ 4 files changed, 251 insertions(+), 2 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index cb9dff43c1..968177404e 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -537,8 +537,10 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool DCHECK(pk_index_reader != nullptr); std::string index_key; + // when is_include is false, we should append KEY_NORMAL_MARKER to the + // encoded key. Otherwise, we will get an incorrect upper bound. encode_key_with_padding<RowCursor, true, true>( - &index_key, key, _segment->_tablet_schema->num_key_columns(), is_include); + &index_key, key, _segment->_tablet_schema->num_key_columns(), is_include, true); if (index_key < _segment->min_key()) { *rowid = 0; return Status::OK(); diff --git a/be/src/util/key_util.h b/be/src/util/key_util.h index 13cb5c1768..8a5b7613a8 100644 --- a/be/src/util/key_util.h +++ b/be/src/util/key_util.h @@ -58,15 +58,20 @@ constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF; // fill a marker and return. If padding_minimal is true, KEY_MINIMAL_MARKER will // be added, if padding_minimal is false, KEY_MAXIMAL_MARKER will be added. // If all num_keys are found in row, no marker will be added. +// if padding_minimal is false and padding_normal_marker is true, +// KEY_NORMAL_MARKER will be added before KEY_MAXIMAL_MARKER. 
template <typename RowType, bool null_first = true, bool full_encode = false> void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_keys, - bool padding_minimal) { + bool padding_minimal, bool padding_normal_marker = false) { for (auto cid = 0; cid < num_keys; cid++) { auto field = row.schema()->column(cid); if (field == nullptr) { if (padding_minimal) { buf->push_back(KEY_MINIMAL_MARKER); } else { + if (padding_normal_marker) { + buf->push_back(KEY_NORMAL_MARKER); + } buf->push_back(KEY_MAXIMAL_MARKER); } break; diff --git a/regression-test/data/correctness_p0/test_select_with_prefix.out b/regression-test/data/correctness_p0/test_select_with_prefix.out new file mode 100644 index 0000000000..8b59f59f49 --- /dev/null +++ b/regression-test/data/correctness_p0/test_select_with_prefix.out @@ -0,0 +1,93 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- + +-- !sql -- + +-- !sql -- +abcd efgh 1 abcdad 2023-06-27T00:00 1 +abcd efgh 2 abcdad 2023-06-27T00:00 1 +abcd efghf 2 abcdad 2023-06-27T00:00 1 + +-- !sql -- +abcd +abcd +abcd + +-- !sql -- + +-- !sql -- +efgh +efgh +efghf + +-- !sql -- + efgh 1 abcdad 2023-06-27T00:00 1 + mefgh 1 abcdad 2023-06-27T00:00 1 + +-- !sql -- + + + +-- !sql -- +2 + +-- !sql -- + +-- !sql -- + +-- !sql -- + efgh 1 abcdad 2023-06-27T00:00 1 +abcd efgh 1 abcdad 2023-06-27T00:00 1 +abcd efgh 2 abcdad 2023-06-27T00:00 1 + +-- !sql -- + +abcd +abcd + +-- !sql -- + +-- !sql -- + +-- !sql -- +abcd efgh 1 abcdad 2023-06-27T00:00 1 +abcd efgh 2 abcdad 2023-06-27T00:00 1 +abcd efghf 2 abcdad 2023-06-27T00:00 1 + +-- !sql -- +abcd +abcd +abcd + +-- !sql -- + +-- !sql -- +efgh +efgh +efghf + +-- !sql -- + efgh 1 abcdad 2023-06-27T00:00 1 + mefgh 1 abcdad 2023-06-27T00:00 1 + +-- !sql -- + + + +-- !sql -- +2 + +-- !sql -- + +-- !sql -- + +-- !sql -- + efgh 1 abcdad 2023-06-27T00:00 1 +abcd efgh 1 abcdad 2023-06-27T00:00 1 +abcd efgh 2 abcdad 2023-06-27T00:00 1 + +-- !sql 
-- + +abcd +abcd + diff --git a/regression-test/suites/correctness_p0/test_select_with_prefix.groovy b/regression-test/suites/correctness_p0/test_select_with_prefix.groovy new file mode 100644 index 0000000000..c12e0da4c1 --- /dev/null +++ b/regression-test/suites/correctness_p0/test_select_with_prefix.groovy @@ -0,0 +1,149 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_select_with_prefix") { + def tableName = "test_select_with_perfix" + sql """ DROP TABLE IF EXISTS $tableName """ + sql """ + CREATE TABLE $tableName ( + `c0` varchar(64) NULL, + `c1` varchar(64) NULL, + `c2` bigint(20) NOT NULL, + `c3` char(32) NULL, + `c4` datetime NULL, + `c5` tinyint(4) NOT NULL + ) ENGINE=OLAP + UNIQUE KEY(`c0`, `c1`, `c2`, `c3`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`c0`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true" + ); + """ + + sql """ + INSERT INTO $tableName (`c0`, `c1`, `c2`, `c3`, `c4`, `c5`) VALUES + ('', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1), + ('', 'mefgh', 1, 'abcdad', '2023-06-27 00:00:00', 1), + ('abcd', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1), + ('abcd', 'efgh', 2, 'abcdad', '2023-06-27 00:00:00', 1), + ('abcd', 'efghf', 2, 'abcdad', '2023-06-27 00:00:00', 1), + ('abcde', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abcdf', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abcdf', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abce', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abce', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abd', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('def', 'hh', 2, 'afad', '2023-06-27 00:00:00', 1) + """ + + sql "sync" + + qt_sql "select * from $tableName where c0='abc'" + + qt_sql "select c0 from $tableName where c0='abc'" + + qt_sql "select * from $tableName where c0='abcd'" + + qt_sql "select c0 from $tableName where c0='abcd'" + + qt_sql "select c1 from $tableName where c0='abc'" + + qt_sql "select c1 from $tableName where c0='abcd'" + + qt_sql "select * from $tableName where c0=''" + + qt_sql "select c0 from $tableName where c0=''" + + qt_sql "select count(*) from $tableName where c0=''" + + qt_sql "select * from $tableName where c1='efg'" + + qt_sql "select c0 from $tableName where c1='efg'" + + qt_sql "select * from $tableName where 
c1='efgh'" + + qt_sql "select c0 from $tableName where c1='efgh'" + + sql """ DROP TABLE IF EXISTS $tableName """ + sql """ + CREATE TABLE $tableName ( + `c0` varchar(64) NULL, + `c1` varchar(64) NULL, + `c2` bigint(20) NOT NULL, + `c3` char(32) NULL, + `c4` datetime NULL, + `c5` tinyint(4) NOT NULL + ) ENGINE=OLAP + UNIQUE KEY(`c0`, `c1`, `c2`, `c3`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`c0`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "false" + ); + """ + + sql """ + INSERT INTO $tableName (`c0`, `c1`, `c2`, `c3`, `c4`, `c5`) VALUES + ('', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1), + ('', 'mefgh', 1, 'abcdad', '2023-06-27 00:00:00', 1), + ('abcd', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1), + ('abcd', 'efgh', 2, 'abcdad', '2023-06-27 00:00:00', 1), + ('abcd', 'efghf', 2, 'abcdad', '2023-06-27 00:00:00', 1), + ('abcde', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abcdf', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abcdf', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abce', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abce', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('abd', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1), + ('def', 'hh', 2, 'afad', '2023-06-27 00:00:00', 1) + """ + + sql "sync" + + qt_sql "select * from $tableName where c0='abc'" + + qt_sql "select c0 from $tableName where c0='abc'" + + qt_sql "select * from $tableName where c0='abcd'" + + qt_sql "select c0 from $tableName where c0='abcd'" + + qt_sql "select c1 from $tableName where c0='abc'" + + qt_sql "select c1 from $tableName where c0='abcd'" + + qt_sql "select * from $tableName where c0=''" + + qt_sql "select c0 from $tableName where c0=''" + + qt_sql "select count(*) from $tableName where c0=''" + + qt_sql "select * from $tableName where c1='efg'" + + qt_sql "select c0 from $tableName where c1='efg'" + + qt_sql "select * from $tableName where c1='efgh'" + + qt_sql 
"select c0 from $tableName where c1='efgh'" + + sql """ DROP TABLE IF EXISTS $tableName """ +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org