This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3b5f4ad198 [fix](unique-key-merge-on-write) fix that unique key with 
mow may lose some data in the query result with predicates (#14455)
3b5f4ad198 is described below

commit 3b5f4ad1980afb1d5730724aa8e0e2bb14239ec5
Author: Xin Liao <liaoxin...@126.com>
AuthorDate: Wed Nov 23 09:08:07 2022 +0800

    [fix](unique-key-merge-on-write) fix that unique key with mow may lose some 
data in the query result with predicates (#14455)
    
    When a unique-key table with merge-on-write (MOW) has a sequence column, 
queries with predicates may return wrong results. There are two problems:
    
    1. The sequence column needs to be removed from the primary key index key 
when comparing keys.
    2. The sequence column needs to be removed from the min/max key.
---
 be/src/olap/primary_key_index.cpp                  |  6 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 28 ++++++++
 .../primary_index/test_unique_mow_sequence.out     | 25 +++++++
 .../primary_index/test_unique_mow_sequence.groovy  | 83 ++++++++++++++++++++++
 4 files changed, 139 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/primary_key_index.cpp 
b/be/src/olap/primary_key_index.cpp
index 79f6e782cd..6e2c3d954d 100644
--- a/be/src/olap/primary_key_index.cpp
+++ b/be/src/olap/primary_key_index.cpp
@@ -60,9 +60,9 @@ Status 
PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta)
     // finish primary key index
     
RETURN_IF_ERROR(_primary_key_index_builder->finish(meta->mutable_primary_key_index()));
 
-    // set min_max key
-    meta->set_min_key(_min_key.ToString());
-    meta->set_max_key(_max_key.ToString());
+    // set min_max key, the sequence column should be removed
+    meta->set_min_key(min_key().to_string());
+    meta->set_max_key(max_key().to_string());
 
     // finish bloom filter index
     RETURN_IF_ERROR(_bloom_filter_index_builder->flush());
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index a4ca422e69..e6eff66685 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -525,6 +525,34 @@ Status 
SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
     }
     *rowid = index_iterator->get_current_ordinal();
 
+    // The sequence column needs to be removed from primary key index when 
comparing key
+    bool has_seq_col = _segment->_tablet_schema->has_sequence_col();
+    if (has_seq_col) {
+        size_t seq_col_length =
+                
_segment->_tablet_schema->column(_segment->_tablet_schema->sequence_col_idx())
+                        .length() +
+                1;
+        MemPool pool;
+        size_t num_to_read = 1;
+        std::unique_ptr<ColumnVectorBatch> cvb;
+        RETURN_IF_ERROR(ColumnVectorBatch::create(
+                num_to_read, false, _segment->_pk_index_reader->type_info(), 
nullptr, &cvb));
+        ColumnBlock block(cvb.get(), &pool);
+        ColumnBlockView column_block_view(&block);
+        size_t num_read = num_to_read;
+        RETURN_IF_ERROR(index_iterator->next_batch(&num_read, 
&column_block_view));
+        DCHECK(num_to_read == num_read);
+
+        const Slice* sought_key = reinterpret_cast<const 
Slice*>(cvb->cell_ptr(0));
+        Slice sought_key_without_seq =
+                Slice(sought_key->get_data(), sought_key->get_size() - 
seq_col_length);
+
+        // compare key
+        if (Slice(index_key).compare(sought_key_without_seq) == 0) {
+            exact_match = true;
+        }
+    }
+
     // find the key in primary key index, and the is_include is false, so move
     // to the next row.
     if (exact_match && !is_include) {
diff --git a/regression-test/data/primary_index/test_unique_mow_sequence.out 
b/regression-test/data/primary_index/test_unique_mow_sequence.out
new file mode 100644
index 0000000000..65b68e4152
--- /dev/null
+++ b/regression-test/data/primary_index/test_unique_mow_sequence.out
@@ -0,0 +1,25 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      Customer#000000001      j5JsirBM9P      MOROCCO  0      MOROCCO AFRICA  
25-989-741-2988 BUILDING
+2      Customer#000000002      487LW1dovn6Q4dMVym      JORDAN   1      JORDAN  
MIDDLE EAST     23-768-687-3665 AUTOMOBILE
+3      Customer#000000003      fkRGN8n ARGENTINA7      ARGENTINA       AMERICA 
11-719-748-3364 AUTOMOBILE
+4      Customer#000000004      4u58h f EGYPT    4      EGYPT   MIDDLE EAST     
14-128-190-5944 MACHINERY
+5      Customer#000000005      hwBtxkoBF qSW4KrI       CANADA   5      CANADA  
AMERICA 13-750-942-6364 HOUSEHOLD
+
+-- !sql --
+2996   Customer#000002996      PFd,H,pC        PERU     1      PERU    AMERICA 
27-412-836-3763 FURNITURE
+2997   Customer#000002997      LiVKxN3lQHLunID ALGERIA  0      ALGERIA AFRICA  
10-600-583-9608 FURNITURE
+2998   Customer#000002998      waJRUwjblh3sJbglX9gS9w  PERU     7      PERU    
AMERICA 27-747-219-4938 AUTOMOBILE
+2999   Customer#000002999      HaPy4sQ MiANd0pR5uA7    VIETNAM  5      VIETNAM 
ASIA    31-297-683-9811 MACHINERY
+3000   Customer#000003000      ,5Yw1O  EGYPT    4      EGYPT   MIDDLE EAST     
14-645-615-5901 FURNITURE
+
+-- !sql --
+1      Customer#000000001      j5JsirBM9P      MOROCCO  0      MOROCCO AFRICA  
25-989-741-2988 BUILDING
+
+-- !sql --
+3000   Customer#000003000      ,5Yw1O  EGYPT    4      EGYPT   MIDDLE EAST     
14-645-615-5901 FURNITURE
+
+-- !sql --
+
+-- !sql --
+
diff --git 
a/regression-test/suites/primary_index/test_unique_mow_sequence.groovy 
b/regression-test/suites/primary_index/test_unique_mow_sequence.groovy
new file mode 100644
index 0000000000..2612712165
--- /dev/null
+++ b/regression-test/suites/primary_index/test_unique_mow_sequence.groovy
@@ -0,0 +1,83 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_unique_mow_sequence") {
+    def tableName = "test_mow_sequence"
+        sql """ DROP TABLE IF EXISTS $tableName """
+        sql """
+            CREATE TABLE `$tableName` (
+                    `c_custkey` int(11) NOT NULL COMMENT "",
+                    `c_name` varchar(26) NOT NULL COMMENT "",
+                    `c_address` varchar(41) NOT NULL COMMENT "",
+                    `c_city` varchar(11) NOT NULL COMMENT "",
+                    `c_nation` varchar(16) NOT NULL COMMENT "",
+                    `c_region` varchar(13) NOT NULL COMMENT "",
+                    `c_phone` varchar(16) NOT NULL COMMENT "",
+                    `c_mktsegment` varchar(11) NOT NULL COMMENT ""
+            )
+            UNIQUE KEY (`c_custkey`)
+            DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 10
+            PROPERTIES (
+                    "function_column.sequence_type" = 'int',
+                    "compression"="zstd",
+                    "replication_num" = "1",
+                    "enable_unique_key_merge_on_write" = "true"
+             );
+        """
+
+        streamLoad {
+            table "${tableName}"
+
+            set 'column_separator', '|'
+            set 'compress_type', 'GZ'
+            set 'columns', 
'c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use'
+            set 'function_column.sequence_col', 'c_custkey'
+
+            file """${context.sf1DataPath}/ssb/sf0.1/customer.tbl.gz"""
+
+            time 10000 // limit inflight 10s
+
+            // stream load action will check result, include Success status, 
and NumberTotalRows == NumberLoadedRows
+
+            // if declared a check callback, the default check condition will 
ignore.
+            // So you must check all condition
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+                assertEquals("success", json.Status.toLowerCase())
+                assertEquals(json.NumberTotalRows, json.NumberLoadedRows)
+                assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+            }
+        }
+
+        sql "sync"
+
+        order_qt_sql "select * from $tableName where c_custkey < 6;"
+
+        order_qt_sql "select * from $tableName where c_custkey > 2995;"
+
+        qt_sql "select * from $tableName where c_custkey = 1;"
+
+        qt_sql "select * from $tableName where c_custkey = 3000;"
+
+        qt_sql "select * from $tableName where c_custkey = 3001;"
+
+        qt_sql "select * from $tableName where c_custkey = 0;"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to