This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new d6ee2ff70d8 [Fix](merge-on-write) Fix duplicate key problem after 
adding sequence column for merge-on-write table #39958 (#40015)
d6ee2ff70d8 is described below

commit d6ee2ff70d8ba0f83620bd9b4e66ec7ed4cac787
Author: bobhan1 <bh2444151...@outlook.com>
AuthorDate: Thu Aug 29 19:33:34 2024 +0800

    [Fix](merge-on-write) Fix duplicate key problem after adding sequence 
column for merge-on-write table #39958 (#40015)
---
 be/src/olap/rowset/segment_v2/segment.cpp          | 16 +++--
 be/src/olap/rowset/segment_v2/segment.h            |  3 +-
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |  5 +-
 be/src/olap/tablet.cpp                             | 17 +++---
 be/src/olap/tablet.h                               |  2 +-
 be/src/service/point_query_executor.cpp            |  6 +-
 .../test_mow_enable_sequence_col.out               | 16 +++++
 .../test_mow_enable_sequence_col.groovy            | 71 ++++++++++++++++++++++
 8 files changed, 117 insertions(+), 19 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 9d12c772cd3..90ab6b45cf6 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -424,12 +424,13 @@ Status Segment::new_inverted_index_iterator(const 
TabletColumn& tablet_column,
     return Status::OK();
 }
 
-Status Segment::lookup_row_key(const Slice& key, bool with_seq_col, 
RowLocation* row_location) {
+Status Segment::lookup_row_key(const Slice& key, const TabletSchema* 
latest_schema,
+                               bool with_seq_col, RowLocation* row_location) {
     RETURN_IF_ERROR(load_pk_index_and_bf());
-    bool has_seq_col = _tablet_schema->has_sequence_col();
+    bool has_seq_col = latest_schema->has_sequence_col();
     size_t seq_col_length = 0;
     if (has_seq_col) {
-        seq_col_length = 
_tablet_schema->column(_tablet_schema->sequence_col_idx()).length() + 1;
+        seq_col_length = 
latest_schema->column(latest_schema->sequence_col_idx()).length() + 1;
     }
 
     Slice key_without_seq =
@@ -464,15 +465,20 @@ Status Segment::lookup_row_key(const Slice& key, bool 
with_seq_col, RowLocation*
 
         Slice sought_key =
                 Slice(index_column->get_data_at(0).data, 
index_column->get_data_at(0).size);
+        // user may use "ALTER TABLE tbl ENABLE FEATURE "SEQUENCE_LOAD" WITH 
..." to add a hidden sequence column
+        // for a merge-on-write table which doesn't have sequence column, so 
`has_seq_col ==  true` doesn't mean
+        // data in segment has sequence column value
+        bool segment_has_seq_col = _tablet_schema->has_sequence_col();
         Slice sought_key_without_seq =
-                Slice(sought_key.get_data(), sought_key.get_size() - 
seq_col_length);
+                Slice(sought_key.get_data(),
+                      sought_key.get_size() - (segment_has_seq_col ? 
seq_col_length : 0));
 
         // compare key
         if (key_without_seq.compare(sought_key_without_seq) != 0) {
             return Status::Error<ErrorCode::KEY_NOT_FOUND>("Can't find key in 
the segment");
         }
 
-        if (!with_seq_col) {
+        if (!with_seq_col || !segment_has_seq_col) {
             return Status::OK();
         }
 
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index 34b41843aa1..6223c5f81f2 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -108,7 +108,8 @@ public:
         return _pk_index_reader.get();
     }
 
-    Status lookup_row_key(const Slice& key, bool with_seq_col, RowLocation* 
row_location);
+    Status lookup_row_key(const Slice& key, const TabletSchema* latest_schema, 
bool with_seq_col,
+                          RowLocation* row_location);
 
     Status read_key_by_rowid(uint32_t row_id, std::string* key);
 
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 64e91be3e9a..a4fa7940918 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -449,8 +449,9 @@ Status 
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
         RowLocation loc;
         // save rowset shared ptr so this rowset wouldn't delete
         RowsetSharedPtr rowset;
-        auto st = _tablet->lookup_row_key(key, have_input_seq_column, 
specified_rowsets, &loc,
-                                          _mow_context->max_version, 
segment_caches, &rowset);
+        auto st = _tablet->lookup_row_key(key, _tablet_schema.get(), 
have_input_seq_column,
+                                          specified_rowsets, &loc, 
_mow_context->max_version,
+                                          segment_caches, &rowset);
         if (st.is<KEY_NOT_FOUND>()) {
             if (_opts.rowset_ctx->partial_update_info->is_strict_mode) {
                 ++num_rows_filtered;
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 8a4a510f4c6..b2d5f9c114d 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -3041,15 +3041,18 @@ Status Tablet::lookup_row_data(const Slice& 
encoded_key, const RowLocation& row_
     return Status::OK();
 }
 
-Status Tablet::lookup_row_key(const Slice& encoded_key, bool with_seq_col,
+Status Tablet::lookup_row_key(const Slice& encoded_key, TabletSchema* 
latest_schema,
+                              bool with_seq_col,
                               const std::vector<RowsetSharedPtr>& 
specified_rowsets,
                               RowLocation* row_location, uint32_t version,
                               
std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches,
                               RowsetSharedPtr* rowset) {
     SCOPED_BVAR_LATENCY(g_tablet_lookup_rowkey_latency);
     size_t seq_col_length = 0;
-    if (_schema->has_sequence_col() && with_seq_col) {
-        seq_col_length = _schema->column(_schema->sequence_col_idx()).length() 
+ 1;
+    // use the latest tablet schema to decide if the tablet has sequence 
column currently
+    const TabletSchema* schema = (latest_schema == nullptr ? _schema.get() : 
latest_schema);
+    if (schema->has_sequence_col() && with_seq_col) {
+        seq_col_length = schema->column(schema->sequence_col_idx()).length() + 
1;
     }
     Slice key_without_seq = Slice(encoded_key.get_data(), 
encoded_key.get_size() - seq_col_length);
     RowLocation loc;
@@ -3080,7 +3083,7 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, 
bool with_seq_col,
         DCHECK_EQ(segments.size(), num_segments);
 
         for (auto id : picked_segments) {
-            Status s = segments[id]->lookup_row_key(encoded_key, with_seq_col, 
&loc);
+            Status s = segments[id]->lookup_row_key(encoded_key, schema, 
with_seq_col, &loc);
             if (s.is<KEY_NOT_FOUND>()) {
                 continue;
             }
@@ -3091,7 +3094,7 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, 
bool with_seq_col,
                                   {loc.rowset_id, loc.segment_id, version}, 
loc.row_id)) {
                 // if has sequence col, we continue to compare the sequence_id 
of
                 // all rowsets, util we find an existing key.
-                if (_schema->has_sequence_col()) {
+                if (schema->has_sequence_col()) {
                     continue;
                 }
                 // The key is deleted, we don't need to search for it any more.
@@ -3231,8 +3234,8 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr 
rowset,
             }
 
             RowsetSharedPtr rowset_find;
-            auto st = lookup_row_key(key, true, specified_rowsets, &loc, 
dummy_version.first - 1,
-                                     segment_caches, &rowset_find);
+            auto st = lookup_row_key(key, rowset_schema.get(), true, 
specified_rowsets, &loc,
+                                     dummy_version.first - 1, segment_caches, 
&rowset_find);
             bool expected_st = st.ok() || st.is<KEY_NOT_FOUND>() || 
st.is<KEY_ALREADY_EXISTS>();
             // It's a defensive DCHECK, we need to exclude some common errors 
to avoid core-dump
             // while stress test
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 20e9ae890cc..1fbc8254910 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -433,7 +433,7 @@ public:
     // Lookup the row location of `encoded_key`, the function sets 
`row_location` on success.
     // NOTE: the method only works in unique key model with primary key index, 
you will got a
     //       not supported error in other data model.
-    Status lookup_row_key(const Slice& encoded_key, bool with_seq_col,
+    Status lookup_row_key(const Slice& encoded_key, TabletSchema* 
latest_schema, bool with_seq_col,
                           const std::vector<RowsetSharedPtr>& 
specified_rowsets,
                           RowLocation* row_location, uint32_t version,
                           std::vector<std::unique_ptr<SegmentCacheHandle>>& 
segment_caches,
diff --git a/be/src/service/point_query_executor.cpp 
b/be/src/service/point_query_executor.cpp
index 9017b10aef8..6402909f5cf 100644
--- a/be/src/service/point_query_executor.cpp
+++ b/be/src/service/point_query_executor.cpp
@@ -298,9 +298,9 @@ Status PointQueryExecutor::_lookup_row_key() {
         }
         // Get rowlocation and rowset, ctx._rowset_ptr will acquire wrap this 
ptr
         auto rowset_ptr = std::make_unique<RowsetSharedPtr>();
-        st = (_tablet->lookup_row_key(_row_read_ctxs[i]._primary_key, false, 
specified_rowsets,
-                                      &location, INT32_MAX /*rethink?*/, 
segment_caches,
-                                      rowset_ptr.get()));
+        st = (_tablet->lookup_row_key(_row_read_ctxs[i]._primary_key, nullptr, 
false,
+                                      specified_rowsets, &location, INT32_MAX 
/*rethink?*/,
+                                      segment_caches, rowset_ptr.get()));
         if (st.is<ErrorCode::KEY_NOT_FOUND>()) {
             continue;
         }
diff --git 
a/regression-test/data/unique_with_mow_p0/test_mow_enable_sequence_col.out 
b/regression-test/data/unique_with_mow_p0/test_mow_enable_sequence_col.out
new file mode 100644
index 00000000000..d99510cfbcb
--- /dev/null
+++ b/regression-test/data/unique_with_mow_p0/test_mow_enable_sequence_col.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+111    aaa     bbb     11
+222    bbb     bbb     11
+333    ccc     ddd     11
+
+-- !sql --
+111    aaa     bbb     11      \N      0       2
+222    bbb     bbb     11      \N      0       3
+333    ccc     ddd     11      \N      0       4
+
+-- !sql --
+111    zzz     yyy     100     99      0       5
+222    xxx     www     400     99      0       8
+333    ccc     ddd     11      \N      0       4
+
diff --git 
a/regression-test/suites/unique_with_mow_p0/test_mow_enable_sequence_col.groovy 
b/regression-test/suites/unique_with_mow_p0/test_mow_enable_sequence_col.groovy
new file mode 100644
index 00000000000..ada3a55f042
--- /dev/null
+++ 
b/regression-test/suites/unique_with_mow_p0/test_mow_enable_sequence_col.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_mow_enable_sequence_col") {
+
+    def tableName = "test_mow_enable_sequence_col"
+    sql """ DROP TABLE IF EXISTS ${tableName} force;"""
+    sql """CREATE TABLE IF NOT EXISTS ${tableName}
+            (`user_id` BIGINT NOT NULL,
+            `username` VARCHAR(50) NOT NULL,
+            `city` VARCHAR(20),
+            `age` SMALLINT)
+            UNIQUE KEY(`user_id`)
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 1
+            PROPERTIES (
+            "disable_auto_compaction" = "true",
+            "replication_allocation" = "tag.location.default: 1",
+            "enable_unique_key_merge_on_write" = "true");"""
+
+    sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`) 
VALUES(111,'aaa','bbb',11);"""
+    sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`) 
VALUES(222,'bbb','bbb',11);"""
+    sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`) 
VALUES(333,'ccc','ddd',11);"""
+    order_qt_sql "select * from ${tableName};"
+
+    sql "set show_hidden_columns = true;"
+    sql "sync;"
+    def res = sql "desc ${tableName} all;"
+    assertTrue(!res.toString().contains("__DORIS_SEQUENCE_COL__"))
+    sql "set show_hidden_columns = false;"
+    sql "sync;"
+
+    def doSchemaChange = { cmd ->
+        sql cmd
+        waitForSchemaChangeDone {
+            sql """SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' 
ORDER BY createtime DESC LIMIT 1"""
+            time 2000
+        }
+    }
+    doSchemaChange """ALTER TABLE ${tableName} ENABLE FEATURE "SEQUENCE_LOAD" 
WITH PROPERTIES ("function_column.sequence_type" = "bigint");"""
+
+    sql "set show_hidden_columns = true;"
+    sql "sync;"
+    res = sql "desc ${tableName} all;"
+    assertTrue(res.toString().contains("__DORIS_SEQUENCE_COL__"))
+    order_qt_sql "select * from ${tableName};"
+    sql "set show_hidden_columns = false;"
+    sql "sync;"
+
+    sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`, 
`__DORIS_SEQUENCE_COL__`) VALUES(111,'zzz','yyy',100,99);"""
+    sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`, 
`__DORIS_SEQUENCE_COL__`) VALUES(111,'hhh','mmm',200,88);"""
+    sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`, 
`__DORIS_SEQUENCE_COL__`) VALUES(222,'qqq','ppp',300,77);"""
+    sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`, 
`__DORIS_SEQUENCE_COL__`) VALUES(222,'xxx','www',400,99);"""
+
+    sql "set show_hidden_columns = true;"
+    sql "sync;"
+    order_qt_sql "select * from ${tableName};"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to