This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 24b51b9035 [fix](compaction) segcompaction coredump if the rowset starts with a big segment (#14174) (#14176)
24b51b9035 is described below

commit 24b51b90357943801aa5654c1a6fd3ccd796e4c5
Author: zhengyu <freeman.zhang1...@gmail.com>
AuthorDate: Mon Nov 14 09:54:08 2022 +0800

    [fix](compaction) segcompaction coredump if the rowset starts with a big segment (#14174) (#14176)
    
    Signed-off-by: freemandealer <freeman.zhang1...@gmail.com>
    
    Signed-off-by: freemandealer <freeman.zhang1...@gmail.com>
---
 be/src/olap/rowset/beta_rowset_writer.cpp |   1 +
 be/test/olap/segcompaction_test.cpp       | 125 ++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)

diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 78b85b9df3..65b78b288a 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -206,6 +206,7 @@ Status BetaRowsetWriter::_rename_compacted_segments(int64_t begin, int64_t end)
 
 Status BetaRowsetWriter::_rename_compacted_segment_plain(uint64_t seg_id) {
     if (seg_id == _num_segcompacted) {
+        ++_num_segcompacted;
         return Status::OK();
     }
 
diff --git a/be/test/olap/segcompaction_test.cpp b/be/test/olap/segcompaction_test.cpp
index ca2dc28f04..205fa0cbbd 100644
--- a/be/test/olap/segcompaction_test.cpp
+++ b/be/test/olap/segcompaction_test.cpp
@@ -91,6 +91,7 @@ public:
             delete l_engine;
             l_engine = nullptr;
         }
+        config::enable_segcompaction = false;
     }
 
 protected:
@@ -167,6 +168,7 @@ protected:
         tablet_schema->init_from_pb(tablet_schema_pb);
     }
 
+    // use different id to avoid conflict
     void create_rowset_writer_context(int64_t id, TabletSchemaSPtr 
tablet_schema,
                                       RowsetWriterContext* 
rowset_writer_context) {
         RowsetId rowset_id;
@@ -449,6 +451,129 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
     }
 }
 
+TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) {
+    config::enable_segcompaction = true;
+    config::enable_storage_vectorization = true;
+    Status s;
+    TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+    create_tablet_schema(tablet_schema);
+
+    RowsetSharedPtr rowset;
+    config::segcompaction_small_threshold = 6000; // lies between the 4096-row and 6400-row segments written below
+    config::segcompaction_threshold_segment_num = 5;
+    std::vector<uint32_t> segment_num_rows;
+    { // write five batches, one flush each, alternating 6400 and 4096 rows (O o O o O)
+        RowsetWriterContext writer_context;
+        create_rowset_writer_context(10049, tablet_schema, &writer_context);
+
+        std::unique_ptr<RowsetWriter> rowset_writer;
+        s = RowsetFactory::create_rowset_writer(writer_context, 
&rowset_writer);
+        EXPECT_EQ(Status::OK(), s);
+
+        RowCursor input_row;
+        input_row.init(tablet_schema);
+
+        // for segment "i", row "rid"
+        // k1 := rid * 100 + i
+        // k2 := i
+        // k3 := rid
+        int num_segments = 1;
+        uint32_t rows_per_segment = 6400;
+        for (int i = 0; i < num_segments; ++i) {
+            MemPool mem_pool;
+            for (int rid = 0; rid < rows_per_segment; ++rid) {
+                uint32_t k1 = rid * 100 + i;
+                uint32_t k2 = i;
+                uint32_t k3 = rid;
+                input_row.set_field_content(0, reinterpret_cast<char*>(&k1), 
&mem_pool);
+                input_row.set_field_content(1, reinterpret_cast<char*>(&k2), 
&mem_pool);
+                input_row.set_field_content(2, reinterpret_cast<char*>(&k3), 
&mem_pool);
+                s = rowset_writer->add_row(input_row);
+                EXPECT_EQ(Status::OK(), s);
+            }
+            s = rowset_writer->flush();
+            EXPECT_EQ(Status::OK(), s);
+        }
+        num_segments = 1;
+        rows_per_segment = 4096;
+        for (int i = 0; i < num_segments; ++i) {
+            MemPool mem_pool;
+            for (int rid = 0; rid < rows_per_segment; ++rid) {
+                uint32_t k1 = rid * 100 + i;
+                uint32_t k2 = i;
+                uint32_t k3 = rid;
+                input_row.set_field_content(0, reinterpret_cast<char*>(&k1), 
&mem_pool);
+                input_row.set_field_content(1, reinterpret_cast<char*>(&k2), 
&mem_pool);
+                input_row.set_field_content(2, reinterpret_cast<char*>(&k3), 
&mem_pool);
+                s = rowset_writer->add_row(input_row);
+                EXPECT_EQ(Status::OK(), s);
+            }
+            s = rowset_writer->flush();
+            EXPECT_EQ(Status::OK(), s);
+        }
+        num_segments = 1;
+        rows_per_segment = 6400;
+        for (int i = 0; i < num_segments; ++i) {
+            MemPool mem_pool;
+            for (int rid = 0; rid < rows_per_segment; ++rid) {
+                uint32_t k1 = rid * 100 + i;
+                uint32_t k2 = i;
+                uint32_t k3 = rid;
+                input_row.set_field_content(0, reinterpret_cast<char*>(&k1), 
&mem_pool);
+                input_row.set_field_content(1, reinterpret_cast<char*>(&k2), 
&mem_pool);
+                input_row.set_field_content(2, reinterpret_cast<char*>(&k3), 
&mem_pool);
+                s = rowset_writer->add_row(input_row);
+                EXPECT_EQ(Status::OK(), s);
+            }
+            s = rowset_writer->flush();
+            EXPECT_EQ(Status::OK(), s);
+        }
+        num_segments = 1;
+        rows_per_segment = 4096;
+        for (int i = 0; i < num_segments; ++i) {
+            MemPool mem_pool;
+            for (int rid = 0; rid < rows_per_segment; ++rid) {
+                uint32_t k1 = rid * 100 + i;
+                uint32_t k2 = i;
+                uint32_t k3 = rid;
+                input_row.set_field_content(0, reinterpret_cast<char*>(&k1), 
&mem_pool);
+                input_row.set_field_content(1, reinterpret_cast<char*>(&k2), 
&mem_pool);
+                input_row.set_field_content(2, reinterpret_cast<char*>(&k3), 
&mem_pool);
+                s = rowset_writer->add_row(input_row);
+                EXPECT_EQ(Status::OK(), s);
+            }
+            s = rowset_writer->flush();
+            EXPECT_EQ(Status::OK(), s);
+        }
+        num_segments = 1;
+        rows_per_segment = 6400;
+        for (int i = 0; i < num_segments; ++i) {
+            MemPool mem_pool;
+            for (int rid = 0; rid < rows_per_segment; ++rid) {
+                uint32_t k1 = rid * 100 + i;
+                uint32_t k2 = i;
+                uint32_t k3 = rid;
+                input_row.set_field_content(0, reinterpret_cast<char*>(&k1), 
&mem_pool);
+                input_row.set_field_content(1, reinterpret_cast<char*>(&k2), 
&mem_pool);
+                input_row.set_field_content(2, reinterpret_cast<char*>(&k3), 
&mem_pool);
+                s = rowset_writer->add_row(input_row);
+                EXPECT_EQ(Status::OK(), s);
+            }
+            s = rowset_writer->flush();
+            EXPECT_EQ(Status::OK(), s);
+        }
+
+        rowset = rowset_writer->build();
+        std::vector<std::string> ls;
+        ls.push_back("10049_0.dat"); // O
+        ls.push_back("10049_1.dat"); // o
+        ls.push_back("10049_2.dat"); // O
+        ls.push_back("10049_3.dat"); // o
+        ls.push_back("10049_4.dat"); // O
+        EXPECT_TRUE(check_dir(ls));
+    }
+}
+
 } // namespace doris
 
 // @brief Test Stub


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to