This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 24b51b9035 [fix](compaction) segcompaction coredump if the rowset starts with a big segment (#14174) (#14176) 24b51b9035 is described below commit 24b51b90357943801aa5654c1a6fd3ccd796e4c5 Author: zhengyu <freeman.zhang1...@gmail.com> AuthorDate: Mon Nov 14 09:54:08 2022 +0800 [fix](compaction) segcompaction coredump if the rowset starts with a big segment (#14174) (#14176) Signed-off-by: freemandealer <freeman.zhang1...@gmail.com> Signed-off-by: freemandealer <freeman.zhang1...@gmail.com> --- be/src/olap/rowset/beta_rowset_writer.cpp | 1 + be/test/olap/segcompaction_test.cpp | 125 ++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 78b85b9df3..65b78b288a 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -206,6 +206,7 @@ Status BetaRowsetWriter::_rename_compacted_segments(int64_t begin, int64_t end) Status BetaRowsetWriter::_rename_compacted_segment_plain(uint64_t seg_id) { if (seg_id == _num_segcompacted) { + ++_num_segcompacted; return Status::OK(); } diff --git a/be/test/olap/segcompaction_test.cpp b/be/test/olap/segcompaction_test.cpp index ca2dc28f04..205fa0cbbd 100644 --- a/be/test/olap/segcompaction_test.cpp +++ b/be/test/olap/segcompaction_test.cpp @@ -91,6 +91,7 @@ public: delete l_engine; l_engine = nullptr; } + config::enable_segcompaction = false; } protected: @@ -167,6 +168,7 @@ protected: tablet_schema->init_from_pb(tablet_schema_pb); } + // use different id to avoid conflict void create_rowset_writer_context(int64_t id, TabletSchemaSPtr tablet_schema, RowsetWriterContext* rowset_writer_context) { RowsetId rowset_id; @@ -449,6 +451,129 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { } } +TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { + config::enable_segcompaction = true; + config::enable_storage_vectorization = true; + Status s; + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(tablet_schema); + + RowsetSharedPtr rowset; + config::segcompaction_small_threshold = 6000; // set threshold above + config::segcompaction_threshold_segment_num = 5; + std::vector<uint32_t> segment_num_rows; + { // write `num_segments * rows_per_segment` rows to rowset + RowsetWriterContext writer_context; + create_rowset_writer_context(10049, tablet_schema, &writer_context); + + std::unique_ptr<RowsetWriter> rowset_writer; + s = RowsetFactory::create_rowset_writer(writer_context, &rowset_writer); + EXPECT_EQ(Status::OK(), s); + + RowCursor input_row; + input_row.init(tablet_schema); + + // for segment "i", row "rid" + // k1 := rid*10 + i + // k2 := k1 * 10 + // k3 := 4096 * i + rid + int num_segments = 1; + uint32_t rows_per_segment = 6400; + for (int i = 0; i < num_segments; ++i) { + MemPool mem_pool; + for (int rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + i; + uint32_t k2 = i; + uint32_t k3 = rid; + input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); + input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool); + input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool); + s = rowset_writer->add_row(input_row); + EXPECT_EQ(Status::OK(), s); + } + s = rowset_writer->flush(); + EXPECT_EQ(Status::OK(), s); + } + num_segments = 1; + rows_per_segment = 4096; + for (int i = 0; i < num_segments; ++i) { + MemPool mem_pool; + for (int rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + i; + uint32_t k2 = i; + uint32_t k3 = rid; + input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); + input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool); + input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool); + s = rowset_writer->add_row(input_row); + EXPECT_EQ(Status::OK(), s); + } + s = rowset_writer->flush(); + EXPECT_EQ(Status::OK(), s); + } + num_segments = 1; + rows_per_segment = 6400; + for (int i = 0; i < num_segments; ++i) { + MemPool mem_pool; + for (int rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + i; + uint32_t k2 = i; + uint32_t k3 = rid; + input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); + input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool); + input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool); + s = rowset_writer->add_row(input_row); + EXPECT_EQ(Status::OK(), s); + } + s = rowset_writer->flush(); + EXPECT_EQ(Status::OK(), s); + } + num_segments = 1; + rows_per_segment = 4096; + for (int i = 0; i < num_segments; ++i) { + MemPool mem_pool; + for (int rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + i; + uint32_t k2 = i; + uint32_t k3 = rid; + input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); + input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool); + input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool); + s = rowset_writer->add_row(input_row); + EXPECT_EQ(Status::OK(), s); + } + s = rowset_writer->flush(); + EXPECT_EQ(Status::OK(), s); + } + num_segments = 1; + rows_per_segment = 6400; + for (int i = 0; i < num_segments; ++i) { + MemPool mem_pool; + for (int rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + i; + uint32_t k2 = i; + uint32_t k3 = rid; + input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); + input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool); + input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool); + s = rowset_writer->add_row(input_row); + EXPECT_EQ(Status::OK(), s); + } + s = rowset_writer->flush(); + EXPECT_EQ(Status::OK(), s); + } + + rowset = rowset_writer->build(); + std::vector<std::string> ls; + ls.push_back("10049_0.dat"); // O + ls.push_back("10049_1.dat"); // o + ls.push_back("10049_2.dat"); // O + ls.push_back("10049_3.dat"); // o + ls.push_back("10049_4.dat"); // O + EXPECT_TRUE(check_dir(ls)); + } +} + } // namespace doris // @brief Test Stub --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org