This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new d92b9c803e6 [branch-2.0](cherry-pick) Add more indexed column reader be unit test #25652 (#26430) d92b9c803e6 is described below commit d92b9c803e6a06b947c5ad60c71958761752f549 Author: abmdocrt <yukang.lian2...@gmail.com> AuthorDate: Mon Nov 6 22:58:05 2023 +0800 [branch-2.0](cherry-pick) Add more indexed column reader be unit test #25652 (#26430) --- be/src/olap/primary_key_index.h | 3 + .../olap/rowset/segment_v2/indexed_column_writer.h | 3 + be/test/olap/primary_key_index_test.cpp | 151 +++++++++++++++++++++ 3 files changed, 157 insertions(+) diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h index 65cc64f0cd6..911a17ea058 100644 --- a/be/src/olap/primary_key_index.h +++ b/be/src/olap/primary_key_index.h @@ -61,6 +61,9 @@ public: uint64_t size() const { return _size; } + // used for be ut + uint32_t data_page_num() const { return _primary_key_index_builder->data_page_num(); } + Slice min_key() { return Slice(_min_key.data(), _min_key.size() - _seq_col_length); } Slice max_key() { return Slice(_max_key.data(), _max_key.size() - _seq_col_length); } diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h index a95a9fce7f7..7cd1bc656e6 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h @@ -22,6 +22,7 @@ #include <stdint.h> #include <cstddef> +#include <cstdint> #include <memory> #include "common/status.h" @@ -83,6 +84,8 @@ public: Status finish(IndexedColumnMetaPB* meta); + uint32_t data_page_num() const { return _num_data_pages + 1; } + private: Status _finish_current_data_page(size_t& num_val); diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp index d643ab501e8..64a49f010d9 100644 --- a/be/test/olap/primary_key_index_test.cpp +++ b/be/test/olap/primary_key_index_test.cpp @@ -167,4 +167,155 @@ TEST_F(PrimaryKeyIndexTest, builder) { } } +TEST_F(PrimaryKeyIndexTest, multiple_pages) { + std::string filename = kTestDir + "/multiple_pages"; + io::FileWriterPtr file_writer; + auto fs = io::global_local_filesystem(); + EXPECT_TRUE(fs->create_file(filename, &file_writer).ok()); + + config::primary_key_data_page_size = 5 * 5; + PrimaryKeyIndexBuilder builder(file_writer.get(), 0); + static_cast<void>(builder.init()); + size_t num_rows = 0; + std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008", + "00010", "00012", "00014", "00016", "00018"}; + for (const std::string& key : keys) { + static_cast<void>(builder.add_item(key)); + num_rows++; + } + EXPECT_EQ("00000", builder.min_key().to_string()); + EXPECT_EQ("00018", builder.max_key().to_string()); + EXPECT_EQ(builder.size(), 2 * 5 * 5); + EXPECT_GT(builder.data_page_num(), 1); + segment_v2::PrimaryKeyIndexMetaPB index_meta; + EXPECT_TRUE(builder.finalize(&index_meta)); + EXPECT_TRUE(file_writer->close().ok()); + EXPECT_EQ(num_rows, builder.num_rows()); + + PrimaryKeyIndexReader index_reader; + io::FileReaderSPtr file_reader; + EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); + EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok()); + EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok()); + EXPECT_EQ(num_rows, index_reader.num_rows()); + + std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator; + EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok()); + bool exact_match = false; + uint32_t row_id; + for (size_t i = 0; i < keys.size(); i++) { + bool exists = index_reader.check_present(keys[i]); + EXPECT_TRUE(exists); + auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match); + EXPECT_TRUE(status.ok()); + EXPECT_TRUE(exact_match); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i, row_id); + } + for (size_t i = 0; i < keys.size(); i++) { + bool exists = index_reader.check_present(keys[i]); + EXPECT_TRUE(exists); + auto status = index_iterator->seek_to_ordinal(i); + EXPECT_TRUE(status.ok()); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i, row_id); + } + { + auto status = index_iterator->seek_to_ordinal(10); + EXPECT_TRUE(status.ok()); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(10, row_id); + } + + std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009", + "00011", "00013", "00015", "00017"}; + for (size_t i = 0; i < non_exist_keys.size(); i++) { + Slice slice(non_exist_keys[i]); + bool exists = index_reader.check_present(slice); + EXPECT_FALSE(exists); + auto status = index_iterator->seek_at_or_after(&slice, &exact_match); + EXPECT_TRUE(status.ok()); + EXPECT_FALSE(exact_match); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i + 1, row_id); + } + { + string key("00019"); + Slice slice(key); + bool exists = index_reader.check_present(slice); + EXPECT_FALSE(exists); + auto status = index_iterator->seek_at_or_after(&slice, &exact_match); + EXPECT_FALSE(exact_match); + EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>()); + } +} + +TEST_F(PrimaryKeyIndexTest, single_page) { + std::string filename = kTestDir + "/single_page"; + io::FileWriterPtr file_writer; + auto fs = io::global_local_filesystem(); + EXPECT_TRUE(fs->create_file(filename, &file_writer).ok()); + config::primary_key_data_page_size = 32768; + + PrimaryKeyIndexBuilder builder(file_writer.get(), 0); + static_cast<void>(builder.init()); + size_t num_rows = 0; + std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008", + "00010", "00012", "00014", "00016", "00018"}; + for (const std::string& key : keys) { + static_cast<void>(builder.add_item(key)); + num_rows++; + } + EXPECT_EQ("00000", builder.min_key().to_string()); + EXPECT_EQ("00018", builder.max_key().to_string()); + EXPECT_EQ(builder.size(), 2 * 5 * 5); + EXPECT_EQ(builder.data_page_num(), 1); + segment_v2::PrimaryKeyIndexMetaPB index_meta; + EXPECT_TRUE(builder.finalize(&index_meta)); + EXPECT_TRUE(file_writer->close().ok()); + EXPECT_EQ(num_rows, builder.num_rows()); + + PrimaryKeyIndexReader index_reader; + io::FileReaderSPtr file_reader; + EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); + EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok()); + EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok()); + EXPECT_EQ(num_rows, index_reader.num_rows()); + + std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator; + EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok()); + bool exact_match = false; + uint32_t row_id; + for (size_t i = 0; i < keys.size(); i++) { + bool exists = index_reader.check_present(keys[i]); + EXPECT_TRUE(exists); + auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match); + EXPECT_TRUE(status.ok()); + EXPECT_TRUE(exact_match); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i, row_id); + } + + std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009", + "00011", "00013", "00015", "00017"}; + for (size_t i = 0; i < non_exist_keys.size(); i++) { + Slice slice(non_exist_keys[i]); + bool exists = index_reader.check_present(slice); + EXPECT_FALSE(exists); + auto status = index_iterator->seek_at_or_after(&slice, &exact_match); + EXPECT_TRUE(status.ok()); + EXPECT_FALSE(exact_match); + row_id = index_iterator->get_current_ordinal(); + EXPECT_EQ(i + 1, row_id); + } + { + string key("00019"); + Slice slice(key); + bool exists = index_reader.check_present(slice); + EXPECT_FALSE(exists); + auto status = index_iterator->seek_at_or_after(&slice, &exact_match); + EXPECT_FALSE(exact_match); + EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>()); + } +} } // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org