This is an automated email from the ASF dual-hosted git repository. zhangchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 9519d7ede98 [enhancement](be-ut)Add more indexed column reader be unit test (#25652) 9519d7ede98 is described below commit 9519d7ede989fbd219c6b473d0a798fabd57b677 Author: abmdocrt <yukang.lian2...@gmail.com> AuthorDate: Mon Oct 23 10:12:53 2023 +0800 [enhancement](be-ut)Add more indexed column reader be unit test (#25652) Added more unit tests 1. key exists or does not exist in a single page 2. key exists or does not exist in multiple pages 3. key is between two pages. --- be/src/olap/primary_key_index.h | 3 + .../olap/rowset/segment_v2/indexed_column_writer.h | 3 + be/test/olap/primary_key_index_test.cpp | 153 +++++++++++++++++++++ 3 files changed, 159 insertions(+) diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h index 233644b4e07..59b88c2f724 100644 --- a/be/src/olap/primary_key_index.h +++ b/be/src/olap/primary_key_index.h @@ -67,6 +67,9 @@ public: uint64_t disk_size() const { return _disk_size; } + // used for be ut + uint32_t data_page_num() const { return _primary_key_index_builder->data_page_num(); } + Slice min_key() { return Slice(_min_key.data(), _min_key.size() - _seq_col_length); } Slice max_key() { return Slice(_max_key.data(), _max_key.size() - _seq_col_length); } diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h index ba61708dd90..ecb26782ad1 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h @@ -22,6 +22,7 @@ #include <stdint.h> #include <cstddef> +#include <cstdint> #include <memory> #include "common/status.h" @@ -85,6 +86,8 @@ public: uint64_t disk_size() const { return _disk_size; } + uint32_t data_page_num() const { return _num_data_pages + 1; } + private: Status _finish_current_data_page(size_t& num_val); diff --git a/be/test/olap/primary_key_index_test.cpp 
b/be/test/olap/primary_key_index_test.cpp
index 4de6be24feb..fb96e7411e6 100644
--- a/be/test/olap/primary_key_index_test.cpp
+++ b/be/test/olap/primary_key_index_test.cpp
@@ -167,4 +167,157 @@ TEST_F(PrimaryKeyIndexTest, builder) {
     }
 }
 
+TEST_F(PrimaryKeyIndexTest, multiple_pages) { // Writes ten five-character keys with a small page-size setting, then reads the index back.
+    std::string filename = kTestDir + "/multiple_pages";
+    io::FileWriterPtr file_writer;
+    auto fs = io::global_local_filesystem();
+    EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
+    // Lower the page-size setting to 5 * 5 so the keys are expected to need more than one data page (asserted below).
+    config::primary_key_data_page_size = 5 * 5; // NOTE(review): the previous value is not restored in this test.
+    PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
+    static_cast<void>(builder.init()); // NOTE(review): result is discarded; a failed init() would go unnoticed here.
+    size_t num_rows = 0;
+    std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
+                                   "00010", "00012", "00014", "00016", "00018"}; // even keys only, in ascending order
+    for (const std::string& key : keys) {
+        static_cast<void>(builder.add_item(key)); // NOTE(review): result is discarded as well.
+        num_rows++;
+    }
+    EXPECT_EQ("00000", builder.min_key().to_string());
+    EXPECT_EQ("00018", builder.max_key().to_string());
+    EXPECT_EQ(builder.size(), 2 * 5 * 5); // 50 == 10 keys of 5 characters each
+    EXPECT_GT(builder.data_page_num(), 1); // more than one data page is expected with the lowered setting
+    segment_v2::PrimaryKeyIndexMetaPB index_meta;
+    EXPECT_TRUE(builder.finalize(&index_meta));
+    EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
+    EXPECT_TRUE(file_writer->close().ok());
+    EXPECT_EQ(num_rows, builder.num_rows());
+    // Re-open the file that was just written and parse it with the index_meta that was handed to finalize().
+    PrimaryKeyIndexReader index_reader;
+    io::FileReaderSPtr file_reader;
+    EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
+    EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
+    EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
+    EXPECT_EQ(num_rows, index_reader.num_rows());
+    // Every inserted key must be reported present and be found by an exact-match seek at ordinal i.
+    std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
+    EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
+    bool exact_match = false;
+    uint32_t row_id;
+    for (size_t i = 0; i < keys.size(); i++) {
+        bool exists = index_reader.check_present(keys[i]);
+        EXPECT_TRUE(exists);
+        auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match); // NOTE(review): passes std::string* here but Slice* in the absent-key loop below -- confirm both are valid.
+        EXPECT_TRUE(status.ok());
+        EXPECT_TRUE(exact_match);
+        row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i, row_id);
+    }
+    for (size_t i = 0; i < keys.size(); i++) { // each ordinal in [0, keys.size()) must also be reachable by direct positioning
+        bool exists = index_reader.check_present(keys[i]);
+        EXPECT_TRUE(exists);
+        auto status = index_iterator->seek_to_ordinal(i);
+        EXPECT_TRUE(status.ok());
+        row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i, row_id);
+    }
+    { // 10 == keys.size(), i.e. one past the last stored key; the test expects this seek to succeed
+        auto status = index_iterator->seek_to_ordinal(10);
+        EXPECT_TRUE(status.ok());
+        row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(10, row_id);
+    }
+    // Odd keys sort between the stored even keys: not present, and the seek must land on the next larger key.
+    std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
+                                             "00011", "00013", "00015", "00017"};
+    for (size_t i = 0; i < non_exist_keys.size(); i++) {
+        Slice slice(non_exist_keys[i]);
+        bool exists = index_reader.check_present(slice);
+        EXPECT_FALSE(exists);
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_TRUE(status.ok());
+        EXPECT_FALSE(exact_match);
+        row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i + 1, row_id); // non_exist_keys[i] sorts between keys[i] and keys[i + 1]
+    }
+    { // "00019" sorts after the largest stored key, so the seek is expected to report ENTRY_NOT_FOUND
+        string key("00019"); // NOTE(review): unqualified `string` relies on a using-declaration not visible in this hunk.
+        Slice slice(key);
+        bool exists = index_reader.check_present(slice);
+        EXPECT_FALSE(exists);
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_FALSE(exact_match);
+        EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
+    }
+}
+// Same ten keys with the page-size setting at 32768; exactly one data page is expected (asserted below).
+TEST_F(PrimaryKeyIndexTest, single_page) {
+    std::string filename = kTestDir + "/single_page";
+    io::FileWriterPtr file_writer;
+    auto fs = io::global_local_filesystem();
+    EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
+    config::primary_key_data_page_size = 32768; // NOTE(review): presumably the default value -- confirm; it is not restored afterwards either.
+
+    PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
+    static_cast<void>(builder.init()); // NOTE(review): result is discarded; a failed init() would go unnoticed here.
+    size_t num_rows = 0;
+    std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
+                                   "00010", "00012", "00014", "00016", "00018"}; // even keys only, in ascending order
+    for (const std::string& key : keys) {
+        static_cast<void>(builder.add_item(key)); // NOTE(review): result is discarded as well.
+        num_rows++;
+    }
+    EXPECT_EQ("00000", builder.min_key().to_string());
+    EXPECT_EQ("00018", builder.max_key().to_string());
+    EXPECT_EQ(builder.size(), 2 * 5 * 5); // 50 == 10 keys of 5 characters each
+    EXPECT_EQ(builder.data_page_num(), 1); // all keys are expected to share a single data page
+    segment_v2::PrimaryKeyIndexMetaPB index_meta;
+    EXPECT_TRUE(builder.finalize(&index_meta));
+    EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
+    EXPECT_TRUE(file_writer->close().ok());
+    EXPECT_EQ(num_rows, builder.num_rows());
+    // Re-open the file that was just written and parse it with the index_meta that was handed to finalize().
+    PrimaryKeyIndexReader index_reader;
+    io::FileReaderSPtr file_reader;
+    EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
+    EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
+    EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
+    EXPECT_EQ(num_rows, index_reader.num_rows());
+    // Every inserted key must be reported present and be found by an exact-match seek at ordinal i.
+    std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
+    EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
+    bool exact_match = false;
+    uint32_t row_id;
+    for (size_t i = 0; i < keys.size(); i++) {
+        bool exists = index_reader.check_present(keys[i]);
+        EXPECT_TRUE(exists);
+        auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match); // NOTE(review): passes std::string* here but Slice* in the absent-key loop below -- confirm both are valid.
+        EXPECT_TRUE(status.ok());
+        EXPECT_TRUE(exact_match);
+        row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i, row_id);
+    }
+    // Odd keys sort between the stored even keys: not present, and the seek must land on the next larger key.
+    std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
+                                             "00011", "00013", "00015", "00017"};
+    for (size_t i = 0; i < non_exist_keys.size(); i++) {
+        Slice slice(non_exist_keys[i]);
+        bool exists = index_reader.check_present(slice);
+        EXPECT_FALSE(exists);
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_TRUE(status.ok());
+        EXPECT_FALSE(exact_match);
+        row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i + 1, row_id); // non_exist_keys[i] sorts between keys[i] and keys[i + 1]
+    }
+    { // "00019" sorts after the largest stored key, so the seek is expected to report ENTRY_NOT_FOUND
+        string key("00019"); // NOTE(review): unqualified `string` relies on a using-declaration not visible in this hunk.
+        Slice slice(key);
+        bool exists = index_reader.check_present(slice);
+        EXPECT_FALSE(exists);
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_FALSE(exact_match);
+        EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
+    }
+}
 } // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org