This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new d92b9c803e6 [branch-2.0](cherry-pick) Add more indexed column reader be unit test #25652 (#26430)
d92b9c803e6 is described below

commit d92b9c803e6a06b947c5ad60c71958761752f549
Author: abmdocrt <yukang.lian2...@gmail.com>
AuthorDate: Mon Nov 6 22:58:05 2023 +0800

    [branch-2.0](cherry-pick) Add more indexed column reader be unit test #25652 (#26430)
---
 be/src/olap/primary_key_index.h                    |   3 +
 .../olap/rowset/segment_v2/indexed_column_writer.h |   3 +
 be/test/olap/primary_key_index_test.cpp            | 151 +++++++++++++++++++++
 3 files changed, 157 insertions(+)

diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h
index 65cc64f0cd6..911a17ea058 100644
--- a/be/src/olap/primary_key_index.h
+++ b/be/src/olap/primary_key_index.h
@@ -61,6 +61,9 @@ public:
 
     uint64_t size() const { return _size; }
 
+    // Test-only accessor (BE unit tests): reports the data page count of the
+    // wrapped _primary_key_index_builder.
+    uint32_t data_page_num() const {
+        return _primary_key_index_builder->data_page_num();
+    }
+
     Slice min_key() { return Slice(_min_key.data(), _min_key.size() - 
_seq_col_length); }
     Slice max_key() { return Slice(_max_key.data(), _max_key.size() - 
_seq_col_length); }
 
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
index a95a9fce7f7..7cd1bc656e6 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
@@ -22,6 +22,7 @@
 #include <stdint.h>
 
 #include <cstddef>
+#include <cstdint>
 #include <memory>
 
 #include "common/status.h"
@@ -83,6 +84,8 @@ public:
 
     Status finish(IndexedColumnMetaPB* meta);
 
+    // Number of data pages, computed as _num_data_pages plus one.
+    // NOTE(review): the extra one presumably counts the page that is still
+    // being filled -- confirm against _finish_current_data_page().
+    uint32_t data_page_num() const {
+        return _num_data_pages + 1;
+    }
+
 private:
     Status _finish_current_data_page(size_t& num_val);
 
diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp
index d643ab501e8..64a49f010d9 100644
--- a/be/test/olap/primary_key_index_test.cpp
+++ b/be/test/olap/primary_key_index_test.cpp
@@ -167,4 +167,155 @@ TEST_F(PrimaryKeyIndexTest, builder) {
     }
 }
 
+// Builds a primary key index with a 25-byte data page size and ten 5-byte
+// keys, checks that more than one data page is produced, then verifies key
+// lookups and ordinal seeks through PrimaryKeyIndexReader.
+TEST_F(PrimaryKeyIndexTest, multiple_pages) {
+    const std::string filename = kTestDir + "/multiple_pages";
+    io::FileWriterPtr file_writer;
+    auto fs = io::global_local_filesystem();
+    // The builder writes through file_writer, so stop if creation failed.
+    ASSERT_TRUE(fs->create_file(filename, &file_writer).ok());
+
+    // config::primary_key_data_page_size is process-wide state; put the old
+    // value back on every exit path so the tiny page size does not leak into
+    // whichever test runs next.
+    struct PageSizeGuard {
+        decltype(config::primary_key_data_page_size) saved = config::primary_key_data_page_size;
+        ~PageSizeGuard() { config::primary_key_data_page_size = saved; }
+    } page_size_guard;
+    config::primary_key_data_page_size = 5 * 5;
+
+    PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
+    ASSERT_TRUE(builder.init().ok());
+    size_t num_rows = 0;
+    std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
+                                   "00010", "00012", "00014", "00016", "00018"};
+    for (const std::string& key : keys) {
+        EXPECT_TRUE(builder.add_item(key).ok());
+        num_rows++;
+    }
+    EXPECT_EQ("00000", builder.min_key().to_string());
+    EXPECT_EQ("00018", builder.max_key().to_string());
+    EXPECT_EQ(builder.size(), 2 * 5 * 5);
+    EXPECT_GT(builder.data_page_num(), 1);
+    segment_v2::PrimaryKeyIndexMetaPB index_meta;
+    EXPECT_TRUE(builder.finalize(&index_meta).ok());
+    EXPECT_TRUE(file_writer->close().ok());
+    EXPECT_EQ(num_rows, builder.num_rows());
+
+    PrimaryKeyIndexReader index_reader;
+    io::FileReaderSPtr file_reader;
+    // Everything below reads through file_reader / index_iterator, so these
+    // setup steps are hard requirements rather than soft expectations.
+    ASSERT_TRUE(fs->open_file(filename, &file_reader).ok());
+    ASSERT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
+    ASSERT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
+    EXPECT_EQ(num_rows, index_reader.num_rows());
+
+    std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
+    ASSERT_TRUE(index_reader.new_iterator(&index_iterator).ok());
+    bool exact_match = false;
+
+    // Every inserted key must be found exactly, at the ordinal it was added.
+    for (size_t i = 0; i < keys.size(); i++) {
+        // seek_at_or_after is handed a Slice here, the same key
+        // representation used for the non-existing-key lookups below.
+        Slice slice(keys[i]);
+        EXPECT_TRUE(index_reader.check_present(slice));
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_TRUE(status.ok());
+        EXPECT_TRUE(exact_match);
+        const uint32_t row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i, row_id);
+    }
+    // Seeking by ordinal must land on that same ordinal.
+    for (size_t i = 0; i < keys.size(); i++) {
+        EXPECT_TRUE(index_reader.check_present(keys[i]));
+        auto status = index_iterator->seek_to_ordinal(i);
+        EXPECT_TRUE(status.ok());
+        const uint32_t row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i, row_id);
+    }
+    {
+        // Ordinal 10 equals the number of rows (one past the last row); the
+        // test expects this seek to succeed and report ordinal 10.
+        auto status = index_iterator->seek_to_ordinal(10);
+        EXPECT_TRUE(status.ok());
+        const uint32_t row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(10, row_id);
+    }
+
+    // Keys that fall between the inserted ones: not present, and the seek
+    // stops at the next larger inserted key (ordinal i + 1).
+    std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
+                                             "00011", "00013", "00015", "00017"};
+    for (size_t i = 0; i < non_exist_keys.size(); i++) {
+        Slice slice(non_exist_keys[i]);
+        EXPECT_FALSE(index_reader.check_present(slice));
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_TRUE(status.ok());
+        EXPECT_FALSE(exact_match);
+        const uint32_t row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i + 1, row_id);
+    }
+    {
+        // A key larger than every inserted key: the seek reports
+        // ENTRY_NOT_FOUND.
+        std::string key("00019");
+        Slice slice(key);
+        EXPECT_FALSE(index_reader.check_present(slice));
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_FALSE(exact_match);
+        EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
+    }
+}
+
+// Builds a primary key index with a 32768-byte data page size and ten 5-byte
+// keys, checks that exactly one data page is reported, then verifies key
+// lookups through PrimaryKeyIndexReader.
+TEST_F(PrimaryKeyIndexTest, single_page) {
+    const std::string filename = kTestDir + "/single_page";
+    io::FileWriterPtr file_writer;
+    auto fs = io::global_local_filesystem();
+    // The builder writes through file_writer, so stop if creation failed.
+    ASSERT_TRUE(fs->create_file(filename, &file_writer).ok());
+    config::primary_key_data_page_size = 32768;
+
+    PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
+    ASSERT_TRUE(builder.init().ok());
+    size_t num_rows = 0;
+    std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
+                                   "00010", "00012", "00014", "00016", "00018"};
+    for (const std::string& key : keys) {
+        EXPECT_TRUE(builder.add_item(key).ok());
+        num_rows++;
+    }
+    EXPECT_EQ("00000", builder.min_key().to_string());
+    EXPECT_EQ("00018", builder.max_key().to_string());
+    EXPECT_EQ(builder.size(), 2 * 5 * 5);
+    EXPECT_EQ(builder.data_page_num(), 1);
+    segment_v2::PrimaryKeyIndexMetaPB index_meta;
+    EXPECT_TRUE(builder.finalize(&index_meta).ok());
+    EXPECT_TRUE(file_writer->close().ok());
+    EXPECT_EQ(num_rows, builder.num_rows());
+
+    PrimaryKeyIndexReader index_reader;
+    io::FileReaderSPtr file_reader;
+    // Everything below reads through file_reader / index_iterator, so these
+    // setup steps are hard requirements rather than soft expectations.
+    ASSERT_TRUE(fs->open_file(filename, &file_reader).ok());
+    ASSERT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
+    ASSERT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
+    EXPECT_EQ(num_rows, index_reader.num_rows());
+
+    std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
+    ASSERT_TRUE(index_reader.new_iterator(&index_iterator).ok());
+    bool exact_match = false;
+
+    // Every inserted key must be found exactly, at the ordinal it was added.
+    for (size_t i = 0; i < keys.size(); i++) {
+        // seek_at_or_after is handed a Slice here, the same key
+        // representation used for the non-existing-key lookups below.
+        Slice slice(keys[i]);
+        EXPECT_TRUE(index_reader.check_present(slice));
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_TRUE(status.ok());
+        EXPECT_TRUE(exact_match);
+        const uint32_t row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i, row_id);
+    }
+
+    // Keys that fall between the inserted ones: not present, and the seek
+    // stops at the next larger inserted key (ordinal i + 1).
+    std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
+                                             "00011", "00013", "00015", "00017"};
+    for (size_t i = 0; i < non_exist_keys.size(); i++) {
+        Slice slice(non_exist_keys[i]);
+        EXPECT_FALSE(index_reader.check_present(slice));
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_TRUE(status.ok());
+        EXPECT_FALSE(exact_match);
+        const uint32_t row_id = index_iterator->get_current_ordinal();
+        EXPECT_EQ(i + 1, row_id);
+    }
+    {
+        // A key larger than every inserted key: the seek reports
+        // ENTRY_NOT_FOUND.
+        std::string key("00019");
+        Slice slice(key);
+        EXPECT_FALSE(index_reader.check_present(slice));
+        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+        EXPECT_FALSE(exact_match);
+        EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
+    }
+}
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to