github-actions[bot] commented on code in PR #15994:
URL: https://github.com/apache/doris/pull/15994#discussion_r1071687015


##########
be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:
##########
@@ -290,6 +291,313 @@ InvertedIndexReaderType 
StringTypeInvertedIndexReader::type() {
     return InvertedIndexReaderType::STRING_TYPE;
 }
 
+BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs, const std::string& path,
+                               const uint32_t uniq_id)
+        : InvertedIndexReader(fs, path, uniq_id), compoundReader(nullptr) {
+    io::Path io_path(_path);
+    auto index_dir = io_path.parent_path();
+    auto index_file_name =
+            InvertedIndexDescriptor::get_index_file_name(io_path.filename(), 
_index_id);
+
+    // check index file existence
+    auto index_file = index_dir / index_file_name;
+    if (!indexExists(index_file)) {
+        LOG(WARNING) << "bkd index: " << index_file.string() << " not exist.";
+        return;
+    }
+    compoundReader = new DorisCompoundReader(
+            DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()),
+            index_file_name.c_str());
+}
+
+Status BkdIndexReader::new_iterator(const TabletIndex* index_meta,
+                                    InvertedIndexIterator** iterator) {
+    *iterator = new InvertedIndexIterator(index_meta, this);
+    return Status::OK();
+}
+
+Status BkdIndexReader::bkd_query(const std::string& column_name, const void* 
query_value,
+                                 InvertedIndexQueryType query_type,
+                                 
std::shared_ptr<lucene::util::bkd::bkd_reader>&& r,
+                                 InvertedIndexVisitor* visitor) {
+    lucene::util::bkd::bkd_reader* tmp_reader;
+    auto status = get_bkd_reader(tmp_reader);
+    if (!status.ok()) {
+        LOG(WARNING) << "get bkd reader for column " << column_name
+                     << " failed: " << status.code_as_string();
+        return status;
+    }
+    r.reset(tmp_reader);
+    char tmp[r->bytes_per_dim_];
+    switch (query_type) {
+    case InvertedIndexQueryType::EQUAL_QUERY: {
+        _value_key_coder->full_encode_ascending(query_value, 
&visitor->queryMax);
+        _value_key_coder->full_encode_ascending(query_value, 
&visitor->queryMin);
+        break;
+    }
+    case InvertedIndexQueryType::LESS_THAN_QUERY:
+    case InvertedIndexQueryType::LESS_EQUAL_QUERY: {
+        _value_key_coder->full_encode_ascending(query_value, 
&visitor->queryMax);
+        _type_info->set_to_min(tmp);
+        _value_key_coder->full_encode_ascending(tmp, &visitor->queryMin);
+        break;
+    }
+    case InvertedIndexQueryType::GREATER_THAN_QUERY:
+    case InvertedIndexQueryType::GREATER_EQUAL_QUERY: {
+        _value_key_coder->full_encode_ascending(query_value, 
&visitor->queryMin);
+        _type_info->set_to_max(tmp);
+        _value_key_coder->full_encode_ascending(tmp, &visitor->queryMax);
+        break;
+    }
+    default:
+        LOG(ERROR) << "invalid query type when query bkd index";
+        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
+    }
+    visitor->set_reader(r.get());
+    return Status::OK();
+}
+
+Status BkdIndexReader::query(const std::string& column_name, const void* 
query_value,
+                             InvertedIndexQueryType query_type,
+                             InvertedIndexParserType analyser_type, 
roaring::Roaring* bit_map) {
+    uint64_t start = UnixMillis();
+    auto visitor = std::make_unique<InvertedIndexVisitor>(bit_map, query_type);
+    std::shared_ptr<lucene::util::bkd::bkd_reader> r;
+    try {
+        RETURN_IF_ERROR(bkd_query(column_name, query_value, query_type, 
std::move(r), visitor.get()));
+        r->intersect(visitor.get());
+    } catch (const CLuceneError& e) {
+        LOG(WARNING) << "BKD Query CLuceneError Occurred, error msg: " << 
e.what();
+        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+    }
+
+    LOG(INFO) << "BKD index search time taken: " << UnixMillis() - start << 
"ms "
+              << " column: " << column_name << " result: " << 
bit_map->cardinality()
+              << " reader stats: " << r->stats.to_string();
+    return Status::OK();
+}
+
+Status BkdIndexReader::get_bkd_reader(lucene::util::bkd::bkd_reader*& 
bkdReader) {
+    // bkd file reader
+    if (compoundReader == nullptr) {
+        LOG(WARNING) << "bkd index input file not found";
+        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>();
+    }
+    CLuceneError err;
+    lucene::store::IndexInput* data_in;
+    lucene::store::IndexInput* meta_in;
+    lucene::store::IndexInput* index_in;
+
+    if (!compoundReader->openInput(
+                
InvertedIndexDescriptor::get_temporary_bkd_index_data_file_name().c_str(), 
data_in,
+                err) ||
+        !compoundReader->openInput(
+                
InvertedIndexDescriptor::get_temporary_bkd_index_meta_file_name().c_str(), 
meta_in,
+                err) ||
+        !compoundReader->openInput(
+                
InvertedIndexDescriptor::get_temporary_bkd_index_file_name().c_str(), index_in,
+                err)) {
+        LOG(WARNING) << "bkd index input error: " << err.what();
+        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>();
+    }
+
+    bkdReader = new lucene::util::bkd::bkd_reader(data_in);
+    if (0 == bkdReader->read_meta(meta_in)) {
+        return Status::EndOfFile("bkd index file is empty");
+    }
+
+    bkdReader->read_index(index_in);
+
+    _type_info = get_scalar_type_info((FieldType)bkdReader->type);
+    if (_type_info == nullptr) {
+        auto type = bkdReader->type;
+        delete bkdReader;
+        LOG(WARNING) << "unsupported typeinfo, type=" << type;
+        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
+    }
+    _value_key_coder = get_key_coder(_type_info->type());
+    return Status::OK();
+}
+
+InvertedIndexReaderType BkdIndexReader::type() {
+    return InvertedIndexReaderType::BKD;
+}
+
+InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, 
InvertedIndexQueryType query_type,
+                                           bool only_count)
+        : hits(h), num_hits(0), only_count(only_count), query_type(query_type) 
{}
+
+bool InvertedIndexVisitor::matches(uint8_t* packedValue) {
+    for (int dim = 0; dim < reader->num_data_dims_; dim++) {
+        int offset = dim * reader->bytes_per_dim_;
+        if (query_type == InvertedIndexQueryType::LESS_THAN_QUERY) {
+            if (lucene::util::FutureArrays::CompareUnsigned(
+                        packedValue, offset, offset + reader->bytes_per_dim_,
+                        (const uint8_t*)queryMax.c_str(), offset,
+                        offset + reader->bytes_per_dim_) >= 0) {
+                // Doc's value is too high, in this dimension
+                return false;
+            }
+        } else if (query_type == InvertedIndexQueryType::GREATER_THAN_QUERY) {
+            if (lucene::util::FutureArrays::CompareUnsigned(
+                        packedValue, offset, offset + reader->bytes_per_dim_,
+                        (const uint8_t*)queryMin.c_str(), offset,
+                        offset + reader->bytes_per_dim_) <= 0) {
+                // Doc's value is too low, in this dimension
+                return false;
+            }
+        } else {
+            if (lucene::util::FutureArrays::CompareUnsigned(
+                        packedValue, offset, offset + reader->bytes_per_dim_,
+                        (const uint8_t*)queryMin.c_str(), offset,
+                        offset + reader->bytes_per_dim_) < 0) {
+                // Doc's value is too low, in this dimension
+                return false;
+            }
+            if (lucene::util::FutureArrays::CompareUnsigned(
+                        packedValue, offset, offset + reader->bytes_per_dim_,
+                        (const uint8_t*)queryMax.c_str(), offset,
+                        offset + reader->bytes_per_dim_) > 0) {
+                // Doc's value is too high, in this dimension
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+void InvertedIndexVisitor::visit(std::vector<char>& docID, 
std::vector<uint8_t>& packedValue) {
+    if (!matches(packedValue.data())) {
+        return;
+    }
+    visit(roaring::Roaring::read(docID.data(), false));
+}
+
+void InvertedIndexVisitor::visit(Roaring* docID, std::vector<uint8_t>& 
packedValue) {
+    if (!matches(packedValue.data())) {
+        return;
+    }
+    visit(*docID);
+}
+
+void InvertedIndexVisitor::visit(roaring::Roaring&& r) {
+    if (only_count) {
+        num_hits += r.cardinality();
+    } else {
+        *hits |= r;
+    }
+}
+
+void InvertedIndexVisitor::visit(roaring::Roaring& r) {
+    if (only_count) {
+        num_hits += r.cardinality();
+    } else {
+        *hits |= r;
+    }
+}
+
+void InvertedIndexVisitor::visit(int rowID) {
+    if (only_count) {
+        num_hits++;
+    } else {
+        hits->add(rowID);
+    }
+    if (0) {

Review Comment:
   warning: converting integer literal to bool, use bool literal instead 
[modernize-use-bool-literals]
   
   ```suggestion
       if (false) {
   ```
   



##########
be/src/olap/rowset/segment_v2/inverted_index_reader.h:
##########
@@ -128,6 +134,74 @@ class StringTypeInvertedIndexReader : public 
InvertedIndexReader {
     InvertedIndexReaderType type() override;
 };
 
+class InvertedIndexVisitor : public 
lucene::util::bkd::bkd_reader::intersect_visitor {
+private:
+    roaring::Roaring* hits;
+    uint32_t num_hits;
+    bool only_count;
+    lucene::util::bkd::bkd_reader* reader;
+    InvertedIndexQueryType query_type;
+
+public:
+    std::string queryMin;
+    std::string queryMax;
+
+public:

Review Comment:
   warning: redundant access specifier has the same accessibility as the 
previous access specifier [readability-redundant-access-specifiers]
   
   ```suggestion
   
   ```
   **be/src/olap/rowset/segment_v2/inverted_index_reader.h:144:** previously 
declared here
   ```cpp
   public:
   ^
   ```
   



##########
be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:
##########
@@ -290,6 +291,313 @@
     return InvertedIndexReaderType::STRING_TYPE;
 }
 
+BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs, const std::string& path,
+                               const uint32_t uniq_id)
+        : InvertedIndexReader(fs, path, uniq_id), compoundReader(nullptr) {
+    io::Path io_path(_path);
+    auto index_dir = io_path.parent_path();
+    auto index_file_name =
+            InvertedIndexDescriptor::get_index_file_name(io_path.filename(), 
_index_id);
+
+    // check index file existence
+    auto index_file = index_dir / index_file_name;
+    if (!indexExists(index_file)) {
+        LOG(WARNING) << "bkd index: " << index_file.string() << " not exist.";
+        return;
+    }
+    compoundReader = new DorisCompoundReader(
+            DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()),
+            index_file_name.c_str());
+}
+
+Status BkdIndexReader::new_iterator(const TabletIndex* index_meta,
+                                    InvertedIndexIterator** iterator) {
+    *iterator = new InvertedIndexIterator(index_meta, this);
+    return Status::OK();
+}
+
+Status BkdIndexReader::bkd_query(const std::string& column_name, const void* 
query_value,
+                                 InvertedIndexQueryType query_type,
+                                 
std::shared_ptr<lucene::util::bkd::bkd_reader>&& r,
+                                 InvertedIndexVisitor* visitor) {
+    lucene::util::bkd::bkd_reader* tmp_reader;
+    auto status = get_bkd_reader(tmp_reader);
+    if (!status.ok()) {
+        LOG(WARNING) << "get bkd reader for column " << column_name
+                     << " failed: " << status.code_as_string();
+        return status;
+    }
+    r.reset(tmp_reader);
+    char tmp[r->bytes_per_dim_];
+    switch (query_type) {
+    case InvertedIndexQueryType::EQUAL_QUERY: {
+        _value_key_coder->full_encode_ascending(query_value, 
&visitor->queryMax);
+        _value_key_coder->full_encode_ascending(query_value, 
&visitor->queryMin);
+        break;
+    }
+    case InvertedIndexQueryType::LESS_THAN_QUERY:
+    case InvertedIndexQueryType::LESS_EQUAL_QUERY: {
+        _value_key_coder->full_encode_ascending(query_value, 
&visitor->queryMax);
+        _type_info->set_to_min(tmp);
+        _value_key_coder->full_encode_ascending(tmp, &visitor->queryMin);
+        break;
+    }
+    case InvertedIndexQueryType::GREATER_THAN_QUERY:
+    case InvertedIndexQueryType::GREATER_EQUAL_QUERY: {
+        _value_key_coder->full_encode_ascending(query_value, 
&visitor->queryMin);
+        _type_info->set_to_max(tmp);
+        _value_key_coder->full_encode_ascending(tmp, &visitor->queryMax);
+        break;
+    }
+    default:
+        LOG(ERROR) << "invalid query type when query bkd index";
+        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
+    }
+    visitor->set_reader(r.get());
+    return Status::OK();
+}
+
+Status BkdIndexReader::query(const std::string& column_name, const void* 
query_value,
+                             InvertedIndexQueryType query_type,
+                             InvertedIndexParserType analyser_type, 
roaring::Roaring* bit_map) {
+    uint64_t start = UnixMillis();
+    auto visitor = std::make_unique<InvertedIndexVisitor>(bit_map, query_type);
+    std::shared_ptr<lucene::util::bkd::bkd_reader> r;
+    try {
+        RETURN_IF_ERROR(bkd_query(column_name, query_value, query_type, 
std::move(r), visitor.get()));
+        r->intersect(visitor.get());
+    } catch (const CLuceneError& e) {
+        LOG(WARNING) << "BKD Query CLuceneError Occurred, error msg: " << 
e.what();
+        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+    }
+
+    LOG(INFO) << "BKD index search time taken: " << UnixMillis() - start << 
"ms "
+              << " column: " << column_name << " result: " << 
bit_map->cardinality()
+              << " reader stats: " << r->stats.to_string();
+    return Status::OK();
+}
+
+Status BkdIndexReader::get_bkd_reader(lucene::util::bkd::bkd_reader*& 
bkdReader) {
+    // bkd file reader
+    if (compoundReader == nullptr) {
+        LOG(WARNING) << "bkd index input file not found";
+        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>();
+    }
+    CLuceneError err;
+    lucene::store::IndexInput* data_in;
+    lucene::store::IndexInput* meta_in;
+    lucene::store::IndexInput* index_in;
+
+    if (!compoundReader->openInput(
+                
InvertedIndexDescriptor::get_temporary_bkd_index_data_file_name().c_str(), 
data_in,
+                err) ||
+        !compoundReader->openInput(
+                
InvertedIndexDescriptor::get_temporary_bkd_index_meta_file_name().c_str(), 
meta_in,
+                err) ||
+        !compoundReader->openInput(
+                
InvertedIndexDescriptor::get_temporary_bkd_index_file_name().c_str(), index_in,
+                err)) {
+        LOG(WARNING) << "bkd index input error: " << err.what();
+        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>();
+    }
+
+    bkdReader = new lucene::util::bkd::bkd_reader(data_in);
+    if (0 == bkdReader->read_meta(meta_in)) {
+        return Status::EndOfFile("bkd index file is empty");
+    }
+
+    bkdReader->read_index(index_in);
+
+    _type_info = get_scalar_type_info((FieldType)bkdReader->type);
+    if (_type_info == nullptr) {
+        auto type = bkdReader->type;
+        delete bkdReader;
+        LOG(WARNING) << "unsupported typeinfo, type=" << type;
+        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
+    }
+    _value_key_coder = get_key_coder(_type_info->type());
+    return Status::OK();
+}
+
+InvertedIndexReaderType BkdIndexReader::type() {
+    return InvertedIndexReaderType::BKD;
+}
+
+InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, 
InvertedIndexQueryType query_type,
+                                           bool only_count)
+        : hits(h), num_hits(0), only_count(only_count), query_type(query_type) 
{}
+
+bool InvertedIndexVisitor::matches(uint8_t* packedValue) {
+    for (int dim = 0; dim < reader->num_data_dims_; dim++) {
+        int offset = dim * reader->bytes_per_dim_;
+        if (query_type == InvertedIndexQueryType::LESS_THAN_QUERY) {
+            if (lucene::util::FutureArrays::CompareUnsigned(
+                        packedValue, offset, offset + reader->bytes_per_dim_,
+                        (const uint8_t*)queryMax.c_str(), offset,
+                        offset + reader->bytes_per_dim_) >= 0) {
+                // Doc's value is too high, in this dimension
+                return false;
+            }
+        } else if (query_type == InvertedIndexQueryType::GREATER_THAN_QUERY) {
+            if (lucene::util::FutureArrays::CompareUnsigned(
+                        packedValue, offset, offset + reader->bytes_per_dim_,
+                        (const uint8_t*)queryMin.c_str(), offset,
+                        offset + reader->bytes_per_dim_) <= 0) {
+                // Doc's value is too low, in this dimension
+                return false;
+            }
+        } else {
+            if (lucene::util::FutureArrays::CompareUnsigned(
+                        packedValue, offset, offset + reader->bytes_per_dim_,
+                        (const uint8_t*)queryMin.c_str(), offset,
+                        offset + reader->bytes_per_dim_) < 0) {
+                // Doc's value is too low, in this dimension
+                return false;
+            }
+            if (lucene::util::FutureArrays::CompareUnsigned(
+                        packedValue, offset, offset + reader->bytes_per_dim_,
+                        (const uint8_t*)queryMax.c_str(), offset,
+                        offset + reader->bytes_per_dim_) > 0) {
+                // Doc's value is too high, in this dimension
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+void InvertedIndexVisitor::visit(std::vector<char>& docID, 
std::vector<uint8_t>& packedValue) {
+    if (!matches(packedValue.data())) {
+        return;
+    }
+    visit(roaring::Roaring::read(docID.data(), false));
+}
+
+void InvertedIndexVisitor::visit(Roaring* docID, std::vector<uint8_t>& 
packedValue) {
+    if (!matches(packedValue.data())) {
+        return;
+    }
+    visit(*docID);
+}
+
+void InvertedIndexVisitor::visit(roaring::Roaring&& r) {
+    if (only_count) {
+        num_hits += r.cardinality();
+    } else {
+        *hits |= r;
+    }
+}
+
+void InvertedIndexVisitor::visit(roaring::Roaring& r) {
+    if (only_count) {
+        num_hits += r.cardinality();
+    } else {
+        *hits |= r;
+    }
+}
+
+void InvertedIndexVisitor::visit(int rowID) {
+    if (only_count) {
+        num_hits++;
+    } else {
+        hits->add(rowID);
+    }
+    if (0) {
+        std::wcout << L"visit docID=" << rowID << std::endl;
+    }
+}
+
+void InvertedIndexVisitor::visit(lucene::util::bkd::bkd_docid_set_iterator* 
iter,
+                                 std::vector<uint8_t>& packedValue) {
+    if (!matches(packedValue.data())) {
+        return;
+    }
+    int32_t docID = iter->docid_set->nextDoc();
+    while (docID != lucene::util::bkd::bkd_docid_set::NO_MORE_DOCS) {
+        if (only_count) {
+            num_hits++;
+        } else {
+            hits->add(docID);
+        }
+        docID = iter->docid_set->nextDoc();
+    }
+}
+
+void InvertedIndexVisitor::visit(int rowID, std::vector<uint8_t>& packedValue) 
{
+    if (0) {

Review Comment:
   warning: converting integer literal to bool, use bool literal instead 
[modernize-use-bool-literals]
   
   ```suggestion
       if (false) {
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to