This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 198f5329f8d [feature](inverted index) Add profile statistics for each 
condition in inverted index filters (#47504)
198f5329f8d is described below

commit 198f5329f8d2413c6910df56ffe712c3fe48b3de
Author: zzzxl <yangs...@selectdb.com>
AuthorDate: Fri Feb 21 10:21:22 2025 +0800

    [feature](inverted index) Add profile statistics for each condition in 
inverted index filters (#47504)
    
    Problem Summary:
    
    select count() from httplogs where clientip match '232.71.0.0' and
    request match 'images';
    
    IndexFilter:
          -  HitRows:  0ns
              -  fr_clientip:  10.392K  (10392)
              -  fr_request:  28.601172M  (28601172)
          -  ExecTime:  0ns
              -  ft_clientip:  2.65ms
              -  ft_request:  201.778ms
    
    fr_<column> (FilteredRows): the number of rows that satisfied the
    condition after inverted index filtering.
    ft_<column> (FilteredTime): the time taken to evaluate that condition
    against the inverted index.
---
 be/src/olap/inverted_index_profile.h               | 57 ++++++++++++++++++++++
 be/src/olap/inverted_index_stats.h                 | 34 +++++++++++++
 be/src/olap/olap_common.h                          |  2 +
 .../rowset/segment_v2/inverted_index_reader.cpp    | 20 +++++++-
 be/src/pipeline/exec/olap_scan_operator.cpp        |  4 ++
 be/src/pipeline/exec/olap_scan_operator.h          |  1 +
 be/src/vec/exec/scan/new_olap_scanner.cpp          |  6 +++
 be/test/olap/inverted_index_profile_test.cpp       | 44 +++++++++++++++++
 8 files changed, 166 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/inverted_index_profile.h 
b/be/src/olap/inverted_index_profile.h
new file mode 100644
index 00000000000..9255e41dc55
--- /dev/null
+++ b/be/src/olap/inverted_index_profile.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include "olap/inverted_index_stats.h"
+#include "util/runtime_profile.h"
+
+namespace doris {
+
+/// Publishes per-condition inverted index statistics into a RuntimeProfile.
+///
+/// For each reported entry, two child counters are added and updated:
+///   - "fr_<column_name>" (TUnit::UNIT) under "HitRows", updated with hit_rows
+///   - "ft_<column_name>" (TUnit::TIME_NS) under "ExecTime", updated with exec_time
+class InvertedIndexProfileReporter {
+public:
+    InvertedIndexProfileReporter() = default;
+    ~InvertedIndexProfileReporter() = default;
+
+    /// Reports at most `max_reported_conditions` entries of `statistics->stats`
+    /// into `profile`, in the order they appear in the vector.
+    ///
+    /// @param profile    profile that receives the counters; ignored when null.
+    /// @param statistics collected per-condition statistics; ignored when null.
+    void update(RuntimeProfile* profile, const InvertedIndexStatistics* statistics) {
+        // Nothing can be reported without both a destination and a source.
+        if (profile == nullptr || statistics == nullptr) {
+            return;
+        }
+
+        // Only the first `max_reported_conditions` entries are published.
+        const size_t iteration_limit =
+                std::min<size_t>(max_reported_conditions, statistics->stats.size());
+        if (iteration_limit == 0) {
+            // Keep the profile untouched when there is nothing to report.
+            return;
+        }
+
+        // The two parent counters are never updated in this function; they only
+        // act as parents for the per-column child counters added below.
+        ADD_TIMER_WITH_LEVEL(profile, hit_rows_name, 1);
+        ADD_TIMER_WITH_LEVEL(profile, exec_time_name, 1);
+
+        for (size_t i = 0; i < iteration_limit; ++i) {
+            const auto& stats = statistics->stats[i];
+
+            auto* hit_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "fr_" + stats.column_name,
+                                                          TUnit::UNIT, hit_rows_name, 1);
+            COUNTER_UPDATE(hit_rows, stats.hit_rows);
+
+            auto* exec_time = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ft_" + stats.column_name,
+                                                           TUnit::TIME_NS, exec_time_name, 1);
+            COUNTER_UPDATE(exec_time, stats.exec_time);
+        }
+    }
+
+private:
+    // Upper bound on the number of statistics entries published per update() call.
+    static constexpr size_t max_reported_conditions = 20;
+    static constexpr const char* hit_rows_name = "HitRows";
+    static constexpr const char* exec_time_name = "ExecTime";
+};
+
+} // namespace doris
diff --git a/be/src/olap/inverted_index_stats.h 
b/be/src/olap/inverted_index_stats.h
new file mode 100644
index 00000000000..b82b230f41d
--- /dev/null
+++ b/be/src/olap/inverted_index_stats.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace doris {
+
+// Statistics recorded for a single inverted index query.
+struct InvertedIndexQueryStatistics {
+    // Name of the column the query was issued against.
+    std::string column_name;
+    // Row count recorded for the query; the inverted index reader stores the
+    // cardinality of the result bitmap here.
+    int64_t hit_rows = 0;
+    // Time spent executing the query; reported to the profile as TUnit::TIME_NS.
+    int64_t exec_time = 0;
+};
+
+// Collection of per-query inverted index statistics, one entry appended per
+// recorded query.
+struct InvertedIndexStatistics {
+    std::vector<InvertedIndexQueryStatistics> stats;
+};
+
+} // namespace doris
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 623d2c83e49..24477b9b66b 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -38,6 +38,7 @@
 #include "common/config.h"
 #include "common/exception.h"
 #include "io/io_common.h"
+#include "olap/inverted_index_stats.h"
 #include "olap/olap_define.h"
 #include "olap/rowset/rowset_fwd.h"
 #include "util/hash_util.hpp"
@@ -378,6 +379,7 @@ struct OlapReaderStatistics {
     int64_t inverted_index_searcher_cache_hit = 0;
     int64_t inverted_index_searcher_cache_miss = 0;
     int64_t inverted_index_downgrade_count = 0;
+    InvertedIndexStatistics inverted_index_stats;
 
     int64_t output_index_result_column_timer = 0;
     // number of segment filtered by column stat when creating seg iterator
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 5da74fd1dcf..c885072ee16 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -1177,8 +1177,24 @@ Status InvertedIndexIterator::read_from_inverted_index(
         }
     }
 
-    RETURN_IF_ERROR(_reader->query(&_io_ctx, _stats, _runtime_state, 
column_name, query_value,
-                                   query_type, bit_map));
+    auto execute_query = [&]() {
+        return _reader->query(&_io_ctx, _stats, _runtime_state, column_name, 
query_value,
+                              query_type, bit_map);
+    };
+
+    if (_runtime_state->query_options().enable_profile) {
+        InvertedIndexQueryStatistics query_stats;
+        {
+            SCOPED_RAW_TIMER(&query_stats.exec_time);
+            RETURN_IF_ERROR(execute_query());
+        }
+        query_stats.column_name = column_name;
+        query_stats.hit_rows = bit_map->cardinality();
+        _stats->inverted_index_stats.stats.emplace_back(query_stats);
+    } else {
+        RETURN_IF_ERROR(execute_query());
+    }
+
     return Status::OK();
 }
 
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp 
b/be/src/pipeline/exec/olap_scan_operator.cpp
index 37c7664358d..604bf920045 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -196,6 +196,10 @@ Status OlapScanLocalState::_init_profile() {
     _segment_create_column_readers_timer =
             ADD_TIMER(_scanner_profile, "SegmentCreateColumnReadersTimer");
     _segment_load_index_timer = ADD_TIMER(_scanner_profile, 
"SegmentLoadIndexTimer");
+
+    _index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter");
+    _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr);
+
     return Status::OK();
 }
 
diff --git a/be/src/pipeline/exec/olap_scan_operator.h 
b/be/src/pipeline/exec/olap_scan_operator.h
index 7efe357fe3b..347c29e9d43 100644
--- a/be/src/pipeline/exec/olap_scan_operator.h
+++ b/be/src/pipeline/exec/olap_scan_operator.h
@@ -99,6 +99,7 @@ private:
     std::set<int32_t> _maybe_read_column_ids;
 
     std::unique_ptr<RuntimeProfile> _segment_profile;
+    std::unique_ptr<RuntimeProfile> _index_filter_profile;
 
     RuntimeProfile::Counter* _tablet_counter = nullptr;
     RuntimeProfile::Counter* _key_range_counter = nullptr;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp 
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index 248d391aadd..d877a064e90 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -41,6 +41,7 @@
 #include "exprs/function_filter.h"
 #include "io/cache/block_file_cache_profile.h"
 #include "io/io_common.h"
+#include "olap/inverted_index_profile.h"
 #include "olap/olap_common.h"
 #include "olap/olap_tuple.h"
 #include "olap/rowset/rowset.h"
@@ -628,6 +629,11 @@ void NewOlapScanner::_collect_profile_before_close() {
                    stats.inverted_index_searcher_cache_miss);
     COUNTER_UPDATE(local_state->_inverted_index_downgrade_count_counter,
                    stats.inverted_index_downgrade_count);
+
+    InvertedIndexProfileReporter inverted_index_profile;
+    inverted_index_profile.update(local_state->_index_filter_profile.get(),
+                                  &stats.inverted_index_stats);
+
     if (config::enable_file_cache) {
         io::FileCacheProfileReporter 
cache_profile(local_state->_segment_profile.get());
         cache_profile.update(&stats.file_cache_stats);
diff --git a/be/test/olap/inverted_index_profile_test.cpp 
b/be/test/olap/inverted_index_profile_test.cpp
new file mode 100644
index 00000000000..e3aa3555604
--- /dev/null
+++ b/be/test/olap/inverted_index_profile_test.cpp
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/inverted_index_profile.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "olap/inverted_index_stats.h"
+
+namespace doris {
+
+// Verifies that update() publishes an "fr_<column>" / "ft_<column>" counter pair
+// for every statistics entry and that each counter carries the reported value.
+TEST(InvertedIndexProfileReporterTest, UpdateTest) {
+    auto runtime_profile = std::make_unique<RuntimeProfile>("test_profile");
+
+    InvertedIndexStatistics statistics;
+    statistics.stats.push_back({"test_column1", 101, 201});
+    statistics.stats.push_back({"test_column2", 102, 202});
+
+    InvertedIndexProfileReporter reporter;
+    reporter.update(runtime_profile.get(), &statistics);
+
+    // Check each lookup before dereferencing so that a missing counter is
+    // reported as a test failure rather than crashing the test binary.
+    auto* hit_rows_1 = runtime_profile->get_counter("fr_test_column1");
+    ASSERT_NE(hit_rows_1, nullptr);
+    ASSERT_EQ(hit_rows_1->value(), 101);
+
+    auto* exec_time_1 = runtime_profile->get_counter("ft_test_column1");
+    ASSERT_NE(exec_time_1, nullptr);
+    ASSERT_EQ(exec_time_1->value(), 201);
+
+    auto* hit_rows_2 = runtime_profile->get_counter("fr_test_column2");
+    ASSERT_NE(hit_rows_2, nullptr);
+    ASSERT_EQ(hit_rows_2->value(), 102);
+
+    auto* exec_time_2 = runtime_profile->get_counter("ft_test_column2");
+    ASSERT_NE(exec_time_2, nullptr);
+    ASSERT_EQ(exec_time_2->value(), 202);
+}
+
+} // namespace doris
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to