This is an automated email from the ASF dual-hosted git repository. jianliangqi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 198f5329f8d [feature](inverted index) Add profile statistics for each condition in inverted index filters (#47504) 198f5329f8d is described below commit 198f5329f8d2413c6910df56ffe712c3fe48b3de Author: zzzxl <yangs...@selectdb.com> AuthorDate: Fri Feb 21 10:21:22 2025 +0800 [feature](inverted index) Add profile statistics for each condition in inverted index filters (#47504) Problem Summary: select count() from httplogs where clientip match '232.71.0.0' and request match 'images'; IndexFilter: - HitRows: 0ns - fr_clientip: 10.392K (10392) - fr_request: 28.601172M (28601172) - ExecTime: 0ns - ft_clientip: 2.65ms - ft_request: 201.778ms FilteredRows: Represents the count of rows that met the filtering conditions post-index filtering. FilteredTime: Represents the time taken to complete the filtering operation. --- be/src/olap/inverted_index_profile.h | 57 ++++++++++++++++++++++ be/src/olap/inverted_index_stats.h | 34 +++++++++++++ be/src/olap/olap_common.h | 2 + .../rowset/segment_v2/inverted_index_reader.cpp | 20 +++++++- be/src/pipeline/exec/olap_scan_operator.cpp | 4 ++ be/src/pipeline/exec/olap_scan_operator.h | 1 + be/src/vec/exec/scan/new_olap_scanner.cpp | 6 +++ be/test/olap/inverted_index_profile_test.cpp | 44 +++++++++++++++++ 8 files changed, 166 insertions(+), 2 deletions(-) diff --git a/be/src/olap/inverted_index_profile.h b/be/src/olap/inverted_index_profile.h new file mode 100644 index 00000000000..9255e41dc55 --- /dev/null +++ b/be/src/olap/inverted_index_profile.h @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <string> +#include <vector> + +#include "olap/inverted_index_stats.h" +#include "util/runtime_profile.h" + +namespace doris { + +class InvertedIndexProfileReporter { +public: + InvertedIndexProfileReporter() = default; + ~InvertedIndexProfileReporter() = default; + + void update(RuntimeProfile* profile, const InvertedIndexStatistics* statistics) { + // Determine the iteration limit: the smaller of 20 or the size of statistics->stats + size_t iteration_limit = std::min<size_t>(20, statistics->stats.size()); + + for (size_t i = 0; i < iteration_limit; ++i) { + const auto& stats = statistics->stats[i]; + + ADD_TIMER_WITH_LEVEL(profile, hit_rows_name, 1); + auto* hit_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "fr_" + stats.column_name, + TUnit::UNIT, hit_rows_name, 1); + COUNTER_UPDATE(hit_rows, stats.hit_rows); + + ADD_TIMER_WITH_LEVEL(profile, exec_time_name, 1); + auto* exec_time = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ft_" + stats.column_name, + TUnit::TIME_NS, exec_time_name, 1); + COUNTER_UPDATE(exec_time, stats.exec_time); + } + } + +private: + static constexpr const char* hit_rows_name = "HitRows"; + static constexpr const char* exec_time_name = "ExecTime"; +}; + +} // namespace doris diff --git a/be/src/olap/inverted_index_stats.h b/be/src/olap/inverted_index_stats.h new file mode 100644 index 00000000000..b82b230f41d --- /dev/null +++ b/be/src/olap/inverted_index_stats.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <vector> + +namespace doris { + +struct InvertedIndexQueryStatistics { + std::string column_name; + int64_t hit_rows = 0; + int64_t exec_time = 0; +}; + +struct InvertedIndexStatistics { + std::vector<InvertedIndexQueryStatistics> stats; +}; + +} // namespace doris diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 623d2c83e49..24477b9b66b 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -38,6 +38,7 @@ #include "common/config.h" #include "common/exception.h" #include "io/io_common.h" +#include "olap/inverted_index_stats.h" #include "olap/olap_define.h" #include "olap/rowset/rowset_fwd.h" #include "util/hash_util.hpp" @@ -378,6 +379,7 @@ struct OlapReaderStatistics { int64_t inverted_index_searcher_cache_hit = 0; int64_t inverted_index_searcher_cache_miss = 0; int64_t inverted_index_downgrade_count = 0; + InvertedIndexStatistics inverted_index_stats; int64_t output_index_result_column_timer = 0; // number of segment filtered by column stat when creating seg iterator diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 5da74fd1dcf..c885072ee16 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -1177,8 +1177,24 @@ Status InvertedIndexIterator::read_from_inverted_index( } } - RETURN_IF_ERROR(_reader->query(&_io_ctx, _stats, _runtime_state, column_name, query_value, - query_type, bit_map)); + auto execute_query = [&]() { + return _reader->query(&_io_ctx, _stats, _runtime_state, column_name, query_value, + query_type, bit_map); + }; + + if (_runtime_state->query_options().enable_profile) { + InvertedIndexQueryStatistics query_stats; + { + SCOPED_RAW_TIMER(&query_stats.exec_time); + RETURN_IF_ERROR(execute_query()); + } + query_stats.column_name = column_name; + query_stats.hit_rows = bit_map->cardinality(); + _stats->inverted_index_stats.stats.emplace_back(query_stats); + } else { + RETURN_IF_ERROR(execute_query()); + } + return Status::OK(); } diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 37c7664358d..604bf920045 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -196,6 +196,10 @@ Status OlapScanLocalState::_init_profile() { _segment_create_column_readers_timer = ADD_TIMER(_scanner_profile, "SegmentCreateColumnReadersTimer"); _segment_load_index_timer = ADD_TIMER(_scanner_profile, "SegmentLoadIndexTimer"); + + _index_filter_profile = std::make_unique<RuntimeProfile>("IndexFilter"); + _scanner_profile->add_child(_index_filter_profile.get(), true, nullptr); + return Status::OK(); } diff --git a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h index 7efe357fe3b..347c29e9d43 100644 --- a/be/src/pipeline/exec/olap_scan_operator.h +++ b/be/src/pipeline/exec/olap_scan_operator.h @@ -99,6 +99,7 @@ private: std::set<int32_t> _maybe_read_column_ids; std::unique_ptr<RuntimeProfile> _segment_profile; + std::unique_ptr<RuntimeProfile> _index_filter_profile; RuntimeProfile::Counter* _tablet_counter = nullptr; RuntimeProfile::Counter* _key_range_counter = nullptr; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 248d391aadd..d877a064e90 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -41,6 +41,7 @@ #include "exprs/function_filter.h" #include "io/cache/block_file_cache_profile.h" #include "io/io_common.h" +#include "olap/inverted_index_profile.h" #include "olap/olap_common.h" #include "olap/olap_tuple.h" #include "olap/rowset/rowset.h" @@ -628,6 +629,11 @@ void NewOlapScanner::_collect_profile_before_close() { stats.inverted_index_searcher_cache_miss); COUNTER_UPDATE(local_state->_inverted_index_downgrade_count_counter, stats.inverted_index_downgrade_count); + + InvertedIndexProfileReporter inverted_index_profile; + inverted_index_profile.update(local_state->_index_filter_profile.get(), + &stats.inverted_index_stats); + if (config::enable_file_cache) { io::FileCacheProfileReporter cache_profile(local_state->_segment_profile.get()); cache_profile.update(&stats.file_cache_stats); diff --git a/be/test/olap/inverted_index_profile_test.cpp b/be/test/olap/inverted_index_profile_test.cpp new file mode 100644 index 00000000000..e3aa3555604 --- /dev/null +++ b/be/test/olap/inverted_index_profile_test.cpp @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/inverted_index_profile.h" + +#include <gtest/gtest.h> + +#include <memory> + +#include "olap/inverted_index_stats.h" + +namespace doris { + +TEST(InvertedIndexProfileReporterTest, UpdateTest) { + auto runtime_profile = std::make_unique<RuntimeProfile>("test_profile"); + + InvertedIndexStatistics statistics; + statistics.stats.push_back({"test_column1", 101, 201}); + statistics.stats.push_back({"test_column2", 102, 202}); + + InvertedIndexProfileReporter reporter; + reporter.update(runtime_profile.get(), &statistics); + + ASSERT_EQ(runtime_profile->get_counter("fr_test_column1")->value(), 101); + ASSERT_EQ(runtime_profile->get_counter("ft_test_column1")->value(), 201); + ASSERT_EQ(runtime_profile->get_counter("fr_test_column2")->value(), 102); + ASSERT_EQ(runtime_profile->get_counter("ft_test_column2")->value(), 202); +} + +} // namespace doris \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org