This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 56b3ffef27a [enhancement](statistics) Add segment footer open stats to profile (#50326) 56b3ffef27a is described below commit 56b3ffef27af3acc2335bffd28ed691c5952bbab Author: Siyang Tang <tangsiy...@selectdb.com> AuthorDate: Fri Apr 25 09:49:48 2025 +0800 [enhancement](statistics) Add segment footer open stats to profile (#50326) --- be/src/olap/rowset/beta_rowset.cpp | 7 +++--- be/src/olap/rowset/beta_rowset.h | 4 ++- .../segment_v2/lazy_init_segment_iterator.cpp | 9 +++---- be/src/olap/rowset/segment_v2/segment.cpp | 29 ++++++++++++---------- be/src/olap/rowset/segment_v2/segment.h | 12 +++++---- be/src/olap/segment_loader.cpp | 2 +- be/test/olap/date_bloom_filter_test.cpp | 8 +++--- be/test/olap/segment_footer_cache_test.cpp | 8 +++--- 8 files changed, 43 insertions(+), 36 deletions(-) diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index b25b261c2bb..850a3480708 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -171,14 +171,15 @@ Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end, int64_t seg_id = seg_id_begin; while (seg_id < seg_id_end) { std::shared_ptr<segment_v2::Segment> segment; - RETURN_IF_ERROR(load_segment(seg_id, &segment)); + RETURN_IF_ERROR(load_segment(seg_id, nullptr, &segment)); segments->push_back(std::move(segment)); seg_id++; } return Status::OK(); } -Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment) { +Status BetaRowset::load_segment(int64_t seg_id, OlapReaderStatistics* stats, + segment_v2::SegmentSharedPtr* segment) { auto fs = _rowset_meta->fs(); if (!fs) { return Status::Error<INIT_FAILED>("get fs failed"); @@ -196,7 +197,7 @@ Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* se auto s = segment_v2::Segment::open(fs, seg_path, _rowset_meta->tablet_id(), seg_id, rowset_id(), _schema, reader_options, segment, - _rowset_meta->inverted_index_file_info(seg_id)); + _rowset_meta->inverted_index_file_info(seg_id), stats); if (!s.ok()) { LOG(WARNING) << "failed to open segment. " << seg_path << " under rowset " << rowset_id() << " : " << s.to_string(); diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index 4b1388a1f08..f3a8230383d 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -26,6 +26,7 @@ #include <vector> #include "common/status.h" +#include "olap/olap_common.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_meta.h" #include "olap/rowset/rowset_reader.h" @@ -76,7 +77,8 @@ public: Status load_segments(int64_t seg_id_begin, int64_t seg_id_end, std::vector<segment_v2::SegmentSharedPtr>* segments); - Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment); + Status load_segment(int64_t seg_id, OlapReaderStatistics* read_stats, + segment_v2::SegmentSharedPtr* segment); Status get_segments_size(std::vector<size_t>* segments_size); diff --git a/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp b/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp index 77e2310fc48..af663b36a39 100644 --- a/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp @@ -30,9 +30,8 @@ LazyInitSegmentIterator::LazyInitSegmentIterator(BetaRowsetSharedPtr rowset, int _schema(std::move(schema)), _read_options(opts) {} -/// Here do not use the argument of `opts`, -/// see where the iterator is created in `BetaRowsetReader::get_segment_iterators` -Status LazyInitSegmentIterator::init(const StorageReadOptions& /*opts*/) { +/// See where the iterator is created in `BetaRowsetReader::get_segment_iterators` +Status LazyInitSegmentIterator::init(const StorageReadOptions& opts) { _need_lazy_init = false; if (_inner_iterator) { return Status::OK(); @@ -42,7 +41,7 @@ Status LazyInitSegmentIterator::init(const StorageReadOptions& /*opts*/) { { SegmentCacheHandle segment_cache_handle; RETURN_IF_ERROR(SegmentLoader::instance()->load_segment( - _rowset, _segment_id, &segment_cache_handle, _should_use_cache, false)); + _rowset, _segment_id, &segment_cache_handle, _should_use_cache, false, opts.stats)); const auto& tmp_segments = segment_cache_handle.get_segments(); segment = tmp_segments[0]; } @@ -50,4 +49,4 @@ Status LazyInitSegmentIterator::init(const StorageReadOptions& /*opts*/) { return _inner_iterator->init(_read_options); } -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2 diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 90135b5826d..6c283de3285 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -81,9 +81,9 @@ class InvertedIndexIterator; Status Segment::open(io::FileSystemSPtr fs, const std::string& path, int64_t tablet_id, uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, const io::FileReaderOptions& reader_options, std::shared_ptr<Segment>* output, - InvertedIndexFileInfo idx_file_info) { + InvertedIndexFileInfo idx_file_info, OlapReaderStatistics* stats) { auto s = _open(fs, path, segment_id, rowset_id, tablet_schema, reader_options, output, - idx_file_info); + idx_file_info, stats); if (!s.ok()) { if (!config::is_cloud_mode()) { auto res = ExecEnv::get_tablet(tablet_id); @@ -101,14 +101,14 @@ Status Segment::open(io::FileSystemSPtr fs, const std::string& path, int64_t tab Status Segment::_open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, const io::FileReaderOptions& reader_options, std::shared_ptr<Segment>* output, - InvertedIndexFileInfo idx_file_info) { + InvertedIndexFileInfo idx_file_info, OlapReaderStatistics* stats) { io::FileReaderSPtr file_reader; RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options)); std::shared_ptr<Segment> segment( new Segment(segment_id, rowset_id, std::move(tablet_schema), idx_file_info)); segment->_fs = fs; segment->_file_reader = std::move(file_reader); - auto st = segment->_open(); + auto st = segment->_open(stats); TEST_INJECTION_POINT_CALLBACK("Segment::open:corruption", &st); if (st.is<ErrorCode::CORRUPTION>() && reader_options.cache_type == io::FileCachePolicy::FILE_BLOCK_CACHE) { @@ -121,7 +121,7 @@ Status Segment::_open(io::FileSystemSPtr fs, const std::string& path, uint32_t s RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options)); segment->_file_reader = std::move(file_reader); - st = segment->_open(); + st = segment->_open(stats); TEST_INJECTION_POINT_CALLBACK("Segment::open:corruption1", &st); if (st.is<ErrorCode::CORRUPTION>()) { // corrupt again LOG(WARNING) << "failed to try to read remote source file again with cache support," @@ -134,7 +134,7 @@ Status Segment::_open(io::FileSystemSPtr fs, const std::string& path, uint32_t s opt.cache_type = io::FileCachePolicy::NO_CACHE; // skip cache RETURN_IF_ERROR(fs->open_file(path, &file_reader, &opt)); segment->_file_reader = std::move(file_reader); - st = segment->_open(); + st = segment->_open(stats); if (!st.ok()) { LOG(WARNING) << "failed to try to read remote source file directly," << " file path: " << path @@ -177,9 +177,9 @@ void Segment::update_metadata_size() { _tracked_meta_mem_usage = _meta_mem_usage; } -Status Segment::_open() { +Status Segment::_open(OlapReaderStatistics* stats) { std::shared_ptr<SegmentFooterPB> footer_pb_shared; - RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared)); + RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared, stats)); _pk_index_meta.reset( footer_pb_shared->has_primary_key_index_meta() @@ -390,7 +390,8 @@ Status Segment::_write_error_file(size_t file_size, size_t offset, size_t bytes_ return Status::OK(); // already exists }; -Status Segment::_parse_footer(std::shared_ptr<SegmentFooterPB>& footer) { +Status Segment::_parse_footer(std::shared_ptr<SegmentFooterPB>& footer, + OlapReaderStatistics* stats) { // Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4) auto file_size = _file_reader->size(); if (file_size < 12) { @@ -402,7 +403,8 @@ Status Segment::_parse_footer(std::shared_ptr<SegmentFooterPB>& footer) { uint8_t fixed_buf[12]; size_t bytes_read = 0; // TODO(plat1ko): Support session variable `enable_file_cache` - io::IOContext io_ctx {.is_index_data = true}; + io::IOContext io_ctx {.is_index_data = true, + .file_cache_stats = stats ? &stats->file_cache_stats : nullptr}; RETURN_IF_ERROR( _file_reader->read_at(file_size - 12, Slice(fixed_buf, 12), &bytes_read, &io_ctx)); DCHECK_EQ(bytes_read, 12); @@ -606,7 +608,7 @@ Status Segment::_create_column_readers_once(OlapReaderStatistics* stats) { SCOPED_RAW_TIMER(&stats->segment_create_column_readers_timer_ns); return _create_column_readers_once_call.call([&] { std::shared_ptr<SegmentFooterPB> footer_pb_shared; - RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared)); + RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared, stats)); return _create_column_readers(*footer_pb_shared); }); } @@ -1185,7 +1187,8 @@ Status Segment::seek_and_read_by_rowid(const TabletSchema& schema, SlotDescripto return Status::OK(); } -Status Segment::_get_segment_footer(std::shared_ptr<SegmentFooterPB>& footer_pb) { +Status Segment::_get_segment_footer(std::shared_ptr<SegmentFooterPB>& footer_pb, + OlapReaderStatistics* stats) { std::shared_ptr<SegmentFooterPB> footer_pb_shared = _footer_pb.lock(); if (footer_pb_shared != nullptr) { footer_pb = footer_pb_shared; @@ -1205,7 +1208,7 @@ Status Segment::_get_segment_footer(std::shared_ptr<SegmentFooterPB>& footer_pb) if (!segment_footer_cache->lookup(cache_key, &cache_handle, segment_v2::PageTypePB::DATA_PAGE)) { - RETURN_IF_ERROR(_parse_footer(footer_pb_shared)); + RETURN_IF_ERROR(_parse_footer(footer_pb_shared, stats)); segment_footer_cache->insert(cache_key, footer_pb_shared, footer_pb_shared->ByteSizeLong(), &cache_handle, segment_v2::PageTypePB::DATA_PAGE); } else { diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 382db37976c..4b08b9c0d00 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -83,7 +83,8 @@ public: static Status open(io::FileSystemSPtr fs, const std::string& path, int64_t tablet_id, uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, const io::FileReaderOptions& reader_options, - std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info = {}); + std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info = {}, + OlapReaderStatistics* stats = nullptr); static io::UInt128Wrapper file_cache_key(std::string_view rowset_id, uint32_t seg_id); io::UInt128Wrapper file_cache_key() const { @@ -225,10 +226,11 @@ private: static Status _open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, const io::FileReaderOptions& reader_options, - std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info); + std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info, + OlapReaderStatistics* stats); // open segment file and read the minimum amount of necessary information (footer) - Status _open(); - Status _parse_footer(std::shared_ptr<SegmentFooterPB>& footer); + Status _open(OlapReaderStatistics* stats); + Status _parse_footer(std::shared_ptr<SegmentFooterPB>& footer, OlapReaderStatistics* stats); Status _create_column_readers(const SegmentFooterPB& footer); Status _load_pk_bloom_filter(OlapReaderStatistics* stats); ColumnReader* _get_column_reader(const TabletColumn& col); @@ -245,7 +247,7 @@ private: Status _create_column_readers_once(OlapReaderStatistics* stats); - Status _get_segment_footer(std::shared_ptr<SegmentFooterPB>&); + Status _get_segment_footer(std::shared_ptr<SegmentFooterPB>&, OlapReaderStatistics* stats); StoragePageCache::CacheKey get_segment_footer_cache_key() const; diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index 1bd21ad4e55..5385a2d5cf0 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -67,7 +67,7 @@ Status SegmentLoader::load_segment(const BetaRowsetSharedPtr& rowset, int64_t se } // If the segment is not healthy, then will create a new segment and will replace the unhealthy one in SegmentCache. segment_v2::SegmentSharedPtr segment; - RETURN_IF_ERROR(rowset->load_segment(segment_id, &segment)); + RETURN_IF_ERROR(rowset->load_segment(segment_id, index_load_stats, &segment)); if (need_load_pk_index_and_bf) { RETURN_IF_ERROR(segment->load_pk_index_and_bf(index_load_stats)); } diff --git a/be/test/olap/date_bloom_filter_test.cpp b/be/test/olap/date_bloom_filter_test.cpp index f0e8eb11579..e902e61cf21 100644 --- a/be/test/olap/date_bloom_filter_test.cpp +++ b/be/test/olap/date_bloom_filter_test.cpp @@ -150,9 +150,9 @@ TEST_F(DateBloomFilterTest, query_index_test) { EXPECT_TRUE(_tablet->add_rowset(rowset).ok()); segment_v2::SegmentSharedPtr segment; - EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, &segment).ok()); + EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, nullptr, &segment).ok()); std::shared_ptr<SegmentFooterPB> footer_pb_shared; - auto st = segment->_get_segment_footer(footer_pb_shared); + auto st = segment->_get_segment_footer(footer_pb_shared, nullptr); EXPECT_TRUE(st.ok()); st = segment->_create_column_readers(*footer_pb_shared); EXPECT_TRUE(st.ok()); @@ -230,9 +230,9 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { EXPECT_TRUE(_tablet->add_rowset(rowset).ok()); segment_v2::SegmentSharedPtr segment; - EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, &segment).ok()); + EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, nullptr, &segment).ok()); std::shared_ptr<SegmentFooterPB> footer_pb_shared; - auto st = segment->_get_segment_footer(footer_pb_shared); + auto st = segment->_get_segment_footer(footer_pb_shared, nullptr); EXPECT_TRUE(st.ok()); st = segment->_create_column_readers(*(footer_pb_shared)); EXPECT_TRUE(st.ok()); diff --git a/be/test/olap/segment_footer_cache_test.cpp b/be/test/olap/segment_footer_cache_test.cpp index f0ed4960151..25b809c6055 100644 --- a/be/test/olap/segment_footer_cache_test.cpp +++ b/be/test/olap/segment_footer_cache_test.cpp @@ -151,13 +151,13 @@ private: TEST_F(SegmentFooterCacheTest, TestGetSegmentFooter) { for (auto segment_ptr : _segments) { std::shared_ptr<segment_v2::SegmentFooterPB> footer; - Status st = segment_ptr->_get_segment_footer(footer); + Status st = segment_ptr->_get_segment_footer(footer, nullptr); ASSERT_TRUE(st.ok()); } for (auto segment_ptr : _segments) { std::shared_ptr<segment_v2::SegmentFooterPB> footer; - Status st = segment_ptr->_get_segment_footer(footer); + Status st = segment_ptr->_get_segment_footer(footer, nullptr); ASSERT_TRUE(st.ok()); } } @@ -179,7 +179,7 @@ TEST_F(SegmentFooterCacheTest, TestSemgnetFooterPBPage) { StoragePageCache cache(16 * 2048, 0, 0, 16); for (auto segment_ptr : _segments) { std::shared_ptr<segment_v2::SegmentFooterPB> footer; - Status st = segment_ptr->_get_segment_footer(footer); + Status st = segment_ptr->_get_segment_footer(footer, nullptr); ASSERT_TRUE(st.ok()); PageCacheHandle cache_handle; cache.insert(segment_ptr->get_segment_footer_cache_key(), footer, footer->ByteSizeLong(), @@ -192,4 +192,4 @@ TEST_F(SegmentFooterCacheTest, TestSemgnetFooterPBPage) { } } -} // namespace doris \ No newline at end of file +} // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org