This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 56b3ffef27a [enhancement](statistics) Add segment footer open stats to profile (#50326)
56b3ffef27a is described below

commit 56b3ffef27af3acc2335bffd28ed691c5952bbab
Author: Siyang Tang <tangsiy...@selectdb.com>
AuthorDate: Fri Apr 25 09:49:48 2025 +0800

    [enhancement](statistics) Add segment footer open stats to profile (#50326)
---
 be/src/olap/rowset/beta_rowset.cpp                 |  7 +++---
 be/src/olap/rowset/beta_rowset.h                   |  4 ++-
 .../segment_v2/lazy_init_segment_iterator.cpp      |  9 +++----
 be/src/olap/rowset/segment_v2/segment.cpp          | 29 ++++++++++++----------
 be/src/olap/rowset/segment_v2/segment.h            | 12 +++++----
 be/src/olap/segment_loader.cpp                     |  2 +-
 be/test/olap/date_bloom_filter_test.cpp            |  8 +++---
 be/test/olap/segment_footer_cache_test.cpp         |  8 +++---
 8 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp
index b25b261c2bb..850a3480708 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -171,14 +171,15 @@ Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end,
     int64_t seg_id = seg_id_begin;
     while (seg_id < seg_id_end) {
         std::shared_ptr<segment_v2::Segment> segment;
-        RETURN_IF_ERROR(load_segment(seg_id, &segment));
+        RETURN_IF_ERROR(load_segment(seg_id, nullptr, &segment));
         segments->push_back(std::move(segment));
         seg_id++;
     }
     return Status::OK();
 }
 
-Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment) {
+Status BetaRowset::load_segment(int64_t seg_id, OlapReaderStatistics* stats,
+                                segment_v2::SegmentSharedPtr* segment) {
     auto fs = _rowset_meta->fs();
     if (!fs) {
         return Status::Error<INIT_FAILED>("get fs failed");
@@ -196,7 +197,7 @@ Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* se
 
     auto s = segment_v2::Segment::open(fs, seg_path, _rowset_meta->tablet_id(), seg_id, rowset_id(),
                                        _schema, reader_options, segment,
-                                       _rowset_meta->inverted_index_file_info(seg_id));
+                                       _rowset_meta->inverted_index_file_info(seg_id), stats);
     if (!s.ok()) {
         LOG(WARNING) << "failed to open segment. " << seg_path << " under rowset " << rowset_id()
                      << " : " << s.to_string();
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index 4b1388a1f08..f3a8230383d 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "common/status.h"
+#include "olap/olap_common.h"
 #include "olap/rowset/rowset.h"
 #include "olap/rowset/rowset_meta.h"
 #include "olap/rowset/rowset_reader.h"
@@ -76,7 +77,8 @@ public:
     Status load_segments(int64_t seg_id_begin, int64_t seg_id_end,
                          std::vector<segment_v2::SegmentSharedPtr>* segments);
 
-    Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment);
+    Status load_segment(int64_t seg_id, OlapReaderStatistics* read_stats,
+                        segment_v2::SegmentSharedPtr* segment);
 
     Status get_segments_size(std::vector<size_t>* segments_size);
 
diff --git a/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp b/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp
index 77e2310fc48..af663b36a39 100644
--- a/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp
@@ -30,9 +30,8 @@ LazyInitSegmentIterator::LazyInitSegmentIterator(BetaRowsetSharedPtr rowset, int
           _schema(std::move(schema)),
           _read_options(opts) {}
 
-/// Here do not use the argument of `opts`,
-/// see where the iterator is created in `BetaRowsetReader::get_segment_iterators`
-Status LazyInitSegmentIterator::init(const StorageReadOptions& /*opts*/) {
+/// See where the iterator is created in `BetaRowsetReader::get_segment_iterators`
+Status LazyInitSegmentIterator::init(const StorageReadOptions& opts) {
     _need_lazy_init = false;
     if (_inner_iterator) {
         return Status::OK();
@@ -42,7 +41,7 @@ Status LazyInitSegmentIterator::init(const StorageReadOptions& /*opts*/) {
     {
         SegmentCacheHandle segment_cache_handle;
         RETURN_IF_ERROR(SegmentLoader::instance()->load_segment(
-                _rowset, _segment_id, &segment_cache_handle, _should_use_cache, false));
+                _rowset, _segment_id, &segment_cache_handle, _should_use_cache, false, opts.stats));
         const auto& tmp_segments = segment_cache_handle.get_segments();
         segment = tmp_segments[0];
     }
@@ -50,4 +49,4 @@ Status LazyInitSegmentIterator::init(const StorageReadOptions& /*opts*/) {
     return _inner_iterator->init(_read_options);
 }
 
-} // namespace doris::segment_v2
\ No newline at end of file
+} // namespace doris::segment_v2
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index 90135b5826d..6c283de3285 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -81,9 +81,9 @@ class InvertedIndexIterator;
 Status Segment::open(io::FileSystemSPtr fs, const std::string& path, int64_t tablet_id,
                      uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
                      const io::FileReaderOptions& reader_options, std::shared_ptr<Segment>* output,
-                     InvertedIndexFileInfo idx_file_info) {
+                     InvertedIndexFileInfo idx_file_info, OlapReaderStatistics* stats) {
     auto s = _open(fs, path, segment_id, rowset_id, tablet_schema, reader_options, output,
-                   idx_file_info);
+                   idx_file_info, stats);
     if (!s.ok()) {
         if (!config::is_cloud_mode()) {
             auto res = ExecEnv::get_tablet(tablet_id);
@@ -101,14 +101,14 @@ Status Segment::open(io::FileSystemSPtr fs, const std::string& path, int64_t tab
 Status Segment::_open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
                       RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
                       const io::FileReaderOptions& reader_options, std::shared_ptr<Segment>* output,
-                      InvertedIndexFileInfo idx_file_info) {
+                      InvertedIndexFileInfo idx_file_info, OlapReaderStatistics* stats) {
     io::FileReaderSPtr file_reader;
     RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options));
     std::shared_ptr<Segment> segment(
             new Segment(segment_id, rowset_id, std::move(tablet_schema), idx_file_info));
     segment->_fs = fs;
     segment->_file_reader = std::move(file_reader);
-    auto st = segment->_open();
+    auto st = segment->_open(stats);
     TEST_INJECTION_POINT_CALLBACK("Segment::open:corruption", &st);
     if (st.is<ErrorCode::CORRUPTION>() &&
         reader_options.cache_type == io::FileCachePolicy::FILE_BLOCK_CACHE) {
@@ -121,7 +121,7 @@ Status Segment::_open(io::FileSystemSPtr fs, const std::string& path, uint32_t s
 
         RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options));
         segment->_file_reader = std::move(file_reader);
-        st = segment->_open();
+        st = segment->_open(stats);
         TEST_INJECTION_POINT_CALLBACK("Segment::open:corruption1", &st);
         if (st.is<ErrorCode::CORRUPTION>()) { // corrupt again
             LOG(WARNING) << "failed to try to read remote source file again with cache support,"
@@ -134,7 +134,7 @@ Status Segment::_open(io::FileSystemSPtr fs, const std::string& path, uint32_t s
             opt.cache_type = io::FileCachePolicy::NO_CACHE; // skip cache
             RETURN_IF_ERROR(fs->open_file(path, &file_reader, &opt));
             segment->_file_reader = std::move(file_reader);
-            st = segment->_open();
+            st = segment->_open(stats);
             if (!st.ok()) {
                 LOG(WARNING) << "failed to try to read remote source file directly,"
                              << " file path: " << path
@@ -177,9 +177,9 @@ void Segment::update_metadata_size() {
     _tracked_meta_mem_usage = _meta_mem_usage;
 }
 
-Status Segment::_open() {
+Status Segment::_open(OlapReaderStatistics* stats) {
     std::shared_ptr<SegmentFooterPB> footer_pb_shared;
-    RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared));
+    RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared, stats));
 
     _pk_index_meta.reset(
             footer_pb_shared->has_primary_key_index_meta()
@@ -390,7 +390,8 @@ Status Segment::_write_error_file(size_t file_size, size_t offset, size_t bytes_
     return Status::OK(); // already exists
 };
 
-Status Segment::_parse_footer(std::shared_ptr<SegmentFooterPB>& footer) {
+Status Segment::_parse_footer(std::shared_ptr<SegmentFooterPB>& footer,
+                              OlapReaderStatistics* stats) {
     // Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4)
     auto file_size = _file_reader->size();
     if (file_size < 12) {
@@ -402,7 +403,8 @@ Status Segment::_parse_footer(std::shared_ptr<SegmentFooterPB>& footer) {
     uint8_t fixed_buf[12];
     size_t bytes_read = 0;
     // TODO(plat1ko): Support session variable `enable_file_cache`
-    io::IOContext io_ctx {.is_index_data = true};
+    io::IOContext io_ctx {.is_index_data = true,
+                          .file_cache_stats = stats ? &stats->file_cache_stats : nullptr};
     RETURN_IF_ERROR(
             _file_reader->read_at(file_size - 12, Slice(fixed_buf, 12), &bytes_read, &io_ctx));
     DCHECK_EQ(bytes_read, 12);
@@ -606,7 +608,7 @@ Status Segment::_create_column_readers_once(OlapReaderStatistics* stats) {
     SCOPED_RAW_TIMER(&stats->segment_create_column_readers_timer_ns);
     return _create_column_readers_once_call.call([&] {
         std::shared_ptr<SegmentFooterPB> footer_pb_shared;
-        RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared));
+        RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared, stats));
         return _create_column_readers(*footer_pb_shared);
     });
 }
@@ -1185,7 +1187,8 @@ Status Segment::seek_and_read_by_rowid(const TabletSchema& schema, SlotDescripto
     return Status::OK();
 }
 
-Status Segment::_get_segment_footer(std::shared_ptr<SegmentFooterPB>& footer_pb) {
+Status Segment::_get_segment_footer(std::shared_ptr<SegmentFooterPB>& footer_pb,
+                                    OlapReaderStatistics* stats) {
     std::shared_ptr<SegmentFooterPB> footer_pb_shared = _footer_pb.lock();
     if (footer_pb_shared != nullptr) {
         footer_pb = footer_pb_shared;
@@ -1205,7 +1208,7 @@ Status Segment::_get_segment_footer(std::shared_ptr<SegmentFooterPB>& footer_pb)
 
     if (!segment_footer_cache->lookup(cache_key, &cache_handle,
                                       segment_v2::PageTypePB::DATA_PAGE)) {
-        RETURN_IF_ERROR(_parse_footer(footer_pb_shared));
+        RETURN_IF_ERROR(_parse_footer(footer_pb_shared, stats));
         segment_footer_cache->insert(cache_key, footer_pb_shared, footer_pb_shared->ByteSizeLong(),
                                      &cache_handle, segment_v2::PageTypePB::DATA_PAGE);
     } else {
diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h
index 382db37976c..4b08b9c0d00 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -83,7 +83,8 @@ public:
     static Status open(io::FileSystemSPtr fs, const std::string& path, int64_t tablet_id,
                        uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
                        const io::FileReaderOptions& reader_options,
-                       std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info = {});
+                       std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info = {},
+                       OlapReaderStatistics* stats = nullptr);
 
     static io::UInt128Wrapper file_cache_key(std::string_view rowset_id, uint32_t seg_id);
     io::UInt128Wrapper file_cache_key() const {
@@ -225,10 +226,11 @@ private:
     static Status _open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
                         RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
                         const io::FileReaderOptions& reader_options,
-                        std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info);
+                        std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info,
+                        OlapReaderStatistics* stats);
     // open segment file and read the minimum amount of necessary information (footer)
-    Status _open();
-    Status _parse_footer(std::shared_ptr<SegmentFooterPB>& footer);
+    Status _open(OlapReaderStatistics* stats);
+    Status _parse_footer(std::shared_ptr<SegmentFooterPB>& footer, OlapReaderStatistics* stats);
     Status _create_column_readers(const SegmentFooterPB& footer);
     Status _load_pk_bloom_filter(OlapReaderStatistics* stats);
     ColumnReader* _get_column_reader(const TabletColumn& col);
@@ -245,7 +247,7 @@ private:
 
     Status _create_column_readers_once(OlapReaderStatistics* stats);
 
-    Status _get_segment_footer(std::shared_ptr<SegmentFooterPB>&);
+    Status _get_segment_footer(std::shared_ptr<SegmentFooterPB>&, OlapReaderStatistics* stats);
 
     StoragePageCache::CacheKey get_segment_footer_cache_key() const;
 
diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp
index 1bd21ad4e55..5385a2d5cf0 100644
--- a/be/src/olap/segment_loader.cpp
+++ b/be/src/olap/segment_loader.cpp
@@ -67,7 +67,7 @@ Status SegmentLoader::load_segment(const BetaRowsetSharedPtr& rowset, int64_t se
     }
     // If the segment is not healthy, then will create a new segment and will replace the unhealthy one in SegmentCache.
     segment_v2::SegmentSharedPtr segment;
-    RETURN_IF_ERROR(rowset->load_segment(segment_id, &segment));
+    RETURN_IF_ERROR(rowset->load_segment(segment_id, index_load_stats, &segment));
     if (need_load_pk_index_and_bf) {
         RETURN_IF_ERROR(segment->load_pk_index_and_bf(index_load_stats));
     }
diff --git a/be/test/olap/date_bloom_filter_test.cpp b/be/test/olap/date_bloom_filter_test.cpp
index f0e8eb11579..e902e61cf21 100644
--- a/be/test/olap/date_bloom_filter_test.cpp
+++ b/be/test/olap/date_bloom_filter_test.cpp
@@ -150,9 +150,9 @@ TEST_F(DateBloomFilterTest, query_index_test) {
     EXPECT_TRUE(_tablet->add_rowset(rowset).ok());
 
     segment_v2::SegmentSharedPtr segment;
-    EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, &segment).ok());
+    EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, nullptr, &segment).ok());
     std::shared_ptr<SegmentFooterPB> footer_pb_shared;
-    auto st = segment->_get_segment_footer(footer_pb_shared);
+    auto st = segment->_get_segment_footer(footer_pb_shared, nullptr);
     EXPECT_TRUE(st.ok());
     st = segment->_create_column_readers(*footer_pb_shared);
     EXPECT_TRUE(st.ok());
@@ -230,9 +230,9 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) {
     EXPECT_TRUE(_tablet->add_rowset(rowset).ok());
 
     segment_v2::SegmentSharedPtr segment;
-    EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, &segment).ok());
+    EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, nullptr, &segment).ok());
     std::shared_ptr<SegmentFooterPB> footer_pb_shared;
-    auto st = segment->_get_segment_footer(footer_pb_shared);
+    auto st = segment->_get_segment_footer(footer_pb_shared, nullptr);
     EXPECT_TRUE(st.ok());
     st = segment->_create_column_readers(*(footer_pb_shared));
     EXPECT_TRUE(st.ok());
diff --git a/be/test/olap/segment_footer_cache_test.cpp b/be/test/olap/segment_footer_cache_test.cpp
index f0ed4960151..25b809c6055 100644
--- a/be/test/olap/segment_footer_cache_test.cpp
+++ b/be/test/olap/segment_footer_cache_test.cpp
@@ -151,13 +151,13 @@ private:
 TEST_F(SegmentFooterCacheTest, TestGetSegmentFooter) {
     for (auto segment_ptr : _segments) {
         std::shared_ptr<segment_v2::SegmentFooterPB> footer;
-        Status st = segment_ptr->_get_segment_footer(footer);
+        Status st = segment_ptr->_get_segment_footer(footer, nullptr);
         ASSERT_TRUE(st.ok());
     }
 
     for (auto segment_ptr : _segments) {
         std::shared_ptr<segment_v2::SegmentFooterPB> footer;
-        Status st = segment_ptr->_get_segment_footer(footer);
+        Status st = segment_ptr->_get_segment_footer(footer, nullptr);
         ASSERT_TRUE(st.ok());
     }
 }
@@ -179,7 +179,7 @@ TEST_F(SegmentFooterCacheTest, TestSemgnetFooterPBPage) {
     StoragePageCache cache(16 * 2048, 0, 0, 16);
     for (auto segment_ptr : _segments) {
         std::shared_ptr<segment_v2::SegmentFooterPB> footer;
-        Status st = segment_ptr->_get_segment_footer(footer);
+        Status st = segment_ptr->_get_segment_footer(footer, nullptr);
         ASSERT_TRUE(st.ok());
         PageCacheHandle cache_handle;
         cache.insert(segment_ptr->get_segment_footer_cache_key(), footer, footer->ByteSizeLong(),
@@ -192,4 +192,4 @@ TEST_F(SegmentFooterCacheTest, TestSemgnetFooterPBPage) {
     }
 }
 
-} // namespace doris
\ No newline at end of file
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to