This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b602369482 [Refactor](merge-on-write) extract common logic in to `Tablet::_get_segment_column_iterator` (#24048) b602369482 is described below commit b602369482cefc2d36cece656f4046ea15b3a3d1 Author: bobhan1 <bh2444151...@outlook.com> AuthorDate: Tue Sep 12 11:37:36 2023 +0800 [Refactor](merge-on-write) extract common logic in to `Tablet::_get_segment_column_iterator` (#24048) --- be/src/olap/tablet.cpp | 116 +++++++++++++++++-------------------------------- be/src/olap/tablet.h | 4 ++ 2 files changed, 44 insertions(+), 76 deletions(-) diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 7617c80334..6d806b1a94 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -2638,16 +2638,9 @@ void Tablet::update_max_version_schema(const TabletSchemaSPtr& tablet_schema) { } } -// fetch value by row column -Status Tablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset, uint32_t segid, - const std::vector<uint32_t>& rowids, - const std::vector<uint32_t>& cids, - vectorized::Block& block) { - // read row data - BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(input_rowset); - CHECK(rowset); - - const TabletSchemaSPtr tablet_schema = rowset->tablet_schema(); +Status Tablet::_get_segment_column_iterator( + const BetaRowsetSharedPtr& rowset, uint32_t segid, const TabletColumn& target_column, + std::unique_ptr<segment_v2::ColumnIterator>* column_iterator, OlapReaderStatistics* stats) { SegmentCacheHandle segment_cache; RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset, &segment_cache, true)); // find segment @@ -2658,26 +2651,38 @@ Status Tablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset, uint return Status::NotFound(fmt::format("rowset {} 's segemnt not found, seg_id {}", rowset->rowset_id().to_string(), segid)); } - // read from segment column by column, row by row segment_v2::SegmentSharedPtr segment = *it; + 
RETURN_IF_ERROR(segment->new_column_iterator(target_column, column_iterator)); + segment_v2::ColumnIteratorOptions opt; + opt.file_reader = segment->file_reader().get(); + opt.stats = stats; + opt.use_page_cache = !config::disable_storage_page_cache; + opt.io_ctx.reader_type = ReaderType::READER_QUERY; + (*column_iterator)->init(opt); + return Status::OK(); +} + +// fetch value by row column +Status Tablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset, uint32_t segid, + const std::vector<uint32_t>& rowids, + const std::vector<uint32_t>& cids, + vectorized::Block& block) { MonotonicStopWatch watch; watch.start(); Defer _defer([&]() { LOG_EVERY_N(INFO, 500) << "fetch_value_by_rowids, cost(us):" << watch.elapsed_time() / 1000 << ", row_batch_size:" << rowids.size(); }); + + BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(input_rowset); + CHECK(rowset); + const TabletSchemaSPtr tablet_schema = rowset->tablet_schema(); CHECK(tablet_schema->store_row_column()); - // create _source column std::unique_ptr<segment_v2::ColumnIterator> column_iterator; - RETURN_IF_ERROR(segment->new_column_iterator(tablet_schema->column(BeConsts::ROW_STORE_COL), - &column_iterator)); - segment_v2::ColumnIteratorOptions opt; OlapReaderStatistics stats; - opt.file_reader = segment->file_reader().get(); - opt.stats = &stats; - opt.use_page_cache = !config::disable_storage_page_cache; - opt.io_ctx.reader_type = ReaderType::READER_QUERY; - column_iterator->init(opt); + RETURN_IF_ERROR(_get_segment_column_iterator(rowset, segid, + tablet_schema->column(BeConsts::ROW_STORE_COL), + &column_iterator, &stats)); // get and parse tuple row vectorized::MutableColumnPtr column_ptr = vectorized::ColumnString::create(); RETURN_IF_ERROR(column_iterator->read_by_rowids(rowids.data(), rowids.size(), column_ptr)); @@ -2701,38 +2706,20 @@ Status Tablet::fetch_value_by_rowids(RowsetSharedPtr input_rowset, uint32_t segi const std::vector<uint32_t>& rowids, const TabletColumn& 
tablet_column, vectorized::MutableColumnPtr& dst) { - // read row data - BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(input_rowset); - CHECK(rowset); - - SegmentCacheHandle segment_cache; - RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset, &segment_cache, true)); - // find segment - auto it = std::find_if( - segment_cache.get_segments().begin(), segment_cache.get_segments().end(), - [&segid](const segment_v2::SegmentSharedPtr& seg) { return seg->id() == segid; }); - if (it == segment_cache.get_segments().end()) { - return Status::NotFound(fmt::format("rowset {} 's segemnt not found, seg_id {}", - rowset->rowset_id().to_string(), segid)); - } - // read from segment column by column, row by row - segment_v2::SegmentSharedPtr segment = *it; MonotonicStopWatch watch; watch.start(); Defer _defer([&]() { LOG_EVERY_N(INFO, 500) << "fetch_value_by_rowids, cost(us):" << watch.elapsed_time() / 1000 << ", row_batch_size:" << rowids.size(); }); - // create _source column - std::unique_ptr<segment_v2::ColumnIterator> column_iterator = nullptr; - RETURN_IF_ERROR(segment->new_column_iterator(tablet_column, &column_iterator)); - segment_v2::ColumnIteratorOptions opt; + + // read row data + BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(input_rowset); + CHECK(rowset); + std::unique_ptr<segment_v2::ColumnIterator> column_iterator; OlapReaderStatistics stats; - opt.file_reader = segment->file_reader().get(); - opt.stats = &stats; - opt.use_page_cache = !config::disable_storage_page_cache; - opt.io_ctx.reader_type = ReaderType::READER_QUERY; - column_iterator->init(opt); + RETURN_IF_ERROR( + _get_segment_column_iterator(rowset, segid, tablet_column, &column_iterator, &stats)); RETURN_IF_ERROR(column_iterator->read_by_rowids(rowids.data(), rowids.size(), dst)); return Status::OK(); } @@ -2741,45 +2728,22 @@ Status Tablet::lookup_row_data(const Slice& encoded_key, const RowLocation& row_ RowsetSharedPtr input_rowset, const 
TupleDescriptor* desc, OlapReaderStatistics& stats, std::string& values, bool write_to_cache) { - // read row data - BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(input_rowset); - if (!rowset) { - return Status::NotFound( - fmt::format("rowset {} not found", row_location.rowset_id.to_string())); - } - - const TabletSchemaSPtr tablet_schema = rowset->tablet_schema(); - SegmentCacheHandle segment_cache; - RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset, &segment_cache, true)); - // find segment - auto it = std::find_if(segment_cache.get_segments().begin(), segment_cache.get_segments().end(), - [&row_location](const segment_v2::SegmentSharedPtr& seg) { - return seg->id() == row_location.segment_id; - }); - if (it == segment_cache.get_segments().end()) { - return Status::NotFound(fmt::format("rowset {} 's segemnt not found, seg_id {}", - row_location.rowset_id.to_string(), - row_location.segment_id)); - } - // read from segment column by column, row by row - segment_v2::SegmentSharedPtr segment = *it; - size_t row_size = 0; MonotonicStopWatch watch; + size_t row_size = 1; watch.start(); Defer _defer([&]() { LOG_EVERY_N(INFO, 500) << "get a single_row, cost(us):" << watch.elapsed_time() / 1000 << ", row_size:" << row_size; }); + + BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(input_rowset); + CHECK(rowset); + const TabletSchemaSPtr tablet_schema = rowset->tablet_schema(); CHECK(tablet_schema->store_row_column()); - // create _source column std::unique_ptr<segment_v2::ColumnIterator> column_iterator; - RETURN_IF_ERROR(segment->new_column_iterator(tablet_schema->column(BeConsts::ROW_STORE_COL), - &column_iterator)); - segment_v2::ColumnIteratorOptions opt; - opt.file_reader = segment->file_reader().get(); - opt.stats = &stats; - opt.use_page_cache = !config::disable_storage_page_cache; - column_iterator->init(opt); + RETURN_IF_ERROR(_get_segment_column_iterator(rowset, row_location.segment_id, + 
tablet_schema->column(BeConsts::ROW_STORE_COL), + &column_iterator, &stats)); // get and parse tuple row vectorized::MutableColumnPtr column_ptr = vectorized::ColumnString::create(); std::vector<segment_v2::rowid_t> rowids {static_cast<segment_v2::rowid_t>(row_location.row_id)}; diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index a1684fe9e6..99dfd445b1 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -555,6 +555,10 @@ public: Status check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version, int64_t txn_id, const RowsetIdUnorderedSet& rowset_ids, std::vector<RowsetSharedPtr>* rowsets = nullptr); + Status _get_segment_column_iterator( + const BetaRowsetSharedPtr& rowset, uint32_t segid, const TabletColumn& target_column, + std::unique_ptr<segment_v2::ColumnIterator>* column_iterator, + OlapReaderStatistics* stats); private: Status _init_once_action(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org