This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 3986eb54467 [fix](rowset writer)check segment num when build rowset meta #30460 (#30804) 3986eb54467 is described below commit 3986eb544675a2829f4fa1a4f9dd9daf913957e3 Author: HHoflittlefish777 <77738092+hhoflittlefish...@users.noreply.github.com> AuthorDate: Sun Feb 4 20:26:01 2024 +0800 [fix](rowset writer)check segment num when build rowset meta #30460 (#30804) --- be/src/olap/memtable.cpp | 8 ++++--- be/src/olap/rowset/beta_rowset_writer.cpp | 38 +++++++++++++++++++++++-------- be/src/olap/rowset/beta_rowset_writer.h | 5 ++-- be/src/olap/rowset/rowset_writer.h | 2 +- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index ea3c0fcf6b6..5d272c1a754 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -465,8 +465,10 @@ Status MemTable::_generate_delete_bitmap(int32_t segment_id) { if (!_tablet->enable_unique_key_merge_on_write()) { return Status::OK(); } - auto rowset = _rowset_writer->build_tmp(); - auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset.get()); + + RowsetSharedPtr rowset_ptr; + RETURN_IF_ERROR(_rowset_writer->build_tmp(rowset_ptr)); + auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset_ptr.get()); std::vector<segment_v2::SegmentSharedPtr> segments; RETURN_IF_ERROR(beta_rowset->load_segments(segment_id, segment_id + 1, &segments)); std::vector<RowsetSharedPtr> specified_rowsets; @@ -475,7 +477,7 @@ Status MemTable::_generate_delete_bitmap(int32_t segment_id) { specified_rowsets = _tablet->get_rowset_by_ids(&_mow_context->rowset_ids); } OlapStopWatch watch; - RETURN_IF_ERROR(_tablet->calc_delete_bitmap(rowset, segments, specified_rowsets, + RETURN_IF_ERROR(_tablet->calc_delete_bitmap(rowset_ptr, segments, specified_rowsets, _mow_context->delete_bitmap, _mow_context->max_version, nullptr)); size_t total_rows = std::accumulate( diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index d09f7a7acf3..3c3f71917ad 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -442,6 +442,7 @@ Status BetaRowsetWriter::add_rowset(RowsetSharedPtr rowset) { _next_segment_id = _num_segment.load(); // append key_bounds to current rowset RETURN_IF_ERROR(rowset->get_segments_key_bounds(&_segments_encoded_key_bounds)); + // TODO update zonemap if (rowset->rowset_meta()->has_delete_predicate()) { _rowset_meta->set_delete_predicate(rowset->rowset_meta()->delete_predicate()); @@ -545,7 +546,7 @@ Status BetaRowsetWriter::build(RowsetSharedPtr& rowset) { // When building a rowset, we must ensure that the current _segment_writer has been // flushed, that is, the current _segment_writer is nullptr DCHECK(_segment_writer == nullptr) << "segment must be null when build rowset"; - _build_rowset_meta(_rowset_meta); + RETURN_IF_ERROR(_build_rowset_meta(_rowset_meta, true)); if (_rowset_meta->newest_write_timestamp() == -1) { _rowset_meta->set_newest_write_timestamp(UnixSeconds()); @@ -608,7 +609,8 @@ void BetaRowsetWriter::_build_rowset_meta_with_spec_field( rowset_meta->set_segments_key_bounds(segments_key_bounds); } -void BetaRowsetWriter::_build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_meta) { +Status BetaRowsetWriter::_build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_meta, + bool check_segment_num) { int64_t num_seg = _is_segcompacted() ? _num_segcompacted : _num_segment; int64_t num_rows_written = 0; int64_t total_data_size = 0; @@ -634,8 +636,18 @@ void BetaRowsetWriter::_build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_met rowset_meta->set_segments_overlap(NONOVERLAPPING); } + if (check_segment_num) { + auto segments_encoded_key_bounds_size = segments_encoded_key_bounds.size(); + if (segments_encoded_key_bounds_size != num_seg) { + return Status::InternalError( + "segments_encoded_key_bounds_size should equal to _num_seg, " + "segments_encoded_key_bounds_size " + "is: {}, _num_seg is: {}", + segments_encoded_key_bounds_size, num_seg); + } + } + rowset_meta->set_num_segments(num_seg); - // TODO(zhangzhengyu): key_bounds.size() should equal num_seg, but currently not always rowset_meta->set_num_rows(num_rows_written + _num_rows_written); rowset_meta->set_total_disk_size(total_data_size + _total_data_size); rowset_meta->set_data_disk_size(total_data_size + _total_data_size); @@ -650,21 +662,27 @@ void BetaRowsetWriter::_build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_met } else { rowset_meta->set_rowset_state(VISIBLE); } + + return Status::OK(); } -RowsetSharedPtr BetaRowsetWriter::build_tmp() { +Status BetaRowsetWriter::build_tmp(RowsetSharedPtr& rowset_ptr) { + Status status; std::shared_ptr<RowsetMeta> rowset_meta_ = std::make_shared<RowsetMeta>(); *rowset_meta_ = *_rowset_meta; - _build_rowset_meta(rowset_meta_); + status = _build_rowset_meta(rowset_meta_); + if (!status.ok()) { + LOG(WARNING) << "failed to build rowset meta, res=" << status; + return status; + } - RowsetSharedPtr rowset; - auto status = RowsetFactory::create_rowset(_context.tablet_schema, _context.rowset_dir, - rowset_meta_, &rowset); + status = RowsetFactory::create_rowset(_context.tablet_schema, _context.rowset_dir, rowset_meta_, + &rowset_ptr); if (!status.ok()) { LOG(WARNING) << "rowset init failed when build new rowset, res=" << status; - return nullptr; + return status; } - return rowset; + return Status::OK(); } Status BetaRowsetWriter::_do_create_segment_writer( diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h index 03b5e7d85ba..063b4fa0e66 100644 --- a/be/src/olap/rowset/beta_rowset_writer.h +++ b/be/src/olap/rowset/beta_rowset_writer.h @@ -89,7 +89,7 @@ public: // build a tmp rowset for load segment to calc delete_bitmap // for this segment - RowsetSharedPtr build_tmp() override; + Status build_tmp(RowsetSharedPtr& rowset_ptr) override; RowsetSharedPtr manual_build(const RowsetMetaSharedPtr& rowset_meta) override; @@ -149,7 +149,8 @@ private: const FlushContext* ctx = nullptr); Status _flush_segment_writer(std::unique_ptr<segment_v2::SegmentWriter>* writer, int64_t* flush_size = nullptr); - void _build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_meta); + Status _build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_meta, + bool check_segment_num = false); Status _segcompaction_if_necessary(); Status _segcompaction_rename_last_segments(); Status _load_noncompacted_segment(segment_v2::SegmentSharedPtr& segment, int32_t segment_id); diff --git a/be/src/olap/rowset/rowset_writer.h b/be/src/olap/rowset/rowset_writer.h index 2e38069bb89..6f8824b46ea 100644 --- a/be/src/olap/rowset/rowset_writer.h +++ b/be/src/olap/rowset/rowset_writer.h @@ -92,7 +92,7 @@ public: // we have to load segment data to build delete_bitmap for current segment, // so we build a tmp rowset ptr to load segment data. // real build will be called in DeltaWriter close_wait. - virtual RowsetSharedPtr build_tmp() = 0; + virtual Status build_tmp(RowsetSharedPtr& rowset_ptr) = 0; // For ordered rowset compaction, manual build rowset virtual RowsetSharedPtr manual_build(const RowsetMetaSharedPtr& rowset_meta) = 0; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org