This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 3d7445c5b1e [chore](olap) Unify the parameters into SegmentWriterOptions (#38476) 3d7445c5b1e is described below commit 3d7445c5b1e4227b14a786592d8483442011e8f2 Author: walter <w41te...@gmail.com> AuthorDate: Thu Aug 1 10:00:05 2024 +0800 [chore](olap) Unify the parameters into SegmentWriterOptions (#38476) --- be/src/olap/rowset/beta_rowset_writer.cpp | 8 +++++--- be/src/olap/rowset/segment_creator.cpp | 9 +++++---- be/src/olap/rowset/segment_v2/segment_writer.cpp | 23 +++++++++------------- be/src/olap/rowset/segment_v2/segment_writer.h | 12 ++++------- .../rowset/segment_v2/vertical_segment_writer.cpp | 14 +++++++------ .../rowset/segment_v2/vertical_segment_writer.h | 5 ++--- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 7 ++++--- be/test/olap/delete_bitmap_calculator_test.cpp | 3 +-- 8 files changed, 38 insertions(+), 43 deletions(-) diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 1f07ce098fa..318d7d60502 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -861,10 +861,12 @@ Status BetaRowsetWriter::_create_segment_writer_for_segcompaction( writer_options.rowset_ctx = &_context; writer_options.write_type = _context.write_type; writer_options.write_type = DataWriteType::TYPE_COMPACTION; + writer_options.max_rows_per_segment = _context.max_rows_per_segment; + writer_options.mow_ctx = _context.mow_context; - *writer = std::make_unique<segment_v2::SegmentWriter>( - file_writer.get(), _num_segcompacted, _context.tablet_schema, _context.tablet, - _context.data_dir, _context.max_rows_per_segment, writer_options, _context.mow_context); + *writer = std::make_unique<segment_v2::SegmentWriter>(file_writer.get(), _num_segcompacted, + _context.tablet_schema, _context.tablet, + _context.data_dir, writer_options); if (auto& seg_writer = _segcompaction_worker->get_file_writer(); 
seg_writer != nullptr && seg_writer->state() != io::FileWriter::State::CLOSED) { RETURN_IF_ERROR(_segcompaction_worker->get_file_writer()->close()); diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp index 82313f988cb..40f6e8303fe 100644 --- a/be/src/olap/rowset/segment_creator.cpp +++ b/be/src/olap/rowset/segment_creator.cpp @@ -150,14 +150,15 @@ Status SegmentFlusher::_create_segment_writer(std::unique_ptr<segment_v2::Segmen writer_options.enable_unique_key_merge_on_write = _context.enable_unique_key_merge_on_write; writer_options.rowset_ctx = &_context; writer_options.write_type = _context.write_type; + writer_options.max_rows_per_segment = _context.max_rows_per_segment; + writer_options.mow_ctx = _context.mow_context; if (no_compression) { writer_options.compression_type = NO_COMPRESSION; } writer = std::make_unique<segment_v2::SegmentWriter>( segment_file_writer.get(), segment_id, _context.tablet_schema, _context.tablet, - _context.data_dir, _context.max_rows_per_segment, writer_options, _context.mow_context, - std::move(inverted_file_writer)); + _context.data_dir, writer_options, std::move(inverted_file_writer)); RETURN_IF_ERROR(_seg_files.add(segment_id, std::move(segment_file_writer))); auto s = writer->init(); if (!s.ok()) { @@ -187,14 +188,14 @@ Status SegmentFlusher::_create_segment_writer( writer_options.enable_unique_key_merge_on_write = _context.enable_unique_key_merge_on_write; writer_options.rowset_ctx = &_context; writer_options.write_type = _context.write_type; + writer_options.mow_ctx = _context.mow_context; if (no_compression) { writer_options.compression_type = NO_COMPRESSION; } writer = std::make_unique<segment_v2::VerticalSegmentWriter>( segment_file_writer.get(), segment_id, _context.tablet_schema, _context.tablet, - _context.data_dir, _context.max_rows_per_segment, writer_options, _context.mow_context, - std::move(inverted_file_writer)); + _context.data_dir, writer_options, 
std::move(inverted_file_writer)); RETURN_IF_ERROR(_seg_files.add(segment_id, std::move(segment_file_writer))); auto s = writer->init(); if (!s.ok()) { diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index bdfcaba8b8e..f21c5fcbab2 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -21,11 +21,6 @@ #include <gen_cpp/segment_v2.pb.h> #include <parallel_hashmap/phmap.h> -#include <algorithm> -#include <ostream> -#include <unordered_map> -#include <utility> - // IWYU pragma: no_include <opentelemetry/common/threadlocal.h> #include "cloud/config.h" #include "common/compiler_util.h" // IWYU pragma: keep @@ -82,22 +77,22 @@ using namespace ErrorCode; const char* k_segment_magic = "D0R1"; const uint32_t k_segment_magic_length = 4; +inline std::string segment_mem_tracker_name(uint32_t segment_id) { + return "SegmentWriter:Segment-" + std::to_string(segment_id); +} + SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, - DataDir* data_dir, uint32_t max_row_per_segment, - const SegmentWriterOptions& opts, - std::shared_ptr<MowContext> mow_context, + DataDir* data_dir, const SegmentWriterOptions& opts, io::FileWriterPtr inverted_file_writer) : _segment_id(segment_id), _tablet_schema(std::move(tablet_schema)), _tablet(std::move(tablet)), _data_dir(data_dir), - _max_row_per_segment(max_row_per_segment), _opts(opts), _file_writer(file_writer), - _mem_tracker(std::make_unique<MemTracker>("SegmentWriter:Segment-" + - std::to_string(segment_id))), - _mow_context(std::move(mow_context)) { + _mem_tracker(std::make_unique<MemTracker>(segment_mem_tracker_name(segment_id))), + _mow_context(std::move(opts.mow_ctx)) { CHECK_NOTNULL(file_writer); _num_key_columns = _tablet_schema->num_key_columns(); _num_short_key_columns = _tablet_schema->num_short_key_columns(); @@ -958,11 
+953,11 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po int64_t SegmentWriter::max_row_to_add(size_t row_avg_size_in_bytes) { auto segment_size = estimate_segment_size(); if (PREDICT_FALSE(segment_size >= MAX_SEGMENT_SIZE || - _num_rows_written >= _max_row_per_segment)) { + _num_rows_written >= _opts.max_rows_per_segment)) { return 0; } int64_t size_rows = ((int64_t)MAX_SEGMENT_SIZE - (int64_t)segment_size) / row_avg_size_in_bytes; - int64_t count_rows = (int64_t)_max_row_per_segment - _num_rows_written; + int64_t count_rows = (int64_t)_opts.max_rows_per_segment - _num_rows_written; return std::min(size_rows, count_rows); } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index 9c667ee92fc..41c3d5da3a7 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -23,18 +23,14 @@ #include <stddef.h> #include <cstdint> -#include <functional> #include <map> #include <memory> // unique_ptr #include <string> -#include <unordered_set> #include <vector> #include "common/status.h" // Status #include "gen_cpp/segment_v2.pb.h" -#include "gutil/macros.h" #include "gutil/strings/substitute.h" -#include "io/fs/file_system.h" #include "olap/olap_define.h" #include "olap/rowset/segment_v2/column_writer.h" #include "olap/tablet.h" @@ -71,11 +67,13 @@ extern const uint32_t k_segment_magic_length; struct SegmentWriterOptions { uint32_t num_rows_per_block = 1024; + uint32_t max_rows_per_segment = UINT32_MAX; bool enable_unique_key_merge_on_write = false; CompressionTypePB compression_type = UNKNOWN_COMPRESSION; RowsetWriterContext* rowset_ctx = nullptr; DataWriteType write_type = DataWriteType::TYPE_DEFAULT; + std::shared_ptr<MowContext> mow_ctx; }; using TabletSharedPtr = std::shared_ptr<Tablet>; @@ -84,8 +82,7 @@ class SegmentWriter { public: explicit SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr 
tablet_schema, BaseTabletSPtr tablet, DataDir* data_dir, - uint32_t max_row_per_segment, const SegmentWriterOptions& opts, - std::shared_ptr<MowContext> mow_context, + const SegmentWriterOptions& opts, io::FileWriterPtr inverted_file_writer = nullptr); ~SegmentWriter(); @@ -120,7 +117,7 @@ public: Status finalize(uint64_t* segment_file_size, uint64_t* index_size); - uint32_t get_segment_id() { return _segment_id; } + uint32_t get_segment_id() const { return _segment_id; } Status finalize_columns_data(); Status finalize_columns_index(uint64_t* index_size); @@ -192,7 +189,6 @@ private: TabletSchemaSPtr _tablet_schema; BaseTabletSPtr _tablet; DataDir* _data_dir = nullptr; - uint32_t _max_row_per_segment; SegmentWriterOptions _opts; // Not owned. owned by RowsetWriter or SegmentFlusher diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index ba1bfcf3535..34cfed8502f 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -20,7 +20,6 @@ #include <gen_cpp/segment_v2.pb.h> #include <parallel_hashmap/phmap.h> -#include <algorithm> #include <cassert> #include <memory> #include <ostream> @@ -80,11 +79,14 @@ using namespace ErrorCode; static const char* k_segment_magic = "D0R1"; static const uint32_t k_segment_magic_length = 4; +inline std::string vertical_segment_writer_mem_tracker_name(uint32_t segment_id) { + return "VerticalSegmentWriter:Segment-" + std::to_string(segment_id); +} + VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, - DataDir* data_dir, uint32_t max_row_per_segment, + DataDir* data_dir, const VerticalSegmentWriterOptions& opts, - std::shared_ptr<MowContext> mow_context, io::FileWriterPtr inverted_file_writer) : _segment_id(segment_id), _tablet_schema(std::move(tablet_schema)), @@ -92,9 +94,9 @@ 
VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 _data_dir(data_dir), _opts(opts), _file_writer(file_writer), - _mem_tracker(std::make_unique<MemTracker>("VerticalSegmentWriter:Segment-" + - std::to_string(segment_id))), - _mow_context(std::move(mow_context)) { + _mem_tracker(std::make_unique<MemTracker>( + vertical_segment_writer_mem_tracker_name(segment_id))), + _mow_context(std::move(opts.mow_ctx)) { CHECK_NOTNULL(file_writer); _num_key_columns = _tablet_schema->num_key_columns(); _num_short_key_columns = _tablet_schema->num_short_key_columns(); diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h index 8068b3e44be..c52deea40a0 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h @@ -68,6 +68,7 @@ struct VerticalSegmentWriterOptions { RowsetWriterContext* rowset_ctx = nullptr; DataWriteType write_type = DataWriteType::TYPE_DEFAULT; + std::shared_ptr<MowContext> mow_ctx; }; struct RowsInBlock { @@ -80,9 +81,7 @@ class VerticalSegmentWriter { public: explicit VerticalSegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, - DataDir* data_dir, uint32_t max_row_per_segment, - const VerticalSegmentWriterOptions& opts, - std::shared_ptr<MowContext> mow_context, + DataDir* data_dir, const VerticalSegmentWriterOptions& opts, io::FileWriterPtr inverted_file_writer = nullptr); ~VerticalSegmentWriter(); diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 863f0f597aa..1db74843697 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -186,9 +186,10 @@ Status VerticalBetaRowsetWriter<T>::_create_segment_writer( segment_v2::SegmentWriterOptions writer_options; 
writer_options.enable_unique_key_merge_on_write = context.enable_unique_key_merge_on_write; writer_options.rowset_ctx = &context; - *writer = std::make_unique<segment_v2::SegmentWriter>( - file_writer.get(), seg_id, context.tablet_schema, context.tablet, context.data_dir, - context.max_rows_per_segment, writer_options, nullptr); + writer_options.max_rows_per_segment = context.max_rows_per_segment; + *writer = std::make_unique<segment_v2::SegmentWriter>(file_writer.get(), seg_id, + context.tablet_schema, context.tablet, + context.data_dir, writer_options); RETURN_IF_ERROR(this->_seg_files.add(seg_id, std::move(file_writer))); auto s = (*writer)->init(column_ids, is_key); diff --git a/be/test/olap/delete_bitmap_calculator_test.cpp b/be/test/olap/delete_bitmap_calculator_test.cpp index 00856f69f10..7e527078613 100644 --- a/be/test/olap/delete_bitmap_calculator_test.cpp +++ b/be/test/olap/delete_bitmap_calculator_test.cpp @@ -103,8 +103,7 @@ public: io::FileWriterPtr file_writer; Status st = fs->create_file(path, &file_writer); EXPECT_TRUE(st.ok()); - SegmentWriter writer(file_writer.get(), segment_id, build_schema, nullptr, nullptr, - INT32_MAX, opts, nullptr); + SegmentWriter writer(file_writer.get(), segment_id, build_schema, nullptr, nullptr, opts); st = writer.init(); EXPECT_TRUE(st.ok()); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org