This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d7445c5b1e [chore](olap) Unify the parameters into SegmentWriterOptions (#38476)
3d7445c5b1e is described below

commit 3d7445c5b1e4227b14a786592d8483442011e8f2
Author: walter <w41te...@gmail.com>
AuthorDate: Thu Aug 1 10:00:05 2024 +0800

    [chore](olap) Unify the parameters into SegmentWriterOptions (#38476)
---
 be/src/olap/rowset/beta_rowset_writer.cpp          |  8 +++++---
 be/src/olap/rowset/segment_creator.cpp             |  9 +++++----
 be/src/olap/rowset/segment_v2/segment_writer.cpp   | 23 +++++++++-------------
 be/src/olap/rowset/segment_v2/segment_writer.h     | 12 ++++-------
 .../rowset/segment_v2/vertical_segment_writer.cpp  | 14 +++++++------
 .../rowset/segment_v2/vertical_segment_writer.h    |  5 ++---
 be/src/olap/rowset/vertical_beta_rowset_writer.cpp |  7 ++++---
 be/test/olap/delete_bitmap_calculator_test.cpp     |  3 +--
 8 files changed, 38 insertions(+), 43 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 1f07ce098fa..318d7d60502 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -861,10 +861,12 @@ Status 
BetaRowsetWriter::_create_segment_writer_for_segcompaction(
     writer_options.rowset_ctx = &_context;
     writer_options.write_type = _context.write_type;
     writer_options.write_type = DataWriteType::TYPE_COMPACTION;
+    writer_options.max_rows_per_segment = _context.max_rows_per_segment;
+    writer_options.mow_ctx = _context.mow_context;
 
-    *writer = std::make_unique<segment_v2::SegmentWriter>(
-            file_writer.get(), _num_segcompacted, _context.tablet_schema, 
_context.tablet,
-            _context.data_dir, _context.max_rows_per_segment, writer_options, 
_context.mow_context);
+    *writer = std::make_unique<segment_v2::SegmentWriter>(file_writer.get(), 
_num_segcompacted,
+                                                          
_context.tablet_schema, _context.tablet,
+                                                          _context.data_dir, 
writer_options);
     if (auto& seg_writer = _segcompaction_worker->get_file_writer();
         seg_writer != nullptr && seg_writer->state() != 
io::FileWriter::State::CLOSED) {
         RETURN_IF_ERROR(_segcompaction_worker->get_file_writer()->close());
diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp
index 82313f988cb..40f6e8303fe 100644
--- a/be/src/olap/rowset/segment_creator.cpp
+++ b/be/src/olap/rowset/segment_creator.cpp
@@ -150,14 +150,15 @@ Status 
SegmentFlusher::_create_segment_writer(std::unique_ptr<segment_v2::Segmen
     writer_options.enable_unique_key_merge_on_write = 
_context.enable_unique_key_merge_on_write;
     writer_options.rowset_ctx = &_context;
     writer_options.write_type = _context.write_type;
+    writer_options.max_rows_per_segment = _context.max_rows_per_segment;
+    writer_options.mow_ctx = _context.mow_context;
     if (no_compression) {
         writer_options.compression_type = NO_COMPRESSION;
     }
 
     writer = std::make_unique<segment_v2::SegmentWriter>(
             segment_file_writer.get(), segment_id, _context.tablet_schema, 
_context.tablet,
-            _context.data_dir, _context.max_rows_per_segment, writer_options, 
_context.mow_context,
-            std::move(inverted_file_writer));
+            _context.data_dir, writer_options, 
std::move(inverted_file_writer));
     RETURN_IF_ERROR(_seg_files.add(segment_id, 
std::move(segment_file_writer)));
     auto s = writer->init();
     if (!s.ok()) {
@@ -187,14 +188,14 @@ Status SegmentFlusher::_create_segment_writer(
     writer_options.enable_unique_key_merge_on_write = 
_context.enable_unique_key_merge_on_write;
     writer_options.rowset_ctx = &_context;
     writer_options.write_type = _context.write_type;
+    writer_options.mow_ctx = _context.mow_context;
     if (no_compression) {
         writer_options.compression_type = NO_COMPRESSION;
     }
 
     writer = std::make_unique<segment_v2::VerticalSegmentWriter>(
             segment_file_writer.get(), segment_id, _context.tablet_schema, 
_context.tablet,
-            _context.data_dir, _context.max_rows_per_segment, writer_options, 
_context.mow_context,
-            std::move(inverted_file_writer));
+            _context.data_dir, writer_options, 
std::move(inverted_file_writer));
     RETURN_IF_ERROR(_seg_files.add(segment_id, 
std::move(segment_file_writer)));
     auto s = writer->init();
     if (!s.ok()) {
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index bdfcaba8b8e..f21c5fcbab2 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -21,11 +21,6 @@
 #include <gen_cpp/segment_v2.pb.h>
 #include <parallel_hashmap/phmap.h>
 
-#include <algorithm>
-#include <ostream>
-#include <unordered_map>
-#include <utility>
-
 // IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
 #include "cloud/config.h"
 #include "common/compiler_util.h" // IWYU pragma: keep
@@ -82,22 +77,22 @@ using namespace ErrorCode;
 const char* k_segment_magic = "D0R1";
 const uint32_t k_segment_magic_length = 4;
 
+inline std::string segment_mem_tracker_name(uint32_t segment_id) {
+    return "SegmentWriter:Segment-" + std::to_string(segment_id);
+}
+
 SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id,
                              TabletSchemaSPtr tablet_schema, BaseTabletSPtr 
tablet,
-                             DataDir* data_dir, uint32_t max_row_per_segment,
-                             const SegmentWriterOptions& opts,
-                             std::shared_ptr<MowContext> mow_context,
+                             DataDir* data_dir, const SegmentWriterOptions& 
opts,
                              io::FileWriterPtr inverted_file_writer)
         : _segment_id(segment_id),
           _tablet_schema(std::move(tablet_schema)),
           _tablet(std::move(tablet)),
           _data_dir(data_dir),
-          _max_row_per_segment(max_row_per_segment),
           _opts(opts),
           _file_writer(file_writer),
-          _mem_tracker(std::make_unique<MemTracker>("SegmentWriter:Segment-" +
-                                                    
std::to_string(segment_id))),
-          _mow_context(std::move(mow_context)) {
+          
_mem_tracker(std::make_unique<MemTracker>(segment_mem_tracker_name(segment_id))),
+          _mow_context(std::move(opts.mow_ctx)) {
     CHECK_NOTNULL(file_writer);
     _num_key_columns = _tablet_schema->num_key_columns();
     _num_short_key_columns = _tablet_schema->num_short_key_columns();
@@ -958,11 +953,11 @@ Status SegmentWriter::append_block(const 
vectorized::Block* block, size_t row_po
 int64_t SegmentWriter::max_row_to_add(size_t row_avg_size_in_bytes) {
     auto segment_size = estimate_segment_size();
     if (PREDICT_FALSE(segment_size >= MAX_SEGMENT_SIZE ||
-                      _num_rows_written >= _max_row_per_segment)) {
+                      _num_rows_written >= _opts.max_rows_per_segment)) {
         return 0;
     }
     int64_t size_rows = ((int64_t)MAX_SEGMENT_SIZE - (int64_t)segment_size) / 
row_avg_size_in_bytes;
-    int64_t count_rows = (int64_t)_max_row_per_segment - _num_rows_written;
+    int64_t count_rows = (int64_t)_opts.max_rows_per_segment - 
_num_rows_written;
 
     return std::min(size_rows, count_rows);
 }
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h
index 9c667ee92fc..41c3d5da3a7 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -23,18 +23,14 @@
 #include <stddef.h>
 
 #include <cstdint>
-#include <functional>
 #include <map>
 #include <memory> // unique_ptr
 #include <string>
-#include <unordered_set>
 #include <vector>
 
 #include "common/status.h" // Status
 #include "gen_cpp/segment_v2.pb.h"
-#include "gutil/macros.h"
 #include "gutil/strings/substitute.h"
-#include "io/fs/file_system.h"
 #include "olap/olap_define.h"
 #include "olap/rowset/segment_v2/column_writer.h"
 #include "olap/tablet.h"
@@ -71,11 +67,13 @@ extern const uint32_t k_segment_magic_length;
 
 struct SegmentWriterOptions {
     uint32_t num_rows_per_block = 1024;
+    uint32_t max_rows_per_segment = UINT32_MAX;
     bool enable_unique_key_merge_on_write = false;
     CompressionTypePB compression_type = UNKNOWN_COMPRESSION;
 
     RowsetWriterContext* rowset_ctx = nullptr;
     DataWriteType write_type = DataWriteType::TYPE_DEFAULT;
+    std::shared_ptr<MowContext> mow_ctx;
 };
 
 using TabletSharedPtr = std::shared_ptr<Tablet>;
@@ -84,8 +82,7 @@ class SegmentWriter {
 public:
     explicit SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id,
                            TabletSchemaSPtr tablet_schema, BaseTabletSPtr 
tablet, DataDir* data_dir,
-                           uint32_t max_row_per_segment, const 
SegmentWriterOptions& opts,
-                           std::shared_ptr<MowContext> mow_context,
+                           const SegmentWriterOptions& opts,
                            io::FileWriterPtr inverted_file_writer = nullptr);
     ~SegmentWriter();
 
@@ -120,7 +117,7 @@ public:
 
     Status finalize(uint64_t* segment_file_size, uint64_t* index_size);
 
-    uint32_t get_segment_id() { return _segment_id; }
+    uint32_t get_segment_id() const { return _segment_id; }
 
     Status finalize_columns_data();
     Status finalize_columns_index(uint64_t* index_size);
@@ -192,7 +189,6 @@ private:
     TabletSchemaSPtr _tablet_schema;
     BaseTabletSPtr _tablet;
     DataDir* _data_dir = nullptr;
-    uint32_t _max_row_per_segment;
     SegmentWriterOptions _opts;
 
     // Not owned. owned by RowsetWriter or SegmentFlusher
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index ba1bfcf3535..34cfed8502f 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -20,7 +20,6 @@
 #include <gen_cpp/segment_v2.pb.h>
 #include <parallel_hashmap/phmap.h>
 
-#include <algorithm>
 #include <cassert>
 #include <memory>
 #include <ostream>
@@ -80,11 +79,14 @@ using namespace ErrorCode;
 static const char* k_segment_magic = "D0R1";
 static const uint32_t k_segment_magic_length = 4;
 
+inline std::string vertical_segment_writer_mem_tracker_name(uint32_t 
segment_id) {
+    return "VerticalSegmentWriter:Segment-" + std::to_string(segment_id);
+}
+
 VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, 
uint32_t segment_id,
                                              TabletSchemaSPtr tablet_schema, 
BaseTabletSPtr tablet,
-                                             DataDir* data_dir, uint32_t 
max_row_per_segment,
+                                             DataDir* data_dir,
                                              const 
VerticalSegmentWriterOptions& opts,
-                                             std::shared_ptr<MowContext> 
mow_context,
                                              io::FileWriterPtr 
inverted_file_writer)
         : _segment_id(segment_id),
           _tablet_schema(std::move(tablet_schema)),
@@ -92,9 +94,9 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* 
file_writer, uint32
           _data_dir(data_dir),
           _opts(opts),
           _file_writer(file_writer),
-          
_mem_tracker(std::make_unique<MemTracker>("VerticalSegmentWriter:Segment-" +
-                                                    
std::to_string(segment_id))),
-          _mow_context(std::move(mow_context)) {
+          _mem_tracker(std::make_unique<MemTracker>(
+                  vertical_segment_writer_mem_tracker_name(segment_id))),
+          _mow_context(std::move(opts.mow_ctx)) {
     CHECK_NOTNULL(file_writer);
     _num_key_columns = _tablet_schema->num_key_columns();
     _num_short_key_columns = _tablet_schema->num_short_key_columns();
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
index 8068b3e44be..c52deea40a0 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
@@ -68,6 +68,7 @@ struct VerticalSegmentWriterOptions {
 
     RowsetWriterContext* rowset_ctx = nullptr;
     DataWriteType write_type = DataWriteType::TYPE_DEFAULT;
+    std::shared_ptr<MowContext> mow_ctx;
 };
 
 struct RowsInBlock {
@@ -80,9 +81,7 @@ class VerticalSegmentWriter {
 public:
     explicit VerticalSegmentWriter(io::FileWriter* file_writer, uint32_t 
segment_id,
                                    TabletSchemaSPtr tablet_schema, 
BaseTabletSPtr tablet,
-                                   DataDir* data_dir, uint32_t 
max_row_per_segment,
-                                   const VerticalSegmentWriterOptions& opts,
-                                   std::shared_ptr<MowContext> mow_context,
+                                   DataDir* data_dir, const 
VerticalSegmentWriterOptions& opts,
                                    io::FileWriterPtr inverted_file_writer = 
nullptr);
     ~VerticalSegmentWriter();
 
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index 863f0f597aa..1db74843697 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -186,9 +186,10 @@ Status VerticalBetaRowsetWriter<T>::_create_segment_writer(
     segment_v2::SegmentWriterOptions writer_options;
     writer_options.enable_unique_key_merge_on_write = 
context.enable_unique_key_merge_on_write;
     writer_options.rowset_ctx = &context;
-    *writer = std::make_unique<segment_v2::SegmentWriter>(
-            file_writer.get(), seg_id, context.tablet_schema, context.tablet, 
context.data_dir,
-            context.max_rows_per_segment, writer_options, nullptr);
+    writer_options.max_rows_per_segment = context.max_rows_per_segment;
+    *writer = std::make_unique<segment_v2::SegmentWriter>(file_writer.get(), 
seg_id,
+                                                          
context.tablet_schema, context.tablet,
+                                                          context.data_dir, 
writer_options);
     RETURN_IF_ERROR(this->_seg_files.add(seg_id, std::move(file_writer)));
 
     auto s = (*writer)->init(column_ids, is_key);
diff --git a/be/test/olap/delete_bitmap_calculator_test.cpp b/be/test/olap/delete_bitmap_calculator_test.cpp
index 00856f69f10..7e527078613 100644
--- a/be/test/olap/delete_bitmap_calculator_test.cpp
+++ b/be/test/olap/delete_bitmap_calculator_test.cpp
@@ -103,8 +103,7 @@ public:
         io::FileWriterPtr file_writer;
         Status st = fs->create_file(path, &file_writer);
         EXPECT_TRUE(st.ok());
-        SegmentWriter writer(file_writer.get(), segment_id, build_schema, 
nullptr, nullptr,
-                             INT32_MAX, opts, nullptr);
+        SegmentWriter writer(file_writer.get(), segment_id, build_schema, 
nullptr, nullptr, opts);
         st = writer.init();
         EXPECT_TRUE(st.ok());
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to