github-actions[bot] commented on code in PR #16990: URL: https://github.com/apache/doris/pull/16990#discussion_r1122889021
########## be/src/io/hdfs_builder.h: ########## @@ -31,13 +31,20 @@ const std::string KERBEROS_KEYTAB = "hadoop.kerberos.keytab"; const std::string TICKET_CACHE_PATH = "/tmp/krb5cc_doris_"; class HDFSCommonBuilder { - friend HDFSCommonBuilder createHDFSBuilder(const THdfsParams& hdfsParams); - friend HDFSCommonBuilder createHDFSBuilder( - const std::map<std::string, std::string>& properties); + friend Status createHDFSBuilder(const THdfsParams& hdfsParams, HDFSCommonBuilder* builder); + friend Status createHDFSBuilder(const std::map<std::string, std::string>& properties, + HDFSCommonBuilder* builder); public: - HDFSCommonBuilder() : hdfs_builder(hdfsNewBuilder()) {} - ~HDFSCommonBuilder() { hdfsFreeBuilder(hdfs_builder); } + HDFSCommonBuilder() {} Review Comment: warning: use '= default' to define a trivial default constructor [modernize-use-equals-default] ```suggestion HDFSCommonBuilder() = default; ``` ########## be/src/olap/tablet.cpp: ########## @@ -420,6 +372,7 @@ Status Tablet::modify_rowsets(std::vector<RowsetSharedPtr>& to_add, } void Tablet::add_rowsets(const std::vector<RowsetSharedPtr>& to_add) { + if (to_add.empty()) return; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (to_add.empty()) { return; } ``` ########## be/src/olap/rowset/segcompaction.h: ########## @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once +#include <stdint.h> + +#include <ctime> // time +#include <memory> +#include <sstream> + +#include "common/status.h" +#include "olap/merger.h" +#include "olap/rowset/rowset_writer.h" +#include "segment_v2/segment.h" +#include "segment_v2/segment_writer.h" +#include "vec/olap/vertical_block_reader.h" + +namespace doris { +using SegCompactionCandidates = std::vector<segment_v2::SegmentSharedPtr>; +using SegCompactionCandidatesSharedPtr = std::shared_ptr<SegCompactionCandidates>; + +class BetaRowsetWriter; + +class SegcompactionWorker { + friend class BetaRowsetWriter; + +public: + SegcompactionWorker(BetaRowsetWriter* writer) { _writer = writer; } + + void compact_segments(SegCompactionCandidatesSharedPtr segments); + + io::FileWriterPtr& get_file_writer() { return _file_writer; } + +private: + Status _create_segment_writer_for_segcompaction( + std::unique_ptr<segment_v2::SegmentWriter>* writer, uint64_t begin, uint64_t end); + Status _get_segcompaction_reader(SegCompactionCandidatesSharedPtr segments, + TabletSharedPtr tablet, std::shared_ptr<Schema> schema, + OlapReaderStatistics* stat, + vectorized::RowSourcesBuffer& row_sources_buf, bool is_key, + std::vector<uint32_t>& return_columns, + std::unique_ptr<vectorized::VerticalBlockReader>* reader); + std::unique_ptr<segment_v2::SegmentWriter> _create_segcompaction_writer(uint64_t begin, + uint64_t end); + Status _delete_original_segments(uint32_t begin, uint32_t end); + Status _check_correctness(OlapReaderStatistics& reader_stat, Merger::Statistics& merger_stat, + uint64_t begin, uint64_t end); + Status _do_compact_segments(SegCompactionCandidatesSharedPtr segments); + +private: Review Comment: warning: redundant access specifier has the same accessibility as the previous access specifier [readability-redundant-access-specifiers] ```suggestion ``` **be/src/olap/rowset/segcompaction.h:47:** previously declared here ```cpp private: ^ ``` ########## be/src/runtime/memory/mem_tracker.h: ########## @@ -43,6 +43,50 @@ class MemTracker { int64_t peak_consumption = 0; }; + // A counter that keeps track of the current and peak value seen. + // Relaxed ordering, not accurate in real time. + class MemCounter { + public: + MemCounter() : _current_value(0), _peak_value(0) {} + + void add(int64_t delta) { + _current_value.fetch_add(delta, std::memory_order_relaxed); + update_peak(); + } + + void add_no_update_peak(int64_t delta) { + _current_value.fetch_add(delta, std::memory_order_relaxed); + } + + bool try_add(int64_t delta, int64_t max) { + if (UNLIKELY(_current_value.load(std::memory_order_relaxed) + delta > max)) + return false; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (UNLIKELY(_current_value.load(std::memory_order_relaxed) + delta > max)) { return false; } ``` ########## be/src/olap/tablet.cpp: ########## @@ -431,6 +384,7 @@ } void Tablet::delete_rowsets(const std::vector<RowsetSharedPtr>& to_delete, bool move_to_stale) { + if (to_delete.empty()) return; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (to_delete.empty()) { return; } ``` ########## be/test/olap/segcompaction_test.cpp: ########## @@ -184,6 +186,27 @@ rowset_writer_context->tablet_schema = tablet_schema; rowset_writer_context->version.first = 10; rowset_writer_context->version.second = 10; + +#if 0 + TCreateTabletReq req; + req.table_id = + req.tablet_id = + req.tablet_scheme = + req.partition_id = + l_engine->create_tablet(req); + rowset_writer_context->tablet = l_engine->tablet_manager()->get_tablet(TTabletId tablet_id); +#endif + std::shared_ptr<DataDir> data_dir = std::make_shared<DataDir>(lTestDir); + TabletMetaSharedPtr tablet_meta = std::make_shared<TabletMeta>(); + tablet_meta->_tablet_id = 1; + tablet_meta->_schema = tablet_schema; Review Comment: warning: '_schema' is a private member of 'doris::TabletMeta' [clang-diagnostic-error] ```cpp tablet_meta->_schema = tablet_schema; ^ ``` **be/src/olap/tablet_meta.h:228:** declared private here ```cpp TabletSchemaSPtr _schema; ^ ``` ########## be/test/olap/segcompaction_test.cpp: ########## @@ -200,12 +223,124 @@ std::unique_ptr<DataDir> _data_dir; }; +TEST_F(SegCompactionTest, SegCompactionThenRead) { + config::enable_segcompaction = true; + config::enable_storage_vectorization = true; + Status s; + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(tablet_schema, DUP_KEYS); + + RowsetSharedPtr rowset; + const int num_segments = 15; + const uint32_t rows_per_segment = 4096; + config::segcompaction_small_threshold = 6000; // set threshold above + // rows_per_segment + config::segcompaction_threshold_segment_num = 10; + std::vector<uint32_t> segment_num_rows; + { // write `num_segments * rows_per_segment` rows to rowset + RowsetWriterContext writer_context; + create_rowset_writer_context(10047, tablet_schema, &writer_context); + + std::unique_ptr<RowsetWriter> rowset_writer; + s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + EXPECT_EQ(Status::OK(), s); + + RowCursor input_row; + input_row.init(tablet_schema); + + // for segment "i", row "rid" + // k1 := rid*10 + i + // k2 := k1 * 10 + // k3 := rid + for (int i = 0; i < num_segments; ++i) { + MemPool mem_pool; + for (int rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + i; + uint32_t k2 = i; + uint32_t k3 = rid; + input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); Review Comment: warning: no member named 'set_field_content' in 'doris::RowCursor' [clang-diagnostic-error] ```cpp input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); ^ ``` ########## be/test/olap/segcompaction_test.cpp: ########## @@ -200,12 +223,124 @@ std::unique_ptr<DataDir> _data_dir; }; +TEST_F(SegCompactionTest, SegCompactionThenRead) { + config::enable_segcompaction = true; + config::enable_storage_vectorization = true; + Status s; + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(tablet_schema, DUP_KEYS); + + RowsetSharedPtr rowset; + const int num_segments = 15; + const uint32_t rows_per_segment = 4096; + config::segcompaction_small_threshold = 6000; // set threshold above + // rows_per_segment + config::segcompaction_threshold_segment_num = 10; + std::vector<uint32_t> segment_num_rows; + { // write `num_segments * rows_per_segment` rows to rowset + RowsetWriterContext writer_context; + create_rowset_writer_context(10047, tablet_schema, &writer_context); + + std::unique_ptr<RowsetWriter> rowset_writer; + s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + EXPECT_EQ(Status::OK(), s); + + RowCursor input_row; + input_row.init(tablet_schema); + + // for segment "i", row "rid" + // k1 := rid*10 + i + // k2 := k1 * 10 + // k3 := rid + for (int i = 0; i < num_segments; ++i) { + MemPool mem_pool; + for (int rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + i; + uint32_t k2 = i; + uint32_t k3 = rid; + input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); + input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool); + input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool); Review Comment: warning: no member named 'set_field_content' in 'doris::RowCursor' [clang-diagnostic-error] ```cpp input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool); ^ ``` ########## be/test/olap/segcompaction_test.cpp: ########## @@ -184,6 +186,27 @@ rowset_writer_context->tablet_schema = tablet_schema; rowset_writer_context->version.first = 10; rowset_writer_context->version.second = 10; + +#if 0 + TCreateTabletReq req; + req.table_id = + req.tablet_id = + req.tablet_scheme = + req.partition_id = + l_engine->create_tablet(req); + rowset_writer_context->tablet = l_engine->tablet_manager()->get_tablet(TTabletId tablet_id); +#endif + std::shared_ptr<DataDir> data_dir = std::make_shared<DataDir>(lTestDir); + TabletMetaSharedPtr tablet_meta = std::make_shared<TabletMeta>(); + tablet_meta->_tablet_id = 1; + tablet_meta->_schema = tablet_schema; + auto tablet = std::make_shared<Tablet>(tablet_meta, data_dir.get(), "test_str"); + char* tmp_str = (char*)malloc(20); + strncpy(tmp_str, "test_tablet_name", 20); + + tablet->_full_name = tmp_str; Review Comment: warning: '_full_name' is a protected member of 'doris::BaseTablet' [clang-diagnostic-error] ```cpp tablet->_full_name = tmp_str; ^ ``` **be/src/olap/base_tablet.h:84:** declared protected here ```cpp std::string _full_name; ^ ``` ########## be/test/olap/segcompaction_test.cpp: ########## @@ -200,12 +223,124 @@ std::unique_ptr<DataDir> _data_dir; }; +TEST_F(SegCompactionTest, SegCompactionThenRead) { + config::enable_segcompaction = true; + config::enable_storage_vectorization = true; + Status s; + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(tablet_schema, DUP_KEYS); + + RowsetSharedPtr rowset; + const int num_segments = 15; + const uint32_t rows_per_segment = 4096; + config::segcompaction_small_threshold = 6000; // set threshold above + // rows_per_segment + config::segcompaction_threshold_segment_num = 10; + std::vector<uint32_t> segment_num_rows; + { // write `num_segments * rows_per_segment` rows to rowset + RowsetWriterContext writer_context; + create_rowset_writer_context(10047, tablet_schema, &writer_context); + + std::unique_ptr<RowsetWriter> rowset_writer; + s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + EXPECT_EQ(Status::OK(), s); + + RowCursor input_row; + input_row.init(tablet_schema); + + // for segment "i", row "rid" + // k1 := rid*10 + i + // k2 := k1 * 10 + // k3 := rid + for (int i = 0; i < num_segments; ++i) { + MemPool mem_pool; + for (int rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + i; + uint32_t k2 = i; + uint32_t k3 = rid; + input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); + input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool); Review Comment: warning: no member named 'set_field_content' in 'doris::RowCursor' [clang-diagnostic-error] ```cpp input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool); ^ ``` ########## be/test/olap/segcompaction_test.cpp: ########## @@ -200,12 +223,124 @@ std::unique_ptr<DataDir> _data_dir; }; +TEST_F(SegCompactionTest, SegCompactionThenRead) { + config::enable_segcompaction = true; + config::enable_storage_vectorization = true; + Status s; + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(tablet_schema, DUP_KEYS); + + RowsetSharedPtr rowset; + const int num_segments = 15; + const uint32_t rows_per_segment = 4096; + config::segcompaction_small_threshold = 6000; // set threshold above + // rows_per_segment + config::segcompaction_threshold_segment_num = 10; + std::vector<uint32_t> segment_num_rows; + { // write `num_segments * rows_per_segment` rows to rowset + RowsetWriterContext writer_context; + create_rowset_writer_context(10047, tablet_schema, &writer_context); + + std::unique_ptr<RowsetWriter> rowset_writer; + s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + EXPECT_EQ(Status::OK(), s); + + RowCursor input_row; + input_row.init(tablet_schema); + + // for segment "i", row "rid" + // k1 := rid*10 + i + // k2 := k1 * 10 + // k3 := rid + for (int i = 0; i < num_segments; ++i) { + MemPool mem_pool; + for (int rid = 0; rid < rows_per_segment; ++rid) { + uint32_t k1 = rid * 100 + i; + uint32_t k2 = i; + uint32_t k3 = rid; + input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool); + input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool); + input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool); + s = rowset_writer->add_row(input_row); Review Comment: warning: no member named 'add_row' in 'doris::RowsetWriter' [clang-diagnostic-error] ```cpp s = rowset_writer->add_row(input_row); ^ ``` ########## be/src/vec/functions/array/function_array_apply.cpp: ########## @@ -0,0 +1,219 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <vec/columns/column_array.h> +#include <vec/columns/column_nullable.h> +#include <vec/columns/columns_number.h> +#include <vec/data_types/data_type_array.h> +#include <vec/data_types/data_type_number.h> +#include <vec/functions/function.h> +#include <vec/functions/function_helpers.h> +#include <vec/functions/simple_function_factory.h> + +namespace doris::vectorized { + +// array_apply([1, 2, 3, 10], ">=", 5) -> [10] +// This function is temporary, use it to meet the requirement before implementing the lambda function. +class FunctionArrayApply : public IFunction { +public: + static constexpr auto name = "array_apply"; + + static FunctionPtr create() { return std::make_shared<FunctionArrayApply>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 3; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + DCHECK(is_array(arguments[0])) + << "first argument for function: " << name << " should be DataTypeArray" + << " and arguments[0] is " << arguments[0]->get_name(); + return arguments[0]; + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + ColumnPtr src_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + const auto& src_column_array = check_and_get_column<ColumnArray>(*src_column); + if (!src_column_array) { + return Status::RuntimeError( + fmt::format("unsupported types for function {}({})", get_name(), + block.get_by_position(arguments[0]).type->get_name())); + } + const auto& src_offsets = src_column_array->get_offsets(); + const auto* src_nested_column = &src_column_array->get_data(); + DCHECK(src_nested_column != nullptr); + + DataTypePtr src_column_type = block.get_by_position(arguments[0]).type; + auto nested_type = assert_cast<const DataTypeArray&>(*src_column_type).get_nested_type(); + const std::string& condition = + block.get_by_position(arguments[1]).column->get_data_at(0).to_string(); + if (!is_column_const(*block.get_by_position(arguments[2]).column)) { + return Status::RuntimeError( + "execute failed or unsupported column, only support const column"); + } + const ColumnConst& rhs_value_column = + static_cast<const ColumnConst&>(*block.get_by_position(arguments[2]).column.get()); + ColumnPtr result_ptr; + RETURN_IF_ERROR(_execute(*src_nested_column, nested_type, src_offsets, condition, + rhs_value_column, &result_ptr)); + block.replace_by_position(result, std::move(result_ptr)); + return Status::OK(); + } + +private: + enum class ApplyOp { + UNKNOWN = 0, + EQ = 1, + NE = 2, + LT = 3, + LE = 4, + GT = 5, + GE = 6, + }; + template <typename T, ApplyOp op> + bool apply(T data, T comp) { + if constexpr (op == ApplyOp::EQ) { + return data == comp; + } + if constexpr (op == ApplyOp::NE) { + return data != comp; + } + if constexpr (op == ApplyOp::LT) { + return data < comp; + } + if constexpr (op == ApplyOp::LE) { + return data <= comp; + } + if constexpr (op == ApplyOp::GT) { + return data > comp; + } + if constexpr (op == ApplyOp::GE) { + return data >= comp; + } + __builtin_unreachable(); + } + + template <typename T, ApplyOp op> + ColumnPtr _apply_internal(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + const ColumnConst& cmp) { + T rhs_val = *reinterpret_cast<const T*>(cmp.get_data_at(0).data); + auto column_filter = ColumnUInt8::create(src_column.size(), 0); + auto& column_filter_data = column_filter->get_data(); + const char* src_column_data_ptr = nullptr; + if (!src_column.is_nullable()) { + src_column_data_ptr = src_column.get_raw_data().data; + } else { + src_column_data_ptr = check_and_get_column<ColumnNullable>(src_column) + ->get_nested_column() + .get_raw_data() + .data; + } + for (size_t i = 0; i < src_column.size(); ++i) { + T lhs_val = *reinterpret_cast<const T*>(src_column_data_ptr); + if (apply<T, op>(lhs_val, rhs_val)) { + column_filter_data[i] = 1; + } + src_column_data_ptr += sizeof(T); + } + const IColumn::Filter& filter = column_filter_data; + ColumnPtr filtered = src_column.filter(filter, src_column.size()); + auto column_offsets = ColumnArray::ColumnOffsets::create(src_offsets.size()); + ColumnArray::Offsets64& dst_offsets = column_offsets->get_data(); + size_t in_pos = 0; + size_t out_pos = 0; + for (size_t i = 0; i < src_offsets.size(); ++i) { + for (; in_pos < src_offsets[i]; ++in_pos) { + if (filter[in_pos]) ++out_pos; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (filter[in_pos]) { ++out_pos; } ``` ########## be/test/olap/segcompaction_test.cpp: ########## @@ -184,6 +186,27 @@ class SegCompactionTest : public testing::Test { rowset_writer_context->tablet_schema = tablet_schema; rowset_writer_context->version.first = 10; rowset_writer_context->version.second = 10; + +#if 0 + TCreateTabletReq req; + req.table_id = + req.tablet_id = + req.tablet_scheme = + req.partition_id = + l_engine->create_tablet(req); + rowset_writer_context->tablet = l_engine->tablet_manager()->get_tablet(TTabletId tablet_id); +#endif + std::shared_ptr<DataDir> data_dir = std::make_shared<DataDir>(lTestDir); + TabletMetaSharedPtr tablet_meta = std::make_shared<TabletMeta>(); + tablet_meta->_tablet_id = 1; Review Comment: warning: '_tablet_id' is a private member of 'doris::TabletMeta' [clang-diagnostic-error] ```cpp tablet_meta->_tablet_id = 1; ^ ``` **be/src/olap/tablet_meta.h:216:** declared private here ```cpp int64_t _tablet_id = 0; ^ ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org