This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a0a09b0eac4 [refine](function) use concrete column pointers for local
result columns (#63938)
a0a09b0eac4 is described below
commit a0a09b0eac406d143a57752ff06430e2dd14d4e0
Author: Mryange <[email protected]>
AuthorDate: Tue Jun 2 13:50:32 2026 +0800
[refine](function) use concrete column pointers for local result columns
(#63938)
### What problem does this PR solve?
Some BE expression and storage code creates a column of a concrete type,
stores it in a generic `ColumnPtr` or `MutableColumnPtr`, and then
immediately casts that pointer back to the same concrete type before
writing data. This adds unnecessary casts and makes the ownership intent
less direct. Root cause: several local result columns were declared as
generic column pointers even though the concrete column type was already
known at creation time.
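This PR refines those local variables to keep concrete column pointers
where the type is explicit, and directly accesses the concrete column
data. It also changes the explode-numbers table function member
`_elements_column` to a concrete `ColumnInt32::MutablePtr`, and replaces
the standalone `DateTruncState` struct with a nested `DateTrunc::State`
whose callback receives the concrete `ColumnType&` instead of a generic
`ColumnPtr&`. The change is a refactoring only and is not intended to
change runtime behavior.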
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add the documentation PR link here, e.g.
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add a branch pick label for each branch
this PR should be merged into -->
---
.../exprs/function/array/function_array_exists.cpp | 5 +-
be/src/exprs/function/function_ip.h | 11 ++-
be/src/exprs/function/function_jsonb.cpp | 4 +-
.../function/function_other_types_to_date.cpp | 101 ++++++++++-----------
be/src/exprs/function/random.cpp | 2 +-
be/src/exprs/function/uniform.cpp | 2 +-
be/src/exprs/table_function/vexplode_numbers.cpp | 4 +-
be/src/exprs/table_function/vexplode_numbers.h | 2 +-
be/src/storage/iterator/olap_data_convertor.h | 5 +-
.../segment/variant/hierarchical_data_iterator.cpp | 12 +--
.../variant_doc_snpashot_compact_iterator.h | 11 +--
11 files changed, 73 insertions(+), 86 deletions(-)
diff --git a/be/src/exprs/function/array/function_array_exists.cpp
b/be/src/exprs/function/array/function_array_exists.cpp
index 9009ba2f755..ffa74d24e8d 100644
--- a/be/src/exprs/function/array/function_array_exists.cpp
+++ b/be/src/exprs/function/array/function_array_exists.cpp
@@ -78,9 +78,8 @@ public:
nested_nullable_column.get_null_map_column_ptr()->clone_resized(nested_column_size);
// 2. compute result
- MutableColumnPtr result_column =
ColumnUInt8::create(nested_column_size, 0);
- auto* __restrict result_column_data =
- assert_cast<ColumnUInt8&>(*result_column).get_data().data();
+ auto result_column = ColumnUInt8::create(nested_column_size, 0);
+ auto* __restrict result_column_data = result_column->get_data().data();
MutableColumnPtr result_offset_column =
first_off_data.clone_resized(first_off_data.size());
const auto* __restrict nested_column_data =
assert_cast<const
ColumnUInt8&>(*nested_column).get_data().data();
diff --git a/be/src/exprs/function/function_ip.h
b/be/src/exprs/function/function_ip.h
index cb176a081ef..5c284412848 100644
--- a/be/src/exprs/function/function_ip.h
+++ b/be/src/exprs/function/function_ip.h
@@ -1345,10 +1345,10 @@ public:
unpack_if_const(ipv6_column_with_type_and_name.column);
const auto* ipv6_addr_column = assert_cast<const
ColumnString*>(ipv6_column.get());
// result is nullable column
- auto col_res =
ColumnNullable::create(ColumnIPv6::create(input_rows_count, 0),
-
ColumnUInt8::create(input_rows_count, 1));
- auto& col_res_data =
assert_cast<ColumnIPv6*>(&col_res->get_nested_column())->get_data();
- auto& res_null_map_data = col_res->get_null_map_data();
+ auto col_res_nested = ColumnIPv6::create(input_rows_count, 0);
+ auto col_res_null_map = ColumnUInt8::create(input_rows_count, 1);
+ auto& col_res_data = col_res_nested->get_data();
+ auto& res_null_map_data = col_res_null_map->get_data();
for (size_t i = 0; i < input_rows_count; ++i) {
IPv6 ipv6 = 0;
@@ -1364,7 +1364,8 @@ public:
}
}
- block.replace_by_position(result, std::move(col_res));
+ block.replace_by_position(result,
ColumnNullable::create(std::move(col_res_nested),
+
std::move(col_res_null_map)));
return Status::OK();
}
};
diff --git a/be/src/exprs/function/function_jsonb.cpp
b/be/src/exprs/function/function_jsonb.cpp
index 04b902f7933..84238c6d688 100644
--- a/be/src/exprs/function/function_jsonb.cpp
+++ b/be/src/exprs/function/function_jsonb.cpp
@@ -709,11 +709,11 @@ public:
return Status::OK();
};
- MutableColumnPtr result_null_map_column;
+ ColumnUInt8::MutablePtr result_null_map_column;
NullMap* result_null_map = nullptr;
if (data_null_map || path_null_map) {
result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
- result_null_map =
&static_cast<ColumnUInt8&>(*result_null_map_column).get_data();
+ result_null_map = &result_null_map_column->get_data();
if (data_null_map) {
VectorizedUtils::update_null_map(*result_null_map,
*data_null_map,
diff --git a/be/src/exprs/function/function_other_types_to_date.cpp
b/be/src/exprs/function/function_other_types_to_date.cpp
index bc24d76df27..122fdb9338a 100644
--- a/be/src/exprs/function/function_other_types_to_date.cpp
+++ b/be/src/exprs/function/function_other_types_to_date.cpp
@@ -142,31 +142,29 @@ struct StrToDate {
// Because of we cant distinguish by return_type when we find
function. so the return_type may NOT be same with real return type
// which decided by FE. we directly use block column's type which
decided by FE.
if (block.get_by_position(result).type->get_primitive_type() ==
TYPE_DATEV2) {
- res = ColumnDateV2::create(input_rows_count);
+ auto res_column = ColumnDateV2::create(input_rows_count);
if (col_const[1]) {
- execute_impl_const_right<TYPE_DATEV2>(
- context, ldata, loffsets,
specific_char_column->get_data_at(0),
- result_null_map,
-
static_cast<ColumnDateV2*>(res->assert_mutable().get())->get_data());
+ execute_impl_const_right<TYPE_DATEV2>(context, ldata, loffsets,
+
specific_char_column->get_data_at(0),
+ result_null_map,
res_column->get_data());
} else {
- execute_impl<TYPE_DATEV2>(
- context, ldata, loffsets, rdata, roffsets,
result_null_map,
-
static_cast<ColumnDateV2*>(res->assert_mutable().get())->get_data());
+ execute_impl<TYPE_DATEV2>(context, ldata, loffsets, rdata,
roffsets,
+ result_null_map,
res_column->get_data());
}
+ res = std::move(res_column);
} else {
DCHECK(block.get_by_position(result).type->get_primitive_type() ==
TYPE_DATETIMEV2);
- res = ColumnDateTimeV2::create(input_rows_count);
+ auto res_column = ColumnDateTimeV2::create(input_rows_count);
if (col_const[1]) {
- execute_impl_const_right<TYPE_DATETIMEV2>(
- context, ldata, loffsets,
specific_char_column->get_data_at(0),
- result_null_map,
-
static_cast<ColumnDateTimeV2*>(res->assert_mutable().get())->get_data());
+ execute_impl_const_right<TYPE_DATETIMEV2>(context, ldata,
loffsets,
+
specific_char_column->get_data_at(0),
+ result_null_map,
res_column->get_data());
} else {
- execute_impl<TYPE_DATETIMEV2>(
- context, ldata, loffsets, rdata, roffsets,
result_null_map,
-
static_cast<ColumnDateTimeV2*>(res->assert_mutable().get())->get_data());
+ execute_impl<TYPE_DATETIMEV2>(context, ldata, loffsets, rdata,
roffsets,
+ result_null_map,
res_column->get_data());
}
+ res = std::move(res_column);
}
// Wrap result in nullable column only if input has nullable arguments
@@ -292,17 +290,13 @@ struct MakeDateImpl {
const auto* year_col = assert_cast<const
ColumnInt32*>(argument_columns[0].get());
const auto* dayofyear_col = assert_cast<const
ColumnInt32*>(argument_columns[1].get());
- ColumnPtr res_column;
-
- res_column = ColumnDateV2::create(input_rows_count);
+ auto res_column = ColumnDateV2::create(input_rows_count);
if (col_const[1]) {
- execute_impl_right_const(
- year_col->get_data(), dayofyear_col->get_element(0),
result_null_map,
-
static_cast<ColumnDateV2*>(res_column->assert_mutable().get())->get_data());
+ execute_impl_right_const(year_col->get_data(),
dayofyear_col->get_element(0),
+ result_null_map, res_column->get_data());
} else {
- execute_impl(
- year_col->get_data(), dayofyear_col->get_data(),
result_null_map,
-
static_cast<ColumnDateV2*>(res_column->assert_mutable().get())->get_data());
+ execute_impl(year_col->get_data(), dayofyear_col->get_data(),
result_null_map,
+ res_column->get_data());
}
// Wrap result in nullable column only if input has nullable arguments
@@ -451,13 +445,6 @@ private:
}
};
-struct DateTruncState {
- using Callback_function =
- std::function<void(const ColumnPtr&, ColumnPtr& res, size_t, const
cctz::time_zone&)>;
- Callback_function callback_function;
- cctz::time_zone timezone;
-};
-
template <PrimitiveType PType, bool DateArgIsFirst>
struct DateTrunc {
static constexpr auto name = "date_trunc";
@@ -465,6 +452,13 @@ struct DateTrunc {
using ColumnType = typename PrimitiveTypeTraits<PType>::ColumnType;
using DateValueType = typename PrimitiveTypeTraits<PType>::CppType;
+ struct State {
+ using CallbackFunction =
+ std::function<void(const ColumnPtr&, ColumnType&, size_t,
const cctz::time_zone&)>;
+ CallbackFunction callback_function;
+ cctz::time_zone timezone;
+ };
+
static bool is_variadic() { return true; }
static size_t get_number_of_arguments() { return 2; }
@@ -495,7 +489,7 @@ struct DateTrunc {
std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(),
[](unsigned char c) { return std::tolower(c); });
- std::shared_ptr<DateTruncState> state =
std::make_shared<DateTruncState>();
+ std::shared_ptr<State> state = std::make_shared<State>();
state->timezone = context->state()->timezone_obj();
if (std::strncmp("year", lower_str.data(), 4) == 0) {
state->callback_function =
&execute_impl_right_const<TimeUnit::YEAR>;
@@ -528,21 +522,22 @@ struct DateTrunc {
const auto& datetime_column =
block.get_by_position(arguments[DateArgIsFirst ? 0 : 1])
.column->convert_to_full_column_if_const();
- ColumnPtr res = ColumnType::create(input_rows_count);
- auto* state = reinterpret_cast<DateTruncState*>(
+ auto res = ColumnType::create(input_rows_count);
+ auto* state = reinterpret_cast<State*>(
context->get_function_state(FunctionContext::THREAD_LOCAL));
DCHECK(state != nullptr);
- state->callback_function(datetime_column, res, input_rows_count,
state->timezone);
+ state->callback_function(datetime_column, *res, input_rows_count,
state->timezone);
block.replace_by_position(result, std::move(res));
return Status::OK();
}
private:
template <TimeUnit Unit>
- static void execute_impl_right_const(const ColumnPtr& datetime_column,
ColumnPtr& result_column,
- size_t input_rows_count, const
cctz::time_zone& timezone) {
+ static void execute_impl_right_const(const ColumnPtr& datetime_column,
+ ColumnType& result_column, size_t
input_rows_count,
+ const cctz::time_zone& timezone) {
auto& data = static_cast<const
ColumnType*>(datetime_column.get())->get_data();
- auto& res =
static_cast<ColumnType*>(result_column->assert_mutable().get())->get_data();
+ auto& res = result_column.get_data();
for (size_t i = 0; i < input_rows_count; ++i) {
auto dt = data[i];
// datetime_trunc only raise only when dt invalid which is
impossible. so we dont throw error better.
@@ -609,15 +604,15 @@ public:
ColumnPtr res_column;
if (block.get_by_position(result).type->get_primitive_type() ==
PrimitiveType::TYPE_DATE) {
- res_column = ColumnDate::create(input_rows_count);
- _execute<VecDateTimeValue>(
- input_rows_count, data_col->get_data(), result_null_map,
-
static_cast<ColumnDateTime*>(res_column->assert_mutable().get())->get_data());
+ auto column_date = ColumnDate::create(input_rows_count);
+ _execute<VecDateTimeValue>(input_rows_count, data_col->get_data(),
result_null_map,
+ column_date->get_data());
+ res_column = std::move(column_date);
} else {
- res_column = ColumnDateV2::create(input_rows_count);
- _execute<DateV2Value<DateV2ValueType>>(
- input_rows_count, data_col->get_data(), result_null_map,
-
static_cast<ColumnDateV2*>(res_column->assert_mutable().get())->get_data());
+ auto column_datev2 = ColumnDateV2::create(input_rows_count);
+ _execute<DateV2Value<DateV2ValueType>>(input_rows_count,
data_col->get_data(),
+ result_null_map,
column_datev2->get_data());
+ res_column = std::move(column_datev2);
}
// Wrap result in nullable column only if input has nullable arguments
@@ -1053,10 +1048,8 @@ struct LastDayImpl {
const auto is_nullable =
block.get_by_position(result).type->is_nullable();
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
auto res_column = ResultColumnType::create(input_rows_count);
- execute_straight(
- input_rows_count, data_col->get_data(),
-
static_cast<ResultColumnType*>(res_column->assert_mutable().get())->get_data(),
- result_null_map);
+ execute_straight(input_rows_count, data_col->get_data(),
res_column->get_data(),
+ result_null_map);
if (is_nullable) {
block.replace_by_position(result,
@@ -1128,10 +1121,8 @@ struct ToMondayImpl {
const auto is_nullable =
block.get_by_position(result).type->is_nullable();
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
auto res_column = ResultColumnType::create(input_rows_count);
- execute_straight(
- input_rows_count, data_col->get_data(),
-
static_cast<ResultColumnType*>(res_column->assert_mutable().get())->get_data(),
- result_null_map);
+ execute_straight(input_rows_count, data_col->get_data(),
res_column->get_data(),
+ result_null_map);
if (is_nullable) {
block.replace_by_position(result,
diff --git a/be/src/exprs/function/random.cpp b/be/src/exprs/function/random.cpp
index b84190daf9c..8bbbb3669bf 100644
--- a/be/src/exprs/function/random.cpp
+++ b/be/src/exprs/function/random.cpp
@@ -143,7 +143,7 @@ private:
static const double min = 0.0;
static const double max = 1.0;
auto res_column = ColumnFloat64::create(input_rows_count);
- auto& res_data = static_cast<ColumnFloat64&>(*res_column).get_data();
+ auto& res_data = res_column->get_data();
auto* generator = reinterpret_cast<std::mt19937_64*>(
context->get_function_state(FunctionContext::THREAD_LOCAL));
diff --git a/be/src/exprs/function/uniform.cpp
b/be/src/exprs/function/uniform.cpp
index 9f1dd3ad073..8749dc07acb 100644
--- a/be/src/exprs/function/uniform.cpp
+++ b/be/src/exprs/function/uniform.cpp
@@ -105,7 +105,7 @@ struct UniformDoubleImpl {
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
auto res_column = ColumnFloat64::create(input_rows_count);
- auto& res_data = static_cast<ColumnFloat64&>(*res_column).get_data();
+ auto& res_data = res_column->get_data();
// Get min and max values (constants)
const auto& left =
diff --git a/be/src/exprs/table_function/vexplode_numbers.cpp
b/be/src/exprs/table_function/vexplode_numbers.cpp
index 0f93dec02f1..fe90119cefe 100644
--- a/be/src/exprs/table_function/vexplode_numbers.cpp
+++ b/be/src/exprs/table_function/vexplode_numbers.cpp
@@ -64,14 +64,14 @@ Status VExplodeNumbersTableFunction::process_init(Block*
block, RuntimeState* st
_cur_size = assert_cast<const
ColumnInt32*>(column_nested.get())->get_element(0);
}
- ((ColumnInt32*)_elements_column.get())->clear();
+ _elements_column->clear();
//_cur_size may be a negative number
_cur_size = std::max(static_cast<int64_t>(0L), _cur_size);
if (_cur_size &&
_cur_size <= state->batch_size()) { // avoid elements_column too
big or empty
_is_const = true; // use const optimize
for (int i = 0; i < _cur_size; i++) {
- ((ColumnInt32*)_elements_column.get())->insert_value(i);
+ _elements_column->insert_value(i);
}
}
}
diff --git a/be/src/exprs/table_function/vexplode_numbers.h
b/be/src/exprs/table_function/vexplode_numbers.h
index 9a862b666bb..958a170477b 100644
--- a/be/src/exprs/table_function/vexplode_numbers.h
+++ b/be/src/exprs/table_function/vexplode_numbers.h
@@ -84,7 +84,7 @@ public:
private:
ColumnPtr _value_column;
- ColumnPtr _elements_column = ColumnInt32::create();
+ ColumnInt32::MutablePtr _elements_column = ColumnInt32::create();
};
} // namespace doris
diff --git a/be/src/storage/iterator/olap_data_convertor.h
b/be/src/storage/iterator/olap_data_convertor.h
index 8909a400601..680bc9f6765 100644
--- a/be/src/storage/iterator/olap_data_convertor.h
+++ b/be/src/storage/iterator/olap_data_convertor.h
@@ -180,11 +180,10 @@ private:
static ColumnPtr clone_and_padding(const ColumnString* input, size_t
padding_length) {
auto column = ColumnString::create();
- auto padded_column =
assert_cast<ColumnString*>(column->assert_mutable().get());
column->offsets.resize(input->size());
column->chars.resize(input->size() * padding_length);
- memset(padded_column->chars.data(), 0, input->size() *
padding_length);
+ memset(column->chars.data(), 0, input->size() * padding_length);
for (size_t i = 0; i < input->size(); i++) {
column->offsets[i] = cast_set<uint32_t, size_t, false>((i + 1)
* padding_length);
@@ -196,7 +195,7 @@ private:
<< ", real=" << str.size;
if (str.size) {
- memcpy(padded_column->chars.data() + i * padding_length,
str.data, str.size);
+ memcpy(column->chars.data() + i * padding_length,
str.data, str.size);
}
}
diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
index 878ca16019b..9e75f307280 100644
--- a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
+++ b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
@@ -223,10 +223,8 @@ Status HierarchicalDataIterator::_process_nested_columns(
for (const auto& entry : nested_subcolumns) {
const auto* base_array =
assert_cast<const
ColumnArray*>(remove_nullable(entry.second[0].column).get());
- MutableColumnPtr nested_object =
- ColumnVariant::create(0, false, base_array->get_data().size());
+ auto nested_object_variant = ColumnVariant::create(0, false,
base_array->get_data().size());
MutableColumnPtr offset =
IColumn::mutate(base_array->get_offsets_ptr());
- auto* nested_object_ptr =
assert_cast<ColumnVariant*>(nested_object.get());
// flatten nested arrays
for (const auto& subcolumn : entry.second) {
const auto& column = subcolumn.column;
@@ -251,13 +249,13 @@ Status HierarchicalDataIterator::_process_nested_columns(
check_and_get_data_type<DataTypeArray>(remove_nullable(type).get())
->get_nested_type();
// add sub path without parent prefix
- nested_object_ptr->add_sub_column(
+ nested_object_variant->add_sub_column(
subcolumn.path.copy_pop_nfront(entry.first.get_parts().size()),
std::move(flattend_column), std::move(flattend_type));
}
- const size_t nested_object_size = nested_object->size();
- nested_object = ColumnNullable::create(std::move(nested_object),
-
ColumnUInt8::create(nested_object_size, 0));
+ const size_t nested_object_size = nested_object_variant->size();
+ MutableColumnPtr nested_object = ColumnNullable::create(
+ std::move(nested_object_variant),
ColumnUInt8::create(nested_object_size, 0));
auto array = ColumnArray::create(std::move(nested_object),
std::move(offset));
const size_t array_size = array->size();
auto nullable_array =
diff --git
a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
index 2a707adec86..3064d277e31 100644
--- a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
+++ b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
@@ -54,15 +54,14 @@ private:
Status _set_doc_value_into_variant(MutableColumnPtr& dst,
MutableColumnPtr&& doc_value_column,
size_t count) const {
auto& variant = assert_cast<ColumnVariant&>(*dst);
- MutableColumnPtr container =
ColumnVariant::create(variant.max_subcolumns_count(),
-
variant.enable_doc_mode(), count);
- auto& container_variant = assert_cast<ColumnVariant&>(*container);
- container_variant.set_doc_value_column(std::move(doc_value_column));
- variant.insert_range_from(container_variant, 0, count);
+ auto container = ColumnVariant::create(variant.max_subcolumns_count(),
+ variant.enable_doc_mode(),
count);
+ container->set_doc_value_column(std::move(doc_value_column));
+ variant.insert_range_from(*container, 0, count);
return Status::OK();
}
ColumnIteratorUPtr _doc_value_iterator;
};
-} // namespace doris::segment_v2
\ No newline at end of file
+} // namespace doris::segment_v2
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]