This is an automated email from the ASF dual-hosted git repository.

HappenLee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 905c80433b1 [fix](expr) fix mixed const probe constant handling regressions (#63810)
905c80433b1 is described below

commit 905c80433b1714027bc853b870de77eb415732e7
Author: Mryange <[email protected]>
AuthorDate: Mon Jun 1 12:18:26 2026 +0800

    [fix](expr) fix mixed const probe constant handling regressions (#63810)
    
    The mixed const execution probe exposed several constant-handling
    problems in BE vectorized functions.
    
    - ColumnConst::clone_resized reused the original nested column, so
    cloned const columns could still alias the source data.
    - quantile_percent requires its percentile argument to stay constant,
    but the all-const probe path unpacked it and triggered a false
    constant-check failure.
    - regexp_count accessed string columns directly and did not handle mixed
    const inputs correctly.
    - uniform still went through the default constant implementation even
    though its result depends on per-row seed values.
    
    This change fixes those behaviors and adds focused unit tests for the
    uncovered cases.
---
 be/src/core/column/column_const.h                  |  3 +-
 be/src/exprs/function/function_quantile_state.cpp  |  2 +
 be/src/exprs/function/function_regexp.cpp          | 20 +++++---
 be/src/exprs/function/uniform.cpp                  |  2 +
 be/test/core/column/column_const_test.cpp          | 13 +++++
 be/test/exprs/function/function_math_test.cpp      | 58 ++++++++++++++++++++++
 .../function/function_quantile_state_test.cpp      | 17 +++++++
 be/test/exprs/function/function_string_test.cpp    | 16 ++++++
 8 files changed, 122 insertions(+), 9 deletions(-)

diff --git a/be/src/core/column/column_const.h 
b/be/src/core/column/column_const.h
index cc8b94ff234..b213aeda0ff 100644
--- a/be/src/core/column/column_const.h
+++ b/be/src/core/column/column_const.h
@@ -124,7 +124,8 @@ public:
     void resize(size_t new_size) override { s = new_size; }
 
     MutableColumnPtr clone_resized(size_t new_size) const override {
-        return ColumnConst::create(data, new_size, false, false);
+        auto cloned_data = data->clone_resized(data->size());
+        return ColumnConst::create(std::move(cloned_data), new_size, false, 
false);
     }
 
     size_t size() const override { return s; }
diff --git a/be/src/exprs/function/function_quantile_state.cpp 
b/be/src/exprs/function/function_quantile_state.cpp
index 4019e84e65e..b4a0f59de3c 100644
--- a/be/src/exprs/function/function_quantile_state.cpp
+++ b/be/src/exprs/function/function_quantile_state.cpp
@@ -161,6 +161,8 @@ public:
 
     bool use_default_implementation_for_nulls() const override { return false; 
}
 
+    ColumnNumbers get_arguments_that_are_always_constant() const override { 
return {1}; }
+
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         uint32_t result, size_t input_rows_count) const 
override {
         auto res_data_column = ColumnFloat64::create();
diff --git a/be/src/exprs/function/function_regexp.cpp 
b/be/src/exprs/function/function_regexp.cpp
index 65d8dd25447..0476336f7ca 100644
--- a/be/src/exprs/function/function_regexp.cpp
+++ b/be/src/exprs/function/function_regexp.cpp
@@ -34,6 +34,7 @@
 #include "core/block/column_with_type_and_name.h"
 #include "core/column/column.h"
 #include "core/column/column_const.h"
+#include "core/column/column_execute_util.h"
 #include "core/column/column_nullable.h"
 #include "core/column/column_string.h"
 #include "core/column/column_vector.h"
@@ -188,23 +189,26 @@ struct RegexpExtractEngine {
 };
 
 struct RegexpCountImpl {
+    using StringColumnView = ColumnView<TYPE_STRING>;
+
     static void execute_impl(FunctionContext* context, ColumnPtr 
argument_columns[],
                              size_t input_rows_count, ColumnInt32::Container& 
result_data) {
-        const auto* str_col = 
check_and_get_column<ColumnString>(argument_columns[0].get());
-        const auto* pattern_col = 
check_and_get_column<ColumnString>(argument_columns[1].get());
-        for (int i = 0; i < input_rows_count; ++i) {
+        auto str_col = StringColumnView::create(argument_columns[0]);
+        auto pattern_col = StringColumnView::create(argument_columns[1]);
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            DCHECK(!str_col.is_null_at(i));
+            DCHECK(!pattern_col.is_null_at(i));
             result_data[i] = _execute_inner_loop(context, str_col, 
pattern_col, i);
         }
     }
-    static int _execute_inner_loop(FunctionContext* context, const 
ColumnString* str_col,
-                                   const ColumnString* pattern_col, const 
size_t index_now) {
+    static int _execute_inner_loop(FunctionContext* context, const 
StringColumnView& str_col,
+                                   const StringColumnView& pattern_col, const 
size_t index_now) {
         re2::RE2* re = reinterpret_cast<re2::RE2*>(
                 context->get_function_state(FunctionContext::THREAD_LOCAL));
         std::unique_ptr<re2::RE2> scoped_re;
         if (re == nullptr) {
             std::string error_str;
-            DCHECK(pattern_col);
-            const auto& pattern = 
pattern_col->get_data_at(index_check_const(index_now, false));
+            const auto pattern = pattern_col.value_at(index_now);
             bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), StringRef(),
                                                      scoped_re);
             if (!st) {
@@ -215,7 +219,7 @@ struct RegexpCountImpl {
             re = scoped_re.get();
         }
 
-        const auto& str = str_col->get_data_at(index_now);
+        const auto str = str_col.value_at(index_now);
         int count = 0;
         size_t pos = 0;
         while (pos < str.size) {
diff --git a/be/src/exprs/function/uniform.cpp 
b/be/src/exprs/function/uniform.cpp
index 713d0f5c3ac..9f1dd3ad073 100644
--- a/be/src/exprs/function/uniform.cpp
+++ b/be/src/exprs/function/uniform.cpp
@@ -147,6 +147,8 @@ public:
     static FunctionPtr create() { return 
std::make_shared<FunctionUniform<Impl>>(); }
     String get_name() const override { return name; }
 
+    bool use_default_implementation_for_constants() const override { return 
false; }
+
     size_t get_number_of_arguments() const override {
         return get_variadic_argument_types_impl().size();
     }
diff --git a/be/test/core/column/column_const_test.cpp 
b/be/test/core/column/column_const_test.cpp
index f6f81ec3aab..e9f57df213b 100644
--- a/be/test/core/column/column_const_test.cpp
+++ b/be/test/core/column/column_const_test.cpp
@@ -41,6 +41,19 @@ TEST(ColumnConstTest, TestCreate) {
     EXPECT_TRUE(!is_column_const(column_const2->get_data_column()));
 }
 
+TEST(ColumnConstTest, clone_resized_clones_nested_data) {
+    auto column_data = ColumnHelper::create_column<DataTypeInt64>({7});
+    auto column_const = ColumnConst::create(column_data, 3);
+
+    auto cloned = column_const->clone_resized(5);
+    const auto& cloned_const = assert_cast<const ColumnConst&>(*cloned);
+
+    EXPECT_EQ(cloned_const.size(), 5);
+    EXPECT_EQ(cloned_const.get_data_column_ptr()->size(), 1);
+    EXPECT_EQ(cloned_const.get_data_column().get_int(0), 7);
+    EXPECT_NE(column_const->get_data_column_ptr().get(), 
cloned_const.get_data_column_ptr().get());
+}
+
 TEST(ColumnConstTest, TestFilter) {
     {
         auto column_data = ColumnHelper::create_column<DataTypeInt64>({7});
diff --git a/be/test/exprs/function/function_math_test.cpp 
b/be/test/exprs/function/function_math_test.cpp
index 4e51a5dc3e7..cf1b3a442ea 100644
--- a/be/test/exprs/function/function_math_test.cpp
+++ b/be/test/exprs/function/function_math_test.cpp
@@ -18,14 +18,17 @@
 #include <climits>
 #include <cstdint>
 #include <limits>
+#include <random>
 #include <string>
 
+#include "core/column/column_const.h"
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/data_type_string.h"
 #include "core/types.h"
 #include "exprs/function/function_test_util.h"
 #include "testutil/any_type.h"
+#include "testutil/column_helper.h"
 
 namespace doris {
 
@@ -532,6 +535,11 @@ TEST(MathFunctionTest, hex_test) {
 }
 
 TEST(MathFunctionTest, random_test) {
+#ifndef NDEBUG
+    GTEST_SKIP() << "random(seed) exact-value assertions are release-only; 
debug builds run "
+                    "mock_const_execute before the real call.";
+#endif
+
     std::string func_name = "random"; // random(x)
     InputTypeSet input_types = {Consted {PrimitiveType::TYPE_BIGINT}};
     DataSet data_set = {{{Null()}, Null()},
@@ -547,6 +555,56 @@ TEST(MathFunctionTest, random_test) {
     }
 }
 
+TEST(MathFunctionTest, uniform_mixed_const_probe_test) {
+    auto input_type = std::make_shared<DataTypeInt64>();
+    auto return_type = std::make_shared<DataTypeInt64>();
+
+    Block block;
+    auto min_data = ColumnHelper::create_column<DataTypeInt64>({1});
+    auto max_data = ColumnHelper::create_column<DataTypeInt64>({10});
+    auto seed_column = ColumnHelper::create_column<DataTypeInt64>({101, 202, 
303});
+
+    block.insert({ColumnConst::create(min_data, 3), input_type, "min"});
+    block.insert({ColumnConst::create(max_data, 3), input_type, "max"});
+    block.insert({seed_column, input_type, "seed"});
+
+    FunctionBasePtr function = SimpleFunctionFactory::instance().get_function(
+            "uniform", block.get_columns_with_type_and_name(), return_type);
+    ASSERT_TRUE(function != nullptr);
+
+    block.insert({nullptr, return_type, "result"});
+
+    FunctionUtils fn_utils(return_type, {input_type, input_type, input_type}, 
false);
+    auto* fn_ctx = fn_utils.get_fn_ctx();
+    std::vector<std::shared_ptr<ColumnPtrWrapper>> constant_cols {
+            
std::make_shared<ColumnPtrWrapper>(block.get_by_position(0).column),
+            
std::make_shared<ColumnPtrWrapper>(block.get_by_position(1).column),
+            nullptr,
+    };
+    fn_ctx->set_constant_cols(constant_cols);
+
+    ASSERT_TRUE(function->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL).ok());
+    ASSERT_TRUE(function->open(fn_ctx, FunctionContext::THREAD_LOCAL).ok());
+
+    auto exec_status = function->execute(fn_ctx, block, {0, 1, 2}, 3, 3);
+
+    static_cast<void>(function->close(fn_ctx, FunctionContext::THREAD_LOCAL));
+    static_cast<void>(function->close(fn_ctx, 
FunctionContext::FRAGMENT_LOCAL));
+
+    ASSERT_TRUE(exec_status.ok()) << exec_status.to_string();
+
+    const auto& result_column = assert_cast<const 
ColumnInt64&>(*block.get_by_position(3).column);
+    auto expected_uniform = [](int64_t seed) {
+        std::mt19937_64 generator(seed);
+        std::uniform_int_distribution<int64_t> distribution(1, 10);
+        return distribution(generator);
+    };
+
+    EXPECT_EQ(result_column.get_element(0), expected_uniform(101));
+    EXPECT_EQ(result_column.get_element(1), expected_uniform(202));
+    EXPECT_EQ(result_column.get_element(2), expected_uniform(303));
+}
+
 TEST(MathFunctionTest, conv_test) {
     std::string func_name = "conv";
 
diff --git a/be/test/exprs/function/function_quantile_state_test.cpp 
b/be/test/exprs/function/function_quantile_state_test.cpp
index 1cb1ced1dae..e8f2fca7028 100644
--- a/be/test/exprs/function/function_quantile_state_test.cpp
+++ b/be/test/exprs/function/function_quantile_state_test.cpp
@@ -213,4 +213,21 @@ TEST(function_quantile_state_test, 
function_quantile_state_roundtrip) {
                 0.01);
 }
 
+TEST(function_quantile_state_test, function_quantile_percent_mixed_const_test) 
{
+    std::string func_name = "quantile_percent";
+    InputTypeSet input_types = {PrimitiveType::TYPE_QUANTILE_STATE,
+                                ConstedNotnull {PrimitiveType::TYPE_FLOAT}};
+
+    QuantileState quantile_state;
+    quantile_state.add_value(1.0);
+    quantile_state.add_value(2.0);
+    quantile_state.add_value(3.0);
+    quantile_state.add_value(4.0);
+    quantile_state.add_value(5.0);
+
+    DataSet data_set = {{{&quantile_state, 0.5F}, 3.0}};
+
+    static_cast<void>(check_function<DataTypeFloat64, false>(func_name, 
input_types, data_set));
+}
+
 } // namespace doris
diff --git a/be/test/exprs/function/function_string_test.cpp 
b/be/test/exprs/function/function_string_test.cpp
index 53df245904c..2e1aaa839c4 100644
--- a/be/test/exprs/function/function_string_test.cpp
+++ b/be/test/exprs/function/function_string_test.cpp
@@ -4003,4 +4003,20 @@ TEST(function_string_test, 
function_unicode_normalize_invalid_mode) {
     EXPECT_NE(Status::OK(), st);
 }
 
+TEST(function_string_test, function_regexp_count_mixed_const_test) {
+    std::string func_name = "regexp_count";
+
+    InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR, 
PrimitiveType::TYPE_VARCHAR};
+    DataSet data_set = {
+            {{std::string("a.b:c;d"), std::string("[.:;]")}, std::int32_t(3)},
+            {{std::string("a1b2346c3d"), std::string("\\d+")}, 
std::int32_t(3)},
+            {{std::string("abcd"), std::string("")}, std::int32_t(0)},
+            {{std::string("book keeper"), std::string("oo|ee")}, 
std::int32_t(2)},
+            {{Null(), std::string("\\d+")}, Null()},
+            {{std::string("abcd"), Null()}, Null()},
+    };
+
+    check_function_all_arg_comb<DataTypeInt32, true>(func_name, input_types, 
data_set);
+}
+
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to