This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 1e891241cd2 branch-4.0: [opt](varbinary) support multi_distinct_count about varbinary and add conf mapping iceberg uuid #59406 (#59483)
1e891241cd2 is described below

commit 1e891241cd28051ea44dca8a5b8ea6acf6308cae
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Dec 31 11:49:03 2025 +0800

    branch-4.0: [opt](varbinary) support multi_distinct_count about varbinary and add conf mapping iceberg uuid #59406 (#59483)
    
    Cherry-picked from #59406
    
    Co-authored-by: zhangstar333 <[email protected]>
---
 be/src/vec/aggregate_functions/aggregate_function_uniq.cpp       | 6 +++---
 be/src/vec/aggregate_functions/aggregate_function_uniq.h         | 9 +++++----
 .../java/org/apache/doris/datasource/iceberg/IcebergUtils.java   | 2 +-
 .../data/external_table_p0/iceberg/test_iceberg_varbinary.out    | 5 +++++
 .../external_table_p0/iceberg/test_iceberg_varbinary.groovy      | 8 ++++++++
 5 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp 
b/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
index 4a6ecc42e57..126fec0032e 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
@@ -40,9 +40,9 @@ AggregateFunctionPtr create_aggregate_function_uniq(const 
std::string& name,
     return creator_with_type_list<
             TYPE_BOOLEAN, TYPE_TINYINT, TYPE_SMALLINT, TYPE_INT, TYPE_BIGINT, 
TYPE_LARGEINT,
             TYPE_DECIMAL32, TYPE_DECIMAL64, TYPE_DECIMAL128I, TYPE_DECIMAL256, 
TYPE_VARCHAR,
-            TYPE_ARRAY, TYPE_FLOAT, TYPE_DOUBLE, TYPE_DATEV2, TYPE_DATETIMEV2,
-            TYPE_TIMESTAMPTZ>::create<AggregateFunctionUniq, 
Data>(argument_types,
-                                                                   
result_is_nullable, attr);
+            TYPE_ARRAY, TYPE_FLOAT, TYPE_DOUBLE, TYPE_DATEV2, TYPE_DATETIMEV2, 
TYPE_TIMESTAMPTZ,
+            TYPE_VARBINARY>::create<AggregateFunctionUniq, 
Data>(argument_types, result_is_nullable,
+                                                                 attr);
 }
 
 void register_aggregate_function_uniq(AggregateFunctionSimpleFactory& factory) 
{
diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h 
b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
index f0108de4a1a..60d2484c3f2 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_uniq.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
@@ -28,6 +28,7 @@
 #include <vector>
 
 #include "common/compiler_util.h" // IWYU pragma: keep
+#include "runtime/primitive_type.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_vector.h"
@@ -56,7 +57,7 @@ class ColumnDecimal;
 
 template <PrimitiveType T>
 struct AggregateFunctionUniqExactData {
-    static constexpr bool is_string_key = is_string_type(T);
+    static constexpr bool is_string_key = is_string_type(T) || is_varbinary(T);
     using Key = std::conditional_t<
             is_string_key, UInt128,
             std::conditional_t<T == TYPE_ARRAY, UInt64,
@@ -91,7 +92,7 @@ namespace detail {
 template <PrimitiveType T, typename Data>
 struct OneAdder {
     static void ALWAYS_INLINE add(Data& data, const IColumn& column, size_t 
row_num) {
-        if constexpr (is_string_type(T)) {
+        if constexpr (is_string_type(T) || is_varbinary(T)) {
             StringRef value = column.get_data_at(row_num);
             data.set.insert(Data::get_key(value));
         } else if constexpr (T == TYPE_ARRAY) {
@@ -119,7 +120,7 @@ class AggregateFunctionUniq final
           NotNullableAggregateFunction {
 public:
     using KeyType =
-            std::conditional_t<is_string_type(T), UInt128,
+            std::conditional_t<is_string_type(T) || is_varbinary(T), UInt128,
                                std::conditional_t<T == TYPE_ARRAY, UInt64,
                                                   typename 
PrimitiveTypeTraits<T>::ColumnItemType>>;
     AggregateFunctionUniq(const DataTypes& argument_types_)
@@ -138,7 +139,7 @@ public:
 
     static ALWAYS_INLINE const KeyType* get_keys(std::vector<KeyType>& 
keys_container,
                                                  const IColumn& column, size_t 
batch_size) {
-        if constexpr (is_string_type(T)) {
+        if constexpr (is_string_type(T) || is_varbinary(T)) {
             keys_container.resize(batch_size);
             for (size_t i = 0; i != batch_size; ++i) {
                 StringRef value = column.get_data_at(i);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 001a9a85903..c6fb0143dd2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -572,7 +572,7 @@ public class IcebergUtils {
             case STRING:
                 return Type.STRING;
             case UUID:
-                return ScalarType.createVarbinaryType(16);
+                return enableMappingVarbinary ? 
ScalarType.createVarbinaryType(16) : Type.STRING;
             case BINARY:
                 return enableMappingVarbinary ? 
ScalarType.createVarbinaryType(VarBinaryType.MAX_VARBINARY_LENGTH)
                         : Type.STRING;
diff --git 
a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out 
b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
index 122979181ac..5845c71a460 100644
--- a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
+++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
@@ -83,3 +83,8 @@
 3      0x00000000000000000000000000000000      0x00
 7      0xABAB  0xABAB
 
+-- !select21 --
+3      3
+
+-- !select22 --
+3      3
diff --git 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
index b9b82f915e5..9db7b5fb934 100644
--- 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
+++ 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
@@ -149,4 +149,12 @@ suite("test_iceberg_varbinary", 
"p0,external,doris,external_docker,external_dock
     qt_select19 """
         select * from test_ice_uuid_parquet_write_with_mapping order by id;
     """
+
+    qt_select21 """
+        select multi_distinct_count(col2),multi_distinct_count(col1) from 
test_ice_uuid_orc;
+    """
+
+    qt_select22 """
+        select multi_distinct_count(col2),multi_distinct_count(col1) from 
test_ice_uuid_parquet;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to