This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 1e891241cd2 branch-4.0: [opt](varbinary) support multi_distinct_count
about varbinary and add conf mapping iceberg uuid #59406 (#59483)
1e891241cd2 is described below
commit 1e891241cd28051ea44dca8a5b8ea6acf6308cae
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Dec 31 11:49:03 2025 +0800
branch-4.0: [opt](varbinary) support multi_distinct_count about varbinary
and add conf mapping iceberg uuid #59406 (#59483)
Cherry-picked from #59406
Co-authored-by: zhangstar333 <[email protected]>
---
be/src/vec/aggregate_functions/aggregate_function_uniq.cpp | 6 +++---
be/src/vec/aggregate_functions/aggregate_function_uniq.h | 9 +++++----
.../java/org/apache/doris/datasource/iceberg/IcebergUtils.java | 2 +-
.../data/external_table_p0/iceberg/test_iceberg_varbinary.out | 5 +++++
.../external_table_p0/iceberg/test_iceberg_varbinary.groovy | 8 ++++++++
5 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
b/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
index 4a6ecc42e57..126fec0032e 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
@@ -40,9 +40,9 @@ AggregateFunctionPtr create_aggregate_function_uniq(const
std::string& name,
return creator_with_type_list<
TYPE_BOOLEAN, TYPE_TINYINT, TYPE_SMALLINT, TYPE_INT, TYPE_BIGINT,
TYPE_LARGEINT,
TYPE_DECIMAL32, TYPE_DECIMAL64, TYPE_DECIMAL128I, TYPE_DECIMAL256,
TYPE_VARCHAR,
- TYPE_ARRAY, TYPE_FLOAT, TYPE_DOUBLE, TYPE_DATEV2, TYPE_DATETIMEV2,
- TYPE_TIMESTAMPTZ>::create<AggregateFunctionUniq,
Data>(argument_types,
-
result_is_nullable, attr);
+ TYPE_ARRAY, TYPE_FLOAT, TYPE_DOUBLE, TYPE_DATEV2, TYPE_DATETIMEV2,
TYPE_TIMESTAMPTZ,
+ TYPE_VARBINARY>::create<AggregateFunctionUniq,
Data>(argument_types, result_is_nullable,
+ attr);
}
void register_aggregate_function_uniq(AggregateFunctionSimpleFactory& factory)
{
diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h
b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
index f0108de4a1a..60d2484c3f2 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_uniq.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
@@ -28,6 +28,7 @@
#include <vector>
#include "common/compiler_util.h" // IWYU pragma: keep
+#include "runtime/primitive_type.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/columns/column_vector.h"
@@ -56,7 +57,7 @@ class ColumnDecimal;
template <PrimitiveType T>
struct AggregateFunctionUniqExactData {
- static constexpr bool is_string_key = is_string_type(T);
+ static constexpr bool is_string_key = is_string_type(T) || is_varbinary(T);
using Key = std::conditional_t<
is_string_key, UInt128,
std::conditional_t<T == TYPE_ARRAY, UInt64,
@@ -91,7 +92,7 @@ namespace detail {
template <PrimitiveType T, typename Data>
struct OneAdder {
static void ALWAYS_INLINE add(Data& data, const IColumn& column, size_t
row_num) {
- if constexpr (is_string_type(T)) {
+ if constexpr (is_string_type(T) || is_varbinary(T)) {
StringRef value = column.get_data_at(row_num);
data.set.insert(Data::get_key(value));
} else if constexpr (T == TYPE_ARRAY) {
@@ -119,7 +120,7 @@ class AggregateFunctionUniq final
NotNullableAggregateFunction {
public:
using KeyType =
- std::conditional_t<is_string_type(T), UInt128,
+ std::conditional_t<is_string_type(T) || is_varbinary(T), UInt128,
std::conditional_t<T == TYPE_ARRAY, UInt64,
typename
PrimitiveTypeTraits<T>::ColumnItemType>>;
AggregateFunctionUniq(const DataTypes& argument_types_)
@@ -138,7 +139,7 @@ public:
static ALWAYS_INLINE const KeyType* get_keys(std::vector<KeyType>&
keys_container,
const IColumn& column, size_t
batch_size) {
- if constexpr (is_string_type(T)) {
+ if constexpr (is_string_type(T) || is_varbinary(T)) {
keys_container.resize(batch_size);
for (size_t i = 0; i != batch_size; ++i) {
StringRef value = column.get_data_at(i);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 001a9a85903..c6fb0143dd2 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -572,7 +572,7 @@ public class IcebergUtils {
case STRING:
return Type.STRING;
case UUID:
- return ScalarType.createVarbinaryType(16);
+ return enableMappingVarbinary ?
ScalarType.createVarbinaryType(16) : Type.STRING;
case BINARY:
return enableMappingVarbinary ?
ScalarType.createVarbinaryType(VarBinaryType.MAX_VARBINARY_LENGTH)
: Type.STRING;
diff --git
a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
index 122979181ac..5845c71a460 100644
--- a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
+++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out
@@ -83,3 +83,8 @@
3 0x00000000000000000000000000000000 0x00
7 0xABAB 0xABAB
+-- !select21 --
+3 3
+
+-- !select22 --
+3 3
diff --git
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
index b9b82f915e5..9db7b5fb934 100644
---
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
+++
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy
@@ -149,4 +149,12 @@ suite("test_iceberg_varbinary",
"p0,external,doris,external_docker,external_dock
qt_select19 """
select * from test_ice_uuid_parquet_write_with_mapping order by id;
"""
+
+ qt_select21 """
+ select multi_distinct_count(col2),multi_distinct_count(col1) from
test_ice_uuid_orc;
+ """
+
+ qt_select22 """
+ select multi_distinct_count(col2),multi_distinct_count(col1) from
test_ice_uuid_parquet;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org