This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 2cf5b2a34eb [Bug](set) avoid empty string equal with null on set
operator #60062 (#60067)
2cf5b2a34eb is described below
commit 2cf5b2a34eb86160aff95b94b8358a3fad100754
Author: Pxl <[email protected]>
AuthorDate: Tue Jan 20 23:54:00 2026 +0800
[Bug](set) avoid empty string equal with null on set operator #60062
(#60067)
#60062
---
be/src/pipeline/common/set_utils.h | 10 +-
.../data/query_p0/set/rqg_prod_20240226/data | 2 +
.../set/rqg_prod_20240226/rqg_prod_20240226.out | 11 ++
.../set/rqg_prod_20240226/rqg_prod_20240226.groovy | 150 +++++++++++++++++++++
4 files changed, 172 insertions(+), 1 deletion(-)
diff --git a/be/src/pipeline/common/set_utils.h
b/be/src/pipeline/common/set_utils.h
index d08ad883b83..153b4ed6ee9 100644
--- a/be/src/pipeline/common/set_utils.h
+++ b/be/src/pipeline/common/set_utils.h
@@ -51,9 +51,13 @@ using SetPrimaryTypeHashTableContextNullable =
vectorized::MethodSingleNullableC
using SetSerializedHashTableContext =
vectorized::MethodSerialized<PHHashMap<StringRef, RowRefWithFlag>>;
using SetMethodOneString =
vectorized::MethodStringNoCache<PHHashMap<StringRef, RowRefWithFlag>>;
+using SetMethodOneStringNullable =
+ vectorized::MethodSingleNullableColumn<vectorized::MethodStringNoCache<
+ vectorized::DataWithNullKey<PHHashMap<StringRef,
RowRefWithFlag>>>>;
using SetHashTableVariants =
std::variant<std::monostate, SetSerializedHashTableContext,
SetMethodOneString,
+ SetMethodOneStringNullable,
SetPrimaryTypeHashTableContextNullable<vectorized::UInt8>,
SetPrimaryTypeHashTableContextNullable<vectorized::UInt16>,
SetPrimaryTypeHashTableContextNullable<vectorized::UInt32>,
@@ -102,7 +106,11 @@ struct SetDataVariants
emplace_single<vectorized::UInt256,
SetData<vectorized::UInt256>>(nullable);
break;
case HashKeyType::string_key:
- method_variant.emplace<SetMethodOneString>();
+ if (nullable) {
+ method_variant.emplace<SetMethodOneStringNullable>();
+ } else {
+ method_variant.emplace<SetMethodOneString>();
+ }
break;
case HashKeyType::fixed64:
method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt64>>(
diff --git a/regression-test/data/query_p0/set/rqg_prod_20240226/data
b/regression-test/data/query_p0/set/rqg_prod_20240226/data
new file mode 100644
index 00000000000..dc9d052a01b
--- /dev/null
+++ b/regression-test/data/query_p0/set/rqg_prod_20240226/data
@@ -0,0 +1,2 @@
+
+INSERT INTO `table_200_undef_partitions2_keys3_properties4_distributed_by511`
VALUES
(-5,19,NULL,0,20,6,12,0,-14,-19,1447233617541975675,'17','14',-15,40.00,300.34,-8.000000,-7.000000,'2024-03-02','2024-03-09','2024-02-20
00:24:00','2024-06-30 12:01:02','2024-02-20 00:13:00.000000','2024-02-20
00:27:00.000000','CD','cd','a','BB','❤️','❤️b','173.96.58.66','116.231.26.14','4202:4634:f6b4:313f:928b:d159:1490:97a5','c27f:5d3:6e23:e4a2:4f61:f804:4557:c907',113),(10,-17,0,0,1,-19,7,15,3,18,-7,
[...]
\ No newline at end of file
diff --git
a/regression-test/data/query_p0/set/rqg_prod_20240226/rqg_prod_20240226.out
b/regression-test/data/query_p0/set/rqg_prod_20240226/rqg_prod_20240226.out
new file mode 100644
index 00000000000..47649a54622
--- /dev/null
+++ b/regression-test/data/query_p0/set/rqg_prod_20240226/rqg_prod_20240226.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !test --
+\N
+0_suffix
+1
+1_suffix
+B
+BC
+abc
+a❤️
+
diff --git
a/regression-test/suites/query_p0/set/rqg_prod_20240226/rqg_prod_20240226.groovy
b/regression-test/suites/query_p0/set/rqg_prod_20240226/rqg_prod_20240226.groovy
new file mode 100644
index 00000000000..117b3dfcb3e
--- /dev/null
+++
b/regression-test/suites/query_p0/set/rqg_prod_20240226/rqg_prod_20240226.groovy
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("rqg_prod_20240226") {
+ sql """
+ DROP TABLE IF EXISTS
`table_200_undef_partitions2_keys3_properties4_distributed_by511`;
+ """
+ sql """
+CREATE TABLE `table_200_undef_partitions2_keys3_properties4_distributed_by511`
(
+ `col_tinyint_undef_signed_not_null` tinyint NOT NULL,
+ `col_decimal_5_0__undef_signed_not_null` decimal(5,0) NOT NULL,
+ `col_boolean_undef_signed` boolean NULL,
+ `col_boolean_undef_signed_not_null` boolean NOT NULL,
+ `col_tinyint_undef_signed` tinyint NULL,
+ `col_smallint_undef_signed` smallint NULL,
+ `col_smallint_undef_signed_not_null` smallint NOT NULL,
+ `col_int_undef_signed` int NULL,
+ `col_int_undef_signed_not_null` int NOT NULL,
+ `col_bigint_undef_signed` bigint NULL,
+ `col_bigint_undef_signed_not_null` bigint NOT NULL,
+ `col_largeint_undef_signed` largeint NULL,
+ `col_largeint_undef_signed_not_null` largeint NOT NULL,
+ `col_decimal_5_0__undef_signed` decimal(5,0) NULL,
+ `col_decimal_12_2__undef_signed` decimal(12,2) NULL,
+ `col_decimal_12_2__undef_signed_not_null` decimal(12,2) NOT NULL,
+ `col_decimal_32_6__undef_signed` decimal(32,6) NULL,
+ `col_decimal_32_6__undef_signed_not_null` decimal(32,6) NOT NULL,
+ `col_date_undef_signed` date NULL,
+ `col_date_undef_signed_not_null` date NOT NULL,
+ `col_datetime_undef_signed` datetime NULL,
+ `col_datetime_undef_signed_not_null` datetime NOT NULL,
+ `col_datetime_6__undef_signed` datetime(6) NULL,
+ `col_datetime_6__undef_signed_not_null` datetime(6) NOT NULL,
+ `col_char_50__undef_signed` char(50) NULL,
+ `col_char_50__undef_signed_not_null` char(50) NOT NULL,
+ `col_varchar_100__undef_signed` varchar(100) NULL,
+ `col_varchar_100__undef_signed_not_null` varchar(100) NOT NULL,
+ `col_string_undef_signed` text NULL,
+ `col_string_undef_signed_not_null` text NOT NULL,
+ `col_ipv4_undef_signed` ipv4 NULL,
+ `col_ipv4_undef_signed_not_null` ipv4 NOT NULL,
+ `col_ipv6_undef_signed` ipv6 NULL,
+ `col_ipv6_undef_signed_not_null` ipv6 NOT NULL,
+ `pk` int NULL
+) ENGINE=OLAP
+UNIQUE KEY(`col_tinyint_undef_signed_not_null`,
`col_decimal_5_0__undef_signed_not_null`)
+DISTRIBUTED BY HASH(`col_tinyint_undef_signed_not_null`) BUCKETS 10
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V2",
+"enable_unique_key_merge_on_write" = "true",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728",
+"enable_mow_light_delete" = "false"
+);
+ """
+
+
+
+ sql """
+ DROP TABLE IF EXISTS
`table_20_undef_partitions2_keys3_properties4_distributed_by54`;
+ """
+ sql """
+
+CREATE TABLE `table_20_undef_partitions2_keys3_properties4_distributed_by54` (
+ `col_tinyint_undef_signed_not_null` tinyint NOT NULL,
+ `col_decimal_5_0__undef_signed_not_null` decimal(5,0) NOT NULL,
+ `col_boolean_undef_signed` boolean NULL,
+ `col_boolean_undef_signed_not_null` boolean NOT NULL,
+ `col_tinyint_undef_signed` tinyint NULL,
+ `col_smallint_undef_signed` smallint NULL,
+ `col_smallint_undef_signed_not_null` smallint NOT NULL,
+ `col_int_undef_signed` int NULL,
+ `col_int_undef_signed_not_null` int NOT NULL,
+ `col_bigint_undef_signed` bigint NULL,
+ `col_bigint_undef_signed_not_null` bigint NOT NULL,
+ `col_largeint_undef_signed` largeint NULL,
+ `col_largeint_undef_signed_not_null` largeint NOT NULL,
+ `col_decimal_5_0__undef_signed` decimal(5,0) NULL,
+ `col_decimal_12_2__undef_signed` decimal(12,2) NULL,
+ `col_decimal_12_2__undef_signed_not_null` decimal(12,2) NOT NULL,
+ `col_decimal_32_6__undef_signed` decimal(32,6) NULL,
+ `col_decimal_32_6__undef_signed_not_null` decimal(32,6) NOT NULL,
+ `col_date_undef_signed` date NULL,
+ `col_date_undef_signed_not_null` date NOT NULL,
+ `col_datetime_undef_signed` datetime NULL,
+ `col_datetime_undef_signed_not_null` datetime NOT NULL,
+ `col_datetime_6__undef_signed` datetime(6) NULL,
+ `col_datetime_6__undef_signed_not_null` datetime(6) NOT NULL,
+ `col_char_50__undef_signed` char(50) NULL,
+ `col_char_50__undef_signed_not_null` char(50) NOT NULL,
+ `col_varchar_100__undef_signed` varchar(100) NULL,
+ `col_varchar_100__undef_signed_not_null` varchar(100) NOT NULL,
+ `col_string_undef_signed` text NULL,
+ `col_string_undef_signed_not_null` text NOT NULL,
+ `col_ipv4_undef_signed` ipv4 NULL,
+ `col_ipv4_undef_signed_not_null` ipv4 NOT NULL,
+ `col_ipv6_undef_signed` ipv6 NULL,
+ `col_ipv6_undef_signed_not_null` ipv6 NOT NULL,
+ `pk` int NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`col_tinyint_undef_signed_not_null`,
`col_decimal_5_0__undef_signed_not_null`)
+DISTRIBUTED BY HASH(`pk`) BUCKETS 10
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V2",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
+ """
+
+ sql """INSERT INTO
`table_20_undef_partitions2_keys3_properties4_distributed_by54` VALUES
(-5,-3,0,1,16,17,10,147483648,-12,NULL,17,'10','8',300,-6.00,-8.00,-6.000000,100.000000,'2030-12-31','2014-08-12','2024-06-30
12:01:02','2024-02-20 00:28:00','2024-02-20 00:29:00.000000','2024-02-20
00:03:00.000000','B❤️','2030-12-31
23:59:59','C❤️','a❤️','❤️❤️','DEF_suffix','227.74.10.141','192.26.168.254','c27f:5d3:6e23:e4a2:4f61:f804:4557:c907','545c:77a8:5c6a:dcb4:5938:f4f2:7a31:837b',4),(19
[...]
+ """
+
+ // Load the INSERT statement stored in the `data` file under the regression data path (query_p0/set/rqg_prod_20240226/data) and execute it directly, as in other test cases.
+ sql new
File("${context.config.dataPath}/query_p0/set/rqg_prod_20240226/data").text
+ sql 'sync'
+
+ qt_test """
+ select t1.col_string_undef_signed from
table_200_undef_partitions2_keys3_properties4_distributed_by511 as t1 where t1.
col_datetime_6__undef_signed_not_null <= '2009-07-21 12:26:28' except distinct
select SUBSTRING(t1.col_varchar_100__undef_signed_not_null, 1, -9) from
table_20_undef_partitions2_keys3_properties4_distributed_by54 as t1 where t1.
col_char_50__undef_signed >= 'v' order by 1;
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]