This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 853033615ad [Bug](bits) fix wrong result of count_zero_num with nullmap (#56113)
853033615ad is described below
commit 853033615adad33ba2916c3844316e0ab6964b91
Author: Pxl <[email protected]>
AuthorDate: Wed Sep 17 11:17:32 2025 +0800
[Bug](bits) fix wrong result of count_zero_num with nullmap (#56113)
fix wrong result of count_zero_num with nullmap
---
be/src/util/simd/bits.h | 2 +-
be/test/util/bit_util_test.cpp | 93 +++++++++++++++
.../test_conditional_function.out | Bin 3048 -> 3303 bytes
.../sql_functions/conditional_functions/data.txt | 1 +
.../test_conditional_function.groovy | 128 +++++++++++++++++++++
5 files changed, 223 insertions(+), 1 deletion(-)
diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h
index 6c0d456254b..186e43746d2 100644
--- a/be/src/util/simd/bits.h
+++ b/be/src/util/simd/bits.h
@@ -164,7 +164,7 @@ inline T count_zero_num(const int8_t* __restrict data, const uint8_t* __restrict
const __m128i zero16 = _mm_setzero_si128();
const int8_t* end64 = data + (size / 64 * 64);
- for (; data < end64; data += 64) {
+ for (; data < end64; data += 64, null_map += 64) {
num += __builtin_popcountll(
static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const
__m128i*>(data)),
diff --git a/be/test/util/bit_util_test.cpp b/be/test/util/bit_util_test.cpp
index f355b6a1c04..12345d03417 100644
--- a/be/test/util/bit_util_test.cpp
+++ b/be/test/util/bit_util_test.cpp
@@ -19,11 +19,16 @@
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
#include <bit>
#include <boost/utility/binary.hpp>
+#include <random>
#include "gtest/gtest_pred_impl.h"
+#include "runtime/primitive_type.h"
+#include "util/simd/bits.h"
+#include "vec/columns/column_nullable.h"
#include "vec/common/endian.h"
namespace doris {
@@ -56,4 +61,92 @@ TEST(BitUtil, BigEndianToHost) {
wide::UInt256(0xf0debc9a78563412) << 64 |
wide::UInt256(0xf0debc9a78563412));
}
+void insert_true(vectorized::ColumnNullable* column, size_t num = 1) {
+ for (int i = 0; i < num; i++) {
+
assert_cast<vectorized::ColumnUInt8*>(column->get_nested_column_ptr().get())
+ ->insert_value(1);
+ column->push_false_to_nullmap(1);
+ }
+}
+
+void insert_false(vectorized::ColumnNullable* column, size_t num = 1) {
+ for (int i = 0; i < num; i++) {
+
assert_cast<vectorized::ColumnUInt8*>(column->get_nested_column_ptr().get())
+ ->insert_value(0);
+ column->push_false_to_nullmap(1);
+ }
+}
+
+void insert_null(vectorized::ColumnNullable* column, size_t num = 1) {
+ for (int i = 0; i < num; i++) {
+ column->insert_default();
+ }
+}
+
+size_t brute_force_count_zero_num(const uint8_t* __restrict data,
+ const uint8_t* __restrict null_map, size_t
size) {
+ size_t num = 0;
+ for (size_t i = 0; i < size; ++i) {
+ if (data[i] == 0 || null_map[i]) {
+ num++;
+ }
+ }
+ return num;
+}
+
+TEST(BitUtil, CountZero) {
+ {
+ auto column =
vectorized::ColumnNullable::create(vectorized::ColumnUInt8::create(),
+
vectorized::ColumnUInt8::create());
+ insert_false(column.get(), 5);
+ insert_null(column.get(), 1);
+ insert_false(column.get(), 8);
+ insert_null(column.get(), 1);
+ insert_false(column.get(), 54);
+ insert_true(column.get(), 1);
+ insert_false(column.get(), 14);
+ ASSERT_EQ(
+ brute_force_count_zero_num(assert_cast<const
vectorized::ColumnUInt8*>(
+
column->get_nested_column_ptr().get())
+ ->get_data()
+ .data(),
+ column->get_null_map_data().data(),
column->size()),
+ simd::count_zero_num((int8_t*)assert_cast<const
vectorized::ColumnUInt8*>(
+
column->get_nested_column_ptr().get())
+ ->get_data()
+ .data(),
+ column->get_null_map_data().data(),
(uint32_t)column->size()));
+ }
+
+ {
+ auto column =
vectorized::ColumnNullable::create(vectorized::ColumnUInt8::create(),
+
vectorized::ColumnUInt8::create());
+ std::mt19937 rng(12345);
+ std::uniform_int_distribution<int> val_dist(0, 1);
+ std::uniform_int_distribution<int> null_dist(0, 5);
+ for (int i = 0; i < 10000; ++i) {
+ if (null_dist(rng) == 0) {
+ insert_null(column.get(), 1);
+ } else {
+ if (val_dist(rng) == 0) {
+ insert_false(column.get(), 1);
+ } else {
+ insert_true(column.get(), 1);
+ }
+ }
+ }
+ ASSERT_EQ(
+ brute_force_count_zero_num(assert_cast<const
vectorized::ColumnUInt8*>(
+
column->get_nested_column_ptr().get())
+ ->get_data()
+ .data(),
+ column->get_null_map_data().data(),
column->size()),
+ simd::count_zero_num((int8_t*)assert_cast<const
vectorized::ColumnUInt8*>(
+
column->get_nested_column_ptr().get())
+ ->get_data()
+ .data(),
+ column->get_null_map_data().data(),
(uint32_t)column->size()));
+ }
+}
+
} // namespace doris
diff --git
a/regression-test/data/query_p0/sql_functions/conditional_functions/test_conditional_function.out
b/regression-test/data/query_p0/sql_functions/conditional_functions/test_conditional_function.out
index 417f851a067..83a491bf27e 100644
Binary files
a/regression-test/data/query_p0/sql_functions/conditional_functions/test_conditional_function.out
and
b/regression-test/data/query_p0/sql_functions/conditional_functions/test_conditional_function.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/conditional_functions/data.txt
b/regression-test/suites/query_p0/sql_functions/conditional_functions/data.txt
new file mode 100644
index 00000000000..1501e0a3165
--- /dev/null
+++
b/regression-test/suites/query_p0/sql_functions/conditional_functions/data.txt
@@ -0,0 +1 @@
+insert into
table_800_undef_partitions2_keys3_properties4_distributed_by524(pk,col_boolean_undef_signed,col_boolean_undef_signed_not_null,col_tinyint_undef_signed,col_tinyint_undef_signed_index_inverted,col_tinyint_undef_signed_not_null,col_tinyint_undef_signed_not_null_index_inverted,col_smallint_undef_signed,col_smallint_undef_signed_index_inverted,col_smallint_undef_signed_not_null,col_smallint_undef_signed_not_null_index_inverted,col_int_undef_signed,col_int_undef_signed_index_invert
[...]
diff --git
a/regression-test/suites/query_p0/sql_functions/conditional_functions/test_conditional_function.groovy
b/regression-test/suites/query_p0/sql_functions/conditional_functions/test_conditional_function.groovy
index 8f1b659588c..bee3b53f95f 100644
---
a/regression-test/suites/query_p0/sql_functions/conditional_functions/test_conditional_function.groovy
+++
b/regression-test/suites/query_p0/sql_functions/conditional_functions/test_conditional_function.groovy
@@ -15,6 +15,10 @@
// specific language governing permissions and limitations
// under the License.
+import groovy.io.FileType
+import java.nio.file.Files
+import java.nio.file.Paths
+
suite("test_conditional_function") {
sql "set batch_size = 4096;"
@@ -207,4 +211,128 @@ insert into
table_50_undef_partitions2_keys3_properties4_distributed_by54(pk,col
qt_test """
SELECT TO_DATE ( table1 . `col_date_undef_signed_not_null` ) AS field1, MAX(
distinct table1 . `col_int_undef_signed_not_null` ) AS field2, ( TO_DATE (CASE
table1 . col_date_undef_signed_not_null WHEN table1 .
col_date_undef_signed_not_null THEN DATE_ADD( table1 .
`col_date_undef_signed_not_null` , INTERVAL 3 YEAR ) WHEN table1 .
col_date_undef_signed THEN '2024-01-31' WHEN '2025-02-18' THEN '2024-02-18'
WHEN '2008-09-25' THEN DATE_SUB( table1 . `col_date_undef_signed` , INTERVAL 7
DAY ) [...]
"""
+
+ sql "drop table if exists
table_800_undef_partitions2_keys3_properties4_distributed_by524;"
+ sql """
+create table table_800_undef_partitions2_keys3_properties4_distributed_by524 (
+pk int,
+col_int_undef_signed_index_inverted int null ,
+col_date_undef_signed_not_null date not null ,
+col_varchar_1024__undef_signed varchar(1024) null ,
+col_boolean_undef_signed boolean null ,
+col_boolean_undef_signed_not_null boolean not null ,
+col_tinyint_undef_signed tinyint null ,
+col_tinyint_undef_signed_index_inverted tinyint null ,
+col_tinyint_undef_signed_not_null tinyint not null ,
+col_tinyint_undef_signed_not_null_index_inverted tinyint not null ,
+col_smallint_undef_signed smallint null ,
+col_smallint_undef_signed_index_inverted smallint null ,
+col_smallint_undef_signed_not_null smallint not null ,
+col_smallint_undef_signed_not_null_index_inverted smallint not null ,
+col_int_undef_signed int null ,
+col_int_undef_signed_not_null int not null ,
+col_int_undef_signed_not_null_index_inverted int not null ,
+col_bigint_undef_signed bigint null ,
+col_bigint_undef_signed_index_inverted bigint null ,
+col_bigint_undef_signed_not_null bigint not null ,
+col_bigint_undef_signed_not_null_index_inverted bigint not null ,
+col_decimal_16__8__undef_signed decimal(16, 8) null ,
+col_decimal_16__8__undef_signed_index_inverted decimal(16, 8) null ,
+col_decimal_16__8__undef_signed_not_null decimal(16, 8) not null ,
+col_decimal_16__8__undef_signed_not_null_index_inverted decimal(16, 8) not
null ,
+col_decimal_38__9__undef_signed decimal(38, 9) null ,
+col_decimal_38__9__undef_signed_index_inverted decimal(38, 9) null ,
+col_decimal_38__9__undef_signed_not_null decimal(38, 9) not null ,
+col_decimal_38__9__undef_signed_not_null_index_inverted decimal(38, 9) not
null ,
+col_decimal_38__30__undef_signed decimal(38, 30) null ,
+col_decimal_38__30__undef_signed_index_inverted decimal(38, 30) null ,
+col_decimal_38__30__undef_signed_not_null decimal(38, 30) not null ,
+col_decimal_38__30__undef_signed_not_null_index_inverted decimal(38, 30) not
null ,
+col_date_undef_signed date null ,
+col_date_undef_signed_index_inverted date null ,
+col_date_undef_signed_not_null_index_inverted date not null ,
+col_datetime_undef_signed datetime null ,
+col_datetime_undef_signed_index_inverted datetime null ,
+col_datetime_undef_signed_not_null datetime not null ,
+col_datetime_undef_signed_not_null_index_inverted datetime not null ,
+col_datetime_3__undef_signed datetime(3) null ,
+col_datetime_3__undef_signed_index_inverted datetime(3) null ,
+col_datetime_3__undef_signed_not_null datetime(3) not null ,
+col_datetime_3__undef_signed_not_null_index_inverted datetime(3) not null ,
+col_datetime_6__undef_signed datetime(6) null ,
+col_datetime_6__undef_signed_index_inverted datetime(6) null ,
+col_datetime_6__undef_signed_not_null datetime(6) not null ,
+col_datetime_6__undef_signed_not_null_index_inverted datetime(6) not null ,
+col_char_255__undef_signed char(255) null ,
+col_char_255__undef_signed_index_inverted char(255) null ,
+col_char_255__undef_signed_index_inverted_p_e char(255) null ,
+col_char_255__undef_signed_index_inverted_p_u char(255) null ,
+col_char_255__undef_signed_not_null char(255) not null ,
+col_char_255__undef_signed_not_null_index_inverted char(255) not null ,
+col_char_255__undef_signed_not_null_index_inverted_p_e char(255) not null ,
+col_char_255__undef_signed_not_null_index_inverted_p_u char(255) not null ,
+col_varchar_1024__undef_signed_index_inverted varchar(1024) null ,
+col_varchar_1024__undef_signed_index_inverted_p_e varchar(1024) null ,
+col_varchar_1024__undef_signed_index_inverted_p_u varchar(1024) null ,
+col_varchar_1024__undef_signed_not_null varchar(1024) not null ,
+col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024) not null
,
+col_varchar_1024__undef_signed_not_null_index_inverted_p_e varchar(1024) not
null ,
+col_varchar_1024__undef_signed_not_null_index_inverted_p_u varchar(1024) not
null ,
+INDEX col_tinyint_undef_signed_index_inverted_idx
(`col_tinyint_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_tinyint_undef_signed_not_null_index_inverted_idx
(`col_tinyint_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_smallint_undef_signed_index_inverted_idx
(`col_smallint_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_smallint_undef_signed_not_null_index_inverted_idx
(`col_smallint_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_int_undef_signed_index_inverted_idx
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_int_undef_signed_not_null_index_inverted_idx
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_bigint_undef_signed_index_inverted_idx
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_bigint_undef_signed_not_null_index_inverted_idx
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_decimal_16__8__undef_signed_index_inverted_idx
(`col_decimal_16__8__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_decimal_16__8__undef_signed_not_null_index_inverted_idx
(`col_decimal_16__8__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_decimal_38__9__undef_signed_index_inverted_idx
(`col_decimal_38__9__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_decimal_38__9__undef_signed_not_null_index_inverted_idx
(`col_decimal_38__9__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_decimal_38__30__undef_signed_index_inverted_idx
(`col_decimal_38__30__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_decimal_38__30__undef_signed_not_null_index_inverted_idx
(`col_decimal_38__30__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_date_undef_signed_index_inverted_idx
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_date_undef_signed_not_null_index_inverted_idx
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_datetime_undef_signed_index_inverted_idx
(`col_datetime_undef_signed_index_inverted`) USING INVERTED,
+INDEX col_datetime_undef_signed_not_null_index_inverted_idx
(`col_datetime_undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_datetime_3__undef_signed_index_inverted_idx
(`col_datetime_3__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_datetime_3__undef_signed_not_null_index_inverted_idx
(`col_datetime_3__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_datetime_6__undef_signed_index_inverted_idx
(`col_datetime_6__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_datetime_6__undef_signed_not_null_index_inverted_idx
(`col_datetime_6__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_char_255__undef_signed_index_inverted_idx
(`col_char_255__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_char_255__undef_signed_index_inverted_p_e_idx
(`col_char_255__undef_signed_index_inverted_p_e`) USING INVERTED
PROPERTIES("parser" = "english"),
+INDEX col_char_255__undef_signed_index_inverted_p_u_idx
(`col_char_255__undef_signed_index_inverted_p_u`) USING INVERTED
PROPERTIES("parser" = "unicode"),
+INDEX col_char_255__undef_signed_not_null_index_inverted_idx
(`col_char_255__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_char_255__undef_signed_not_null_index_inverted_p_e_idx
(`col_char_255__undef_signed_not_null_index_inverted_p_e`) USING INVERTED
PROPERTIES("parser" = "english"),
+INDEX col_char_255__undef_signed_not_null_index_inverted_p_u_idx
(`col_char_255__undef_signed_not_null_index_inverted_p_u`) USING INVERTED
PROPERTIES("parser" = "unicode"),
+INDEX col_varchar_1024__undef_signed_index_inverted_idx
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+INDEX col_varchar_1024__undef_signed_index_inverted_p_e_idx
(`col_varchar_1024__undef_signed_index_inverted_p_e`) USING INVERTED
PROPERTIES("parser" = "english"),
+INDEX col_varchar_1024__undef_signed_index_inverted_p_u_idx
(`col_varchar_1024__undef_signed_index_inverted_p_u`) USING INVERTED
PROPERTIES("parser" = "unicode"),
+INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED,
+INDEX col_varchar_1024__undef_signed_not_null_index_inverted_p_e_idx
(`col_varchar_1024__undef_signed_not_null_index_inverted_p_e`) USING INVERTED
PROPERTIES("parser" = "english"),
+INDEX col_varchar_1024__undef_signed_not_null_index_inverted_p_u_idx
(`col_varchar_1024__undef_signed_not_null_index_inverted_p_u`) USING INVERTED
PROPERTIES("parser" = "unicode")
+) engine=olap
+UNIQUE KEY(pk, col_int_undef_signed_index_inverted,
col_date_undef_signed_not_null, col_varchar_1024__undef_signed)
+distributed by hash(pk) buckets 10
+properties("bloom_filter_columns" = "col_int_undef_signed,
col_int_undef_signed_not_null, col_date_undef_signed_not_null,
col_varchar_1024__undef_signed, col_varchar_1024__undef_signed_not_null",
"replication_num" = "1");
+ """
+ def sqlFile = new File(context.file.parent+'/data.txt')
+ sql """$sqlFile.text"""
+
+ qt_test """
+SELECT
+ col_date_undef_signed
+FROM
+ table_800_undef_partitions2_keys3_properties4_distributed_by524
+where
+ (
+ case
+ col_date_undef_signed
+ when "2024-01-09" then 1
+ when "2023-12-10" then 2
+ else 0
+ end
+ ) = 1;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]