This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1c3cc77a54 [fix](function) to_bitmap parameter parsing failure returns null instead of bitmap_empty (#21236) 1c3cc77a54 is described below commit 1c3cc77a54938ed948ad8186b8dea8385977d23c Author: ZenoYang <cookie...@qq.com> AuthorDate: Fri Aug 18 14:37:49 2023 +0800 [fix](function) to_bitmap parameter parsing failure returns null instead of bitmap_empty (#21236) * [fix](function) to_bitmap parameter parsing failure returns null instead of bitmap_empty * add ut * fix nereids * fix regression-test --- be/src/util/bitmap_value.h | 18 ++++++ be/src/vec/functions/function_bitmap.cpp | 71 ++++++++-------------- be/test/vec/function/function_bitmap_test.cpp | 27 ++++++++ be/test/vec/function/function_test_util.h | 22 +++++++ .../expressions/functions/scalar/ToBitmap.java | 4 +- gensrc/script/doris_builtins_functions.py | 6 +- .../data/nereids_function_p0/scalar_function/B.out | 6 +- .../bitmap_functions/test_bitmap_function.out | 2 +- .../bitmap_functions/test_bitmap_function.out | 2 +- 9 files changed, 101 insertions(+), 57 deletions(-) diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 52c4caaa41..ec75c4141c 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1846,6 +1846,24 @@ public: return *this; } + bool operator==(const BitmapValue& rhs) const { + if (_type != rhs._type) return false; + + switch (_type) { + case BitmapValue::BitmapDataType::EMPTY: + return true; + case BitmapValue::BitmapDataType::SINGLE: + return _sv == rhs._sv; + case BitmapValue::BitmapDataType::BITMAP: + return _bitmap == rhs._bitmap; + case BitmapValue::BitmapDataType::SET: + return _set == rhs._set; + default: + CHECK(false) << _type; + } + return false; + } + // check if value x is present bool contains(uint64_t x) const { switch (_type) { diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 578ce0e34d..5b03abdcbe 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -86,55 +86,31 @@ struct BitmapEmpty { struct ToBitmap { static constexpr auto name = "to_bitmap"; - using ReturnType = DataTypeBitMap; - - template <typename ColumnType> - static void vector(const ColumnType* col, MutableColumnPtr& col_res) { - execute<ColumnType, false>(col, nullptr, col_res); - } - template <typename ColumnType> - static void vector_nullable(const ColumnType* col, const NullMap& nullmap, - MutableColumnPtr& col_res) { - execute<ColumnType, true>(col, &nullmap, col_res); - } - template <typename ColumnType, bool arg_is_nullable> - static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) { - if constexpr (std::is_same_v<ColumnType, ColumnString>) { - const ColumnString::Chars& data = col->get_chars(); - const ColumnString::Offsets& offsets = col->get_offsets(); - - auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); - auto& res_data = res_column->get_data(); - size_t size = offsets.size(); - - for (size_t i = 0; i < size; ++i) { - if (arg_is_nullable && ((*nullmap)[i])) { - continue; - } else { - const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); - size_t str_size = offsets[i] - offsets[i - 1]; - StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( - raw_str, str_size, &parse_result); - if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { - res_data[i].add(int_value); - } - } - } - } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { - auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); - auto& res_data = res_column->get_data(); - size_t size = col->size(); + using ArgumentType = DataTypeString; - for (size_t i = 0; i < size; ++i) { - if constexpr (arg_is_nullable) { - if ((*nullmap)[i]) { - continue; - } - } - res_data[i].add(col->get_data()[i]); + static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, + std::vector<BitmapValue>& res_data, NullMap& null_map, + size_t input_rows_count) { + res_data.resize(input_rows_count); + if (offsets.size() == 0 && input_rows_count == 1) { + // For NULL constant + res_data.emplace_back(); + null_map[0] = 1; + return Status::OK(); + } + for (size_t i = 0; i < input_rows_count; ++i) { + const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); + size_t str_size = offsets[i] - offsets[i - 1]; + StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; + uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(raw_str, str_size, + &parse_result); + if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { + res_data[i].add(int_value); + } else { + null_map[i] = 1; } } + return Status::OK(); } }; @@ -311,6 +287,7 @@ public: auto& res = res_data_column->get_data(); ColumnPtr& argument_column = block.get_by_position(arguments[0]).column; + // todo(zeno) int can avoid cast into string or char if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) { const auto& str_column = static_cast<const ColumnString&>(*argument_column); const ColumnString::Chars& data = str_column.get_chars(); @@ -1105,7 +1082,7 @@ public: }; using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>; -using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>; +using FunctionToBitmap = FunctionBitmapAlwaysNull<ToBitmap>; using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>; using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>; diff --git a/be/test/vec/function/function_bitmap_test.cpp b/be/test/vec/function/function_bitmap_test.cpp index 689199e5c5..d8b19e5c78 100644 --- a/be/test/vec/function/function_bitmap_test.cpp +++ b/be/test/vec/function/function_bitmap_test.cpp @@ -27,6 +27,7 @@ #include "testutil/any_type.h" #include "util/bitmap_value.h" #include "vec/core/types.h" +#include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" @@ -207,4 +208,30 @@ TEST(function_bitmap_test, function_bitmap_has_all) { check_function<DataTypeUInt8, true>(func_name, input_types, data_set); } +TEST(function_bitmap_test, function_to_bitmap) { + std::string func_name = "to_bitmap"; + InputTypeSet input_types = {TypeIndex::String}; + + BitmapValue bitmap1(1); + DataSet data_set = {{{std::string("1")}, &bitmap1}, + {{std::string("-1")}, Null()}, + {{std::string("1.11")}, Null()}, + {{Null()}, Null()}}; + + check_function<DataTypeBitMap, true>(func_name, input_types, data_set); +} + +TEST(function_bitmap_test, function_bitmap_from_string) { + std::string func_name = "bitmap_from_string"; + InputTypeSet input_types = {TypeIndex::String}; + + BitmapValue bitmap1(1); + DataSet data_set = {{{std::string("1")}, &bitmap1}, + {{std::string("-1")}, Null()}, + {{std::string("1.11")}, Null()}, + {{Null()}, Null()}}; + + check_function<DataTypeBitMap, true>(func_name, input_types, data_set); +} + } // namespace doris::vectorized diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index c4e3f4379d..9ca385b154 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -43,6 +43,7 @@ #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/functions/simple_function_factory.h" @@ -290,6 +291,17 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty ColumnPtr column = block.get_columns()[result]; EXPECT_TRUE(column != nullptr); + ColumnPtr nested_col = nullptr; + ColumnPtr null_map_col = ColumnUInt8::create(row_size, 0); + if (doris::vectorized::is_column_nullable(*column)) { + const auto& null_column = check_and_get_column<ColumnNullable>(column); + nested_col = null_column->get_nested_column_ptr(); + null_map_col = null_column->get_null_map_column_ptr(); + } else { + nested_col = column; + } + const auto& null_map = check_and_get_column<ColumnUInt8>(null_map_col)->get_data(); + for (int i = 0; i < row_size; ++i) { auto check_column_data = [&]() { if constexpr (std::is_same_v<ReturnType, DataTypeJsonb>) { @@ -303,6 +315,16 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty EXPECT_EQ(expect_data, JsonbToJson::jsonb_to_json_string(s.data, s.size)) << " at row " << i; } + } else if constexpr (std::is_same_v<ReturnType, DataTypeBitMap>) { + const auto* expect_data = + any_cast<typename ReturnType::FieldType*>(data_set[i].second); + if (null_map[i]) { + EXPECT_TRUE(expect_data->empty()) << " at row " << i; + } else { + auto& bitmap_value = + check_and_get_column<ColumnBitmap>(nested_col)->get_data()[i]; + EXPECT_EQ(*expect_data, bitmap_value) << " at row " << i; + } } else { Field field; column->get(i, field); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java index cf833ed247..5a3e9647f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java @@ -19,7 +19,7 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -36,7 +36,7 @@ import java.util.List; * ScalarFunction 'to_bitmap'. This class is generated by GenerateFunction. */ public class ToBitmap extends ScalarFunction - implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable { + implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( FunctionSignature.ret(BitmapType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT), diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 309e5f4c77..72ef074d7d 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1758,11 +1758,11 @@ visible_functions = { #bitmap function "Bitmap": [ - [['to_bitmap'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], + [['to_bitmap'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NULLABLE'], [['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], - [['to_bitmap'], 'BITMAP', ['STRING'], 'ALWAYS_NOT_NULLABLE'], + [['to_bitmap'], 'BITMAP', ['STRING'], 'ALWAYS_NULLABLE'], [['to_bitmap_with_check'], 'BITMAP', ['STRING'], 'ALWAYS_NOT_NULLABLE'], - [['to_bitmap'], 'BITMAP', ['BIGINT'], 'ALWAYS_NOT_NULLABLE'], + [['to_bitmap'], 'BITMAP', ['BIGINT'], 'ALWAYS_NULLABLE'], [['to_bitmap_with_check'], 'BITMAP', ['BIGINT'], 'ALWAYS_NOT_NULLABLE'], [['bitmap_hash'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], [['bitmap_hash64'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], diff --git a/regression-test/data/nereids_function_p0/scalar_function/B.out b/regression-test/data/nereids_function_p0/scalar_function/B.out index 04a79b36a7..f1ec59eb04 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/B.out +++ b/regression-test/data/nereids_function_p0/scalar_function/B.out @@ -319,7 +319,7 @@ true \N -- !sql_bitmap_has_all_Bitmap_Bitmap -- -true +\N true true true @@ -348,7 +348,7 @@ true true -- !sql_bitmap_has_any_Bitmap_Bitmap -- -false +\N true true true @@ -696,7 +696,7 @@ true \N -- !sql_bitmap_to_string_Bitmap -- - +\N 1 2 3 diff --git a/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out b/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out index d344ad27bf..0f784e635c 100644 --- a/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out +++ b/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out @@ -406,7 +406,7 @@ true 1 -- !sql -- - +\N -- !sql -- \N diff --git a/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out index 4eeae0ccd9..885a24ddee 100644 --- a/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out +++ b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out @@ -406,7 +406,7 @@ true 1 -- !sql -- - +\N -- !sql -- \N --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org