This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 66c5b914566f3400ea99d54f4e57f1b99d27cc79 Author: Kang <kxiao.ti...@gmail.com> AuthorDate: Sat Aug 19 21:44:13 2023 +0800 Revert "[fix](function) to_bitmap parameter parsing failure returns null instead of bitmap_empty (#21236)" This reverts commit 73c65af1a55044732b65dcf6a2e68c8ac960906e. --- be/src/util/bitmap_value.h | 18 ------ be/src/vec/functions/function_bitmap.cpp | 71 ++++++++++++++-------- be/test/vec/function/function_bitmap_test.cpp | 27 -------- be/test/vec/function/function_test_util.h | 22 ------- .../expressions/functions/scalar/ToBitmap.java | 4 +- gensrc/script/doris_builtins_functions.py | 6 +- .../data/nereids_function_p0/scalar_function/B.out | 6 +- .../bitmap_functions/test_bitmap_function.out | 2 +- .../bitmap_functions/test_bitmap_function.out | 2 +- 9 files changed, 57 insertions(+), 101 deletions(-) diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index ec75c4141c..52c4caaa41 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1846,24 +1846,6 @@ public: return *this; } - bool operator==(const BitmapValue& rhs) const { - if (_type != rhs._type) return false; - - switch (_type) { - case BitmapValue::BitmapDataType::EMPTY: - return true; - case BitmapValue::BitmapDataType::SINGLE: - return _sv == rhs._sv; - case BitmapValue::BitmapDataType::BITMAP: - return _bitmap == rhs._bitmap; - case BitmapValue::BitmapDataType::SET: - return _set == rhs._set; - default: - CHECK(false) << _type; - } - return false; - } - // check if value x is present bool contains(uint64_t x) const { switch (_type) { diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 5b03abdcbe..578ce0e34d 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -86,31 +86,55 @@ struct BitmapEmpty { struct ToBitmap { static constexpr auto name = "to_bitmap"; - using ArgumentType = DataTypeString; + using ReturnType = DataTypeBitMap; - static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, - std::vector<BitmapValue>& res_data, NullMap& null_map, - size_t input_rows_count) { - res_data.resize(input_rows_count); - if (offsets.size() == 0 && input_rows_count == 1) { - // For NULL constant - res_data.emplace_back(); - null_map[0] = 1; - return Status::OK(); - } - for (size_t i = 0; i < input_rows_count; ++i) { - const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); - size_t str_size = offsets[i] - offsets[i - 1]; - StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(raw_str, str_size, - &parse_result); - if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { - res_data[i].add(int_value); - } else { - null_map[i] = 1; + template <typename ColumnType> + static void vector(const ColumnType* col, MutableColumnPtr& col_res) { + execute<ColumnType, false>(col, nullptr, col_res); + } + template <typename ColumnType> + static void vector_nullable(const ColumnType* col, const NullMap& nullmap, + MutableColumnPtr& col_res) { + execute<ColumnType, true>(col, &nullmap, col_res); + } + template <typename ColumnType, bool arg_is_nullable> + static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) { + if constexpr (std::is_same_v<ColumnType, ColumnString>) { + const ColumnString::Chars& data = col->get_chars(); + const ColumnString::Offsets& offsets = col->get_offsets(); + + auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); + auto& res_data = res_column->get_data(); + size_t size = offsets.size(); + + for (size_t i = 0; i < size; ++i) { + if (arg_is_nullable && ((*nullmap)[i])) { + continue; + } else { + const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); + size_t str_size = offsets[i] - offsets[i - 1]; + StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; + uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>( + raw_str, str_size, &parse_result); + if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { + res_data[i].add(int_value); + } + } + } + } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) { + auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get()); + auto& res_data = res_column->get_data(); + size_t size = col->size(); + + for (size_t i = 0; i < size; ++i) { + if constexpr (arg_is_nullable) { + if ((*nullmap)[i]) { + continue; + } + } + res_data[i].add(col->get_data()[i]); } } - return Status::OK(); } }; @@ -287,7 +311,6 @@ public: auto& res = res_data_column->get_data(); ColumnPtr& argument_column = block.get_by_position(arguments[0]).column; - // todo(zeno) int can avoid cast into string or char if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) { const auto& str_column = static_cast<const ColumnString&>(*argument_column); const ColumnString::Chars& data = str_column.get_chars(); @@ -1082,7 +1105,7 @@ public: }; using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>; -using FunctionToBitmap = FunctionBitmapAlwaysNull<ToBitmap>; +using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>; using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>; using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>; diff --git a/be/test/vec/function/function_bitmap_test.cpp b/be/test/vec/function/function_bitmap_test.cpp index d8b19e5c78..689199e5c5 100644 --- a/be/test/vec/function/function_bitmap_test.cpp +++ b/be/test/vec/function/function_bitmap_test.cpp @@ -27,7 +27,6 @@ #include "testutil/any_type.h" #include "util/bitmap_value.h" #include "vec/core/types.h" -#include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" @@ -208,30 +207,4 @@ TEST(function_bitmap_test, function_bitmap_has_all) { check_function<DataTypeUInt8, true>(func_name, input_types, data_set); } -TEST(function_bitmap_test, function_to_bitmap) { - std::string func_name = "to_bitmap"; - InputTypeSet input_types = {TypeIndex::String}; - - BitmapValue bitmap1(1); - DataSet data_set = {{{std::string("1")}, &bitmap1}, - {{std::string("-1")}, Null()}, - {{std::string("1.11")}, Null()}, - {{Null()}, Null()}}; - - check_function<DataTypeBitMap, true>(func_name, input_types, data_set); -} - -TEST(function_bitmap_test, function_bitmap_from_string) { - std::string func_name = "bitmap_from_string"; - InputTypeSet input_types = {TypeIndex::String}; - - BitmapValue bitmap1(1); - DataSet data_set = {{{std::string("1")}, &bitmap1}, - {{std::string("-1")}, Null()}, - {{std::string("1.11")}, Null()}, - {{Null()}, Null()}}; - - check_function<DataTypeBitMap, true>(func_name, input_types, data_set); -} - } // namespace doris::vectorized diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 9ca385b154..c4e3f4379d 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -43,7 +43,6 @@ #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" -#include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/functions/simple_function_factory.h" @@ -291,17 +290,6 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty ColumnPtr column = block.get_columns()[result]; EXPECT_TRUE(column != nullptr); - ColumnPtr nested_col = nullptr; - ColumnPtr null_map_col = ColumnUInt8::create(row_size, 0); - if (doris::vectorized::is_column_nullable(*column)) { - const auto& null_column = check_and_get_column<ColumnNullable>(column); - nested_col = null_column->get_nested_column_ptr(); - null_map_col = null_column->get_null_map_column_ptr(); - } else { - nested_col = column; - } - const auto& null_map = check_and_get_column<ColumnUInt8>(null_map_col)->get_data(); - for (int i = 0; i < row_size; ++i) { auto check_column_data = [&]() { if constexpr (std::is_same_v<ReturnType, DataTypeJsonb>) { @@ -315,16 +303,6 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty EXPECT_EQ(expect_data, JsonbToJson::jsonb_to_json_string(s.data, s.size)) << " at row " << i; } - } else if constexpr (std::is_same_v<ReturnType, DataTypeBitMap>) { - const auto* expect_data = - any_cast<typename ReturnType::FieldType*>(data_set[i].second); - if (null_map[i]) { - EXPECT_TRUE(expect_data->empty()) << " at row " << i; - } else { - auto& bitmap_value = - check_and_get_column<ColumnBitmap>(nested_col)->get_data()[i]; - EXPECT_EQ(*expect_data, bitmap_value) << " at row " << i; - } } else { Field field; column->get(i, field); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java index 5a3e9647f8..cf833ed247 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java @@ -19,7 +19,7 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -36,7 +36,7 @@ import java.util.List; * ScalarFunction 'to_bitmap'. This class is generated by GenerateFunction. */ public class ToBitmap extends ScalarFunction - implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable { + implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( FunctionSignature.ret(BitmapType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT), diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index d563d8560c..95926678b4 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1758,11 +1758,11 @@ visible_functions = { #bitmap function "Bitmap": [ - [['to_bitmap'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NULLABLE'], + [['to_bitmap'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], [['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], - [['to_bitmap'], 'BITMAP', ['STRING'], 'ALWAYS_NULLABLE'], + [['to_bitmap'], 'BITMAP', ['STRING'], 'ALWAYS_NOT_NULLABLE'], [['to_bitmap_with_check'], 'BITMAP', ['STRING'], 'ALWAYS_NOT_NULLABLE'], - [['to_bitmap'], 'BITMAP', ['BIGINT'], 'ALWAYS_NULLABLE'], + [['to_bitmap'], 'BITMAP', ['BIGINT'], 'ALWAYS_NOT_NULLABLE'], [['to_bitmap_with_check'], 'BITMAP', ['BIGINT'], 'ALWAYS_NOT_NULLABLE'], [['bitmap_hash'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], [['bitmap_hash64'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'], diff --git a/regression-test/data/nereids_function_p0/scalar_function/B.out b/regression-test/data/nereids_function_p0/scalar_function/B.out index f1ec59eb04..04a79b36a7 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/B.out +++ b/regression-test/data/nereids_function_p0/scalar_function/B.out @@ -319,7 +319,7 @@ true \N -- !sql_bitmap_has_all_Bitmap_Bitmap -- -\N +true true true true @@ -348,7 +348,7 @@ true true -- !sql_bitmap_has_any_Bitmap_Bitmap -- -\N +false true true true @@ -696,7 +696,7 @@ true \N -- !sql_bitmap_to_string_Bitmap -- -\N + 1 2 3 diff --git a/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out b/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out index 0f784e635c..d344ad27bf 100644 --- a/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out +++ b/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out @@ -406,7 +406,7 @@ true 1 -- !sql -- -\N + -- !sql -- \N diff --git a/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out index 885a24ddee..4eeae0ccd9 100644 --- a/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out +++ b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out @@ -406,7 +406,7 @@ true 1 -- !sql -- -\N + -- !sql -- \N --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org