github-actions[bot] commented on code in PR #24554: URL: https://github.com/apache/doris/pull/24554#discussion_r1378436935
########## be/src/vec/data_types/serde/data_type_serde.cpp: ########## @@ -41,5 +42,56 @@ DataTypeSerDeSPtrs create_data_type_serdes(const std::vector<SlotDescriptor*>& s } return serdes; } + +void DataTypeSerDe::convert_array_to_rapidjson(const vectorized::Array& array, + rapidjson::Value& target, + rapidjson::Document::AllocatorType& allocator) { + for (const vectorized::Field& item : array) { + target.SetArray(); + rapidjson::Value val; + convert_field_to_rapidjson(item, val, allocator); + target.PushBack(val, allocator); + } +} + +void DataTypeSerDe::convert_field_to_rapidjson(const vectorized::Field& field, Review Comment: warning: method 'convert_field_to_rapidjson' can be made static [readability-convert-member-functions-to-static] ```suggestion static void DataTypeSerDe::convert_field_to_rapidjson(const vectorized::Field& field, ``` ########## be/src/vec/functions/function_cast.h: ########## @@ -1892,12 +1995,119 @@ case TypeIndex::Float64: return &ConvertImplNumberToJsonb<ColumnFloat64>::execute; case TypeIndex::String: - return &ConvertImplGenericFromString::execute; + if (string_as_jsonb_string) { + // We convert column string to jsonb type just add a string jsonb field to dst column instead of parse + // each line in original string column. + return &ConvertImplStringToJsonbAsJsonbString::execute; + } else { + return &ConvertImplGenericFromString::execute; + } Review Comment: warning: do not use 'else' after 'return' [readability-else-after-return] ```suggestion } return &ConvertImplGenericFromString::execute; ``` ########## be/src/vec/functions/function_cast.h: ########## @@ -1865,17 +1963,22 @@ return &ConvertImplFromJsonb<TypeIndex::Int128, ColumnInt128>::execute; case TypeIndex::Float64: return &ConvertImplFromJsonb<TypeIndex::Float64, ColumnFloat64>::execute; + case TypeIndex::String: + if (!jsonb_string_as_string) { + // Conversion from String through parsing. + return &ConvertImplGenericToString::execute2; + } else { + return ConvertImplGenericFromJsonb::execute; + } Review Comment: warning: do not use 'else' after 'return' [readability-else-after-return] ```suggestion } return ConvertImplGenericFromJsonb::execute; ``` ########## be/src/vec/data_types/serde/data_type_string_serde.cpp: ########## @@ -257,5 +260,26 @@ Status DataTypeStringSerDe::write_column_to_orc(const std::string& timezone, con return Status::OK(); } +void DataTypeStringSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, Review Comment: warning: method 'write_one_cell_to_json' can be made static [readability-convert-member-functions-to-static] ```suggestion static void DataTypeStringSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, ``` be/src/vec/data_types/serde/data_type_string_serde.cpp:264: ```diff - int row_num) const { + int row_num) { ``` ########## be/src/vec/functions/function_cast.h: ########## @@ -640,10 +644,104 @@ struct ConvertImplNumberToJsonb { } }; +struct ConvertImplStringToJsonbAsJsonbString { + static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + const size_t result, size_t input_rows_count) { + auto data_type_to = block.get_by_position(result).type; + const auto& col_with_type_and_name = block.get_by_position(arguments[0]); + const IColumn& col_from = *col_with_type_and_name.column; + auto dst = ColumnString::create(); + ColumnString* dst_str = assert_cast<ColumnString*>(dst.get()); + const auto* from_string = assert_cast<const ColumnString*>(&col_from); + JsonbWriter writer; + for (size_t i = 0; i < input_rows_count; i++) { + auto str_ref = from_string->get_data_at(i); + writer.reset(); + // write raw string to jsonb + writer.writeStartString(); + writer.writeString(str_ref.data, str_ref.size); + writer.writeEndString(); + dst_str->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); + } + block.replace_by_position(result, std::move(dst)); + return Status::OK(); + } +}; + +struct ConvertImplGenericFromJsonb { + static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + const size_t result, size_t input_rows_count) { + auto data_type_to = block.get_by_position(result).type; + const auto& col_with_type_and_name = block.get_by_position(arguments[0]); + const IColumn& col_from = *col_with_type_and_name.column; + if (const ColumnString* col_from_string = check_and_get_column<ColumnString>(&col_from)) { + auto col_to = data_type_to->create_column(); + + size_t size = col_from.size(); + col_to->reserve(size); + + ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size); + ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data(); + const bool is_complex = is_complex_type(data_type_to); + const bool is_dst_string = is_string_or_fixed_string(data_type_to); + for (size_t i = 0; i < size; ++i) { + const auto& val = col_from_string->get_data_at(i); + JsonbDocument* doc = JsonbDocument::createDocument(val.data, val.size); + if (UNLIKELY(!doc || !doc->getValue())) { Review Comment: warning: boolean expression can be simplified by DeMorgan's theorem [readability-simplify-boolean-expr] ```cpp if (UNLIKELY(!doc || !doc->getValue())) { ^ ``` <details> <summary>Additional context</summary> **be/src/common/compiler_util.h:35:** expanded from macro 'UNLIKELY' ```cpp #define UNLIKELY(expr) __builtin_expect(!!(expr), 0) ^ ``` </details> ########## be/src/vec/data_types/serde/data_type_nullable_serde.cpp: ########## @@ -342,5 +342,30 @@ Status DataTypeNullableSerDe::write_column_to_orc(const std::string& timezone, const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE = "\\N"; const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_NESTED_TYPE = "null"; +void DataTypeNullableSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, + int row_num) const { + auto& col = static_cast<const ColumnNullable&>(column); + auto& nested_col = col.get_nested_column(); + if (col.is_null_at(row_num)) { + result.SetNull(); + } else { + nested_serde->write_one_cell_to_json(nested_col, result, allocator, row_num); + } +} + +void DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column, + const rapidjson::Value& result) const { + auto& col = static_cast<ColumnNullable&>(column); Review Comment: warning: method 'read_one_cell_from_json' can be made static [readability-convert-member-functions-to-static] ```suggestion ,static { ``` ########## be/src/vec/data_types/serde/data_type_string_serde.cpp: ########## @@ -257,5 +260,26 @@ return Status::OK(); } +void DataTypeStringSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, + rapidjson::Document::AllocatorType& allocator, + int row_num) const { + const auto& col = static_cast<const ColumnString&>(column); + const auto& data_ref = col.get_data_at(row_num); + result.SetString(data_ref.data, data_ref.size); +} + +void DataTypeStringSerDe::read_one_cell_from_json(IColumn& column, + const rapidjson::Value& result) const { Review Comment: warning: method 'read_one_cell_from_json' can be made static [readability-convert-member-functions-to-static] ```suggestion static void DataTypeStringSerDe::read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) { ``` ########## be/src/vec/functions/function_cast.h: ########## @@ -640,10 +644,104 @@ } }; +struct ConvertImplStringToJsonbAsJsonbString { + static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + const size_t result, size_t input_rows_count) { + auto data_type_to = block.get_by_position(result).type; + const auto& col_with_type_and_name = block.get_by_position(arguments[0]); + const IColumn& col_from = *col_with_type_and_name.column; + auto dst = ColumnString::create(); + ColumnString* dst_str = assert_cast<ColumnString*>(dst.get()); + const auto* from_string = assert_cast<const ColumnString*>(&col_from); + JsonbWriter writer; + for (size_t i = 0; i < input_rows_count; i++) { + auto str_ref = from_string->get_data_at(i); + writer.reset(); + // write raw string to jsonb + writer.writeStartString(); + writer.writeString(str_ref.data, str_ref.size); + writer.writeEndString(); + dst_str->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); + } + block.replace_by_position(result, std::move(dst)); + return Status::OK(); + } +}; + +struct ConvertImplGenericFromJsonb { + static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + const size_t result, size_t input_rows_count) { + auto data_type_to = block.get_by_position(result).type; + const auto& col_with_type_and_name = block.get_by_position(arguments[0]); + const IColumn& col_from = *col_with_type_and_name.column; + if (const ColumnString* col_from_string = check_and_get_column<ColumnString>(&col_from)) { + auto col_to = data_type_to->create_column(); + + size_t size = col_from.size(); + col_to->reserve(size); + + ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size); + ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data(); + const bool is_complex = is_complex_type(data_type_to); + const bool is_dst_string = is_string_or_fixed_string(data_type_to); + for (size_t i = 0; i < size; ++i) { + const auto& val = col_from_string->get_data_at(i); + JsonbDocument* doc = JsonbDocument::createDocument(val.data, val.size); + if (UNLIKELY(!doc || !doc->getValue())) { + (*vec_null_map_to)[i] = 1; + col_to->insert_default(); + continue; + } + + // value is NOT necessary to be deleted since JsonbValue will not allocate memory + JsonbValue* value = doc->getValue(); + if (UNLIKELY(!value)) { + (*vec_null_map_to)[i] = 1; + col_to->insert_default(); + continue; + } + // Note: here we should handle the null element + if (val.size == 0) { + col_to->insert_default(); + // empty string('') is an invalid format for complex type, set null_map to 1 + if (is_complex) { + (*vec_null_map_to)[i] = 1; + } + continue; + } + // add string to string column + if (context->jsonb_string_as_string() && is_dst_string && value->isString()) { + auto blob = static_cast<const JsonbBlobVal*>(value); Review Comment: warning: 'auto blob' can be declared as 'const auto *blob' [readability-qualified-auto] ```suggestion const auto *blob = static_cast<const JsonbBlobVal*>(value); ``` ########## be/src/vec/olap/olap_data_convertor.h: ########## @@ -198,6 +200,7 @@ class OlapBlockDataConvertor { size_t num_rows) override; const void* get_data() const override; const void* get_data_at(size_t offset) const override; + Status convert_to_olap(const UInt8* null_map, const ColumnString* column_array); Review Comment: warning: function 'std::doris::vectorized::OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap' has a definition with different parameter names [readability-inconsistent-declaration-parameter-name] ```cpp Status convert_to_olap(const UInt8* null_map, const ColumnString* column_array); ^ ``` <details> <summary>Additional context</summary> **be/src/vec/olap/olap_data_convertor.cpp:604:** the definition seen here ```cpp Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap( ^ ``` **be/src/vec/olap/olap_data_convertor.h:202:** differing parameters are named here: ('column_array'), in definition: ('column_string') ```cpp Status convert_to_olap(const UInt8* null_map, const ColumnString* column_array); ^ ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org