xiaokang commented on code in PR #21555: URL: https://github.com/apache/doris/pull/21555#discussion_r1257406548
########## be/src/vec/functions/function_jsonb.cpp: ########## @@ -503,6 +508,93 @@ struct JsonbExtractStringImpl { public: // for jsonb_extract_string + static Status vector_vector_v2( + FunctionContext* context, const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, const bool& json_data_const, + const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths + const std::vector<bool>& path_const, ColumnString::Chars& res_data, + ColumnString::Offsets& res_offsets, NullMap& null_map, bool& is_invalid_json_path) { + size_t input_rows_count = json_data_const ? rdata_columns.size() : loffsets.size(); + res_offsets.resize(input_rows_count); + + std::unique_ptr<JsonbWriter> writer; + if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) { + writer.reset(new JsonbWriter()); + } + + std::unique_ptr<JsonbToJson> formater; + + for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, res_data, res_offsets, null_map); + continue; + } + size_t l_off = loffsets[index_check_const(i, json_data_const) - 1]; + size_t l_size = loffsets[index_check_const(i, json_data_const)] - l_off; + const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); + if (rdata_columns.size() == 1) { // just return origin value + const ColumnString* path_col = rdata_columns[0]; + const ColumnString::Chars& rdata = path_col->get_chars(); + const ColumnString::Offsets& roffsets = path_col->get_offsets(); + size_t r_off = roffsets[index_check_const(i, path_const[0]) - 1]; + size_t r_size = roffsets[index_check_const(i, path_const[0])] - r_off; + const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); + inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size, + r_raw, r_size, is_invalid_json_path); + } else { // will make array string to user + writer->reset(); + writer->writeStartArray(); + for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { + const ColumnString* path_col = rdata_columns[pi]; + const ColumnString::Chars& rdata = path_col->get_chars(); + const ColumnString::Offsets& roffsets = path_col->get_offsets(); + size_t r_off = roffsets[index_check_const(i, path_const[pi]) - 1]; + size_t r_size = roffsets[index_check_const(i, path_const[pi])] - r_off; + const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); + String path(r_raw, r_size); + // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory + JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size); + if (UNLIKELY(!doc || !doc->getValue())) { + writer->writeNull(); + continue; + } + // value is NOT necessary to be deleted since JsonbValue will not allocate memory + JsonbValue* value = + doc->getValue()->findPath(r_raw, r_size, is_invalid_json_path, nullptr); + // if not valid json path , should return error message to user + if (is_invalid_json_path) { + return Status::InvalidArgument( + "Json path error: {} for value: {}", + JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH), + std::string_view(reinterpret_cast<const char*>(rdata.data()), + rdata.size())); + } + if (UNLIKELY(!value)) { + writer->writeNull(); + continue; + } + writer->writeValue(value); Review Comment: put it in else branch ########## be/src/vec/functions/function_jsonb.cpp: ########## @@ -503,6 +508,93 @@ struct JsonbExtractStringImpl { public: // for jsonb_extract_string + static Status vector_vector_v2( + FunctionContext* context, const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, const bool& json_data_const, + const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths + const std::vector<bool>& path_const, ColumnString::Chars& res_data, + ColumnString::Offsets& res_offsets, NullMap& null_map, bool& is_invalid_json_path) { + size_t input_rows_count = json_data_const ? rdata_columns.size() : loffsets.size(); + res_offsets.resize(input_rows_count); + + std::unique_ptr<JsonbWriter> writer; + if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) { + writer.reset(new JsonbWriter()); + } + + std::unique_ptr<JsonbToJson> formater; + + for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, res_data, res_offsets, null_map); + continue; + } + size_t l_off = loffsets[index_check_const(i, json_data_const) - 1]; + size_t l_size = loffsets[index_check_const(i, json_data_const)] - l_off; + const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); + if (rdata_columns.size() == 1) { // just return origin value + const ColumnString* path_col = rdata_columns[0]; + const ColumnString::Chars& rdata = path_col->get_chars(); + const ColumnString::Offsets& roffsets = path_col->get_offsets(); + size_t r_off = roffsets[index_check_const(i, path_const[0]) - 1]; + size_t r_size = roffsets[index_check_const(i, path_const[0])] - r_off; + const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); + inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size, + r_raw, r_size, is_invalid_json_path); + } else { // will make array string to user + writer->reset(); + writer->writeStartArray(); + for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { + const ColumnString* path_col = rdata_columns[pi]; + const ColumnString::Chars& rdata = path_col->get_chars(); + const ColumnString::Offsets& roffsets = path_col->get_offsets(); + size_t r_off = roffsets[index_check_const(i, path_const[pi]) - 1]; + size_t r_size = roffsets[index_check_const(i, path_const[pi])] - r_off; + const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); + String path(r_raw, r_size); Review Comment: Use string_view to avoid copy if it's really used. ########## be/src/vec/functions/function_jsonb.cpp: ########## @@ -503,6 +508,93 @@ struct JsonbExtractStringImpl { public: // for jsonb_extract_string + static Status vector_vector_v2( + FunctionContext* context, const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, const bool& json_data_const, + const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths + const std::vector<bool>& path_const, ColumnString::Chars& res_data, + ColumnString::Offsets& res_offsets, NullMap& null_map, bool& is_invalid_json_path) { + size_t input_rows_count = json_data_const ? rdata_columns.size() : loffsets.size(); + res_offsets.resize(input_rows_count); + + std::unique_ptr<JsonbWriter> writer; + if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) { + writer.reset(new JsonbWriter()); + } + + std::unique_ptr<JsonbToJson> formater; + + for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, res_data, res_offsets, null_map); + continue; + } + size_t l_off = loffsets[index_check_const(i, json_data_const) - 1]; + size_t l_size = loffsets[index_check_const(i, json_data_const)] - l_off; + const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); + if (rdata_columns.size() == 1) { // just return origin value + const ColumnString* path_col = rdata_columns[0]; + const ColumnString::Chars& rdata = path_col->get_chars(); + const ColumnString::Offsets& roffsets = path_col->get_offsets(); + size_t r_off = roffsets[index_check_const(i, path_const[0]) - 1]; + size_t r_size = roffsets[index_check_const(i, path_const[0])] - r_off; + const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); + inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size, + r_raw, r_size, is_invalid_json_path); + } else { // will make array string to user + writer->reset(); + writer->writeStartArray(); + for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { + const ColumnString* path_col = rdata_columns[pi]; + const ColumnString::Chars& rdata = path_col->get_chars(); + const ColumnString::Offsets& roffsets = path_col->get_offsets(); + size_t r_off = roffsets[index_check_const(i, path_const[pi]) - 1]; + size_t r_size = roffsets[index_check_const(i, path_const[pi])] - r_off; + const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); + String path(r_raw, r_size); + // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory + JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size); + if (UNLIKELY(!doc || !doc->getValue())) { + writer->writeNull(); + continue; + } + // value is NOT necessary to be deleted since JsonbValue will not allocate memory + JsonbValue* value = + doc->getValue()->findPath(r_raw, r_size, is_invalid_json_path, nullptr); + // if not valid json path , should return error message to user + if (is_invalid_json_path) { + return Status::InvalidArgument( + "Json path error: {} for value: {}", + JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH), + std::string_view(reinterpret_cast<const char*>(rdata.data()), + rdata.size())); + } + if (UNLIKELY(!value)) { + writer->writeNull(); + continue; + } + writer->writeValue(value); + } + writer->writeEndArray(); + if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) { + StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), + writer->getOutput()->getSize()), + i, res_data, res_offsets); + } else { + // transform jsonb to json + if (!formater) { + formater.reset(new JsonbToJson()); + } + StringOP::push_value_string( + formater->to_json_string(writer->getOutput()->getBuffer(), Review Comment: In this branch, writer is not initialized since ReturnType is not Jsonb. In fact, I think format to string is not necessary, since planner will automaticlly add cast from JSONB to String if necessary. ########## be/src/vec/functions/function_jsonb.cpp: ########## @@ -503,6 +508,93 @@ struct JsonbExtractStringImpl { public: // for jsonb_extract_string + static Status vector_vector_v2( + FunctionContext* context, const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, const bool& json_data_const, + const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths + const std::vector<bool>& path_const, ColumnString::Chars& res_data, + ColumnString::Offsets& res_offsets, NullMap& null_map, bool& is_invalid_json_path) { + size_t input_rows_count = json_data_const ? rdata_columns.size() : loffsets.size(); + res_offsets.resize(input_rows_count); + + std::unique_ptr<JsonbWriter> writer; + if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) { + writer.reset(new JsonbWriter()); + } + + std::unique_ptr<JsonbToJson> formater; + + for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, res_data, res_offsets, null_map); + continue; + } + size_t l_off = loffsets[index_check_const(i, json_data_const) - 1]; + size_t l_size = loffsets[index_check_const(i, json_data_const)] - l_off; + const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); + if (rdata_columns.size() == 1) { // just return origin value + const ColumnString* path_col = rdata_columns[0]; + const ColumnString::Chars& rdata = path_col->get_chars(); + const ColumnString::Offsets& roffsets = path_col->get_offsets(); + size_t r_off = roffsets[index_check_const(i, path_const[0]) - 1]; + size_t r_size = roffsets[index_check_const(i, path_const[0])] - r_off; + const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); + inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size, + r_raw, r_size, is_invalid_json_path); + } else { // will make array string to user + writer->reset(); + writer->writeStartArray(); + for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { + const ColumnString* path_col = rdata_columns[pi]; + const ColumnString::Chars& rdata = path_col->get_chars(); + const ColumnString::Offsets& roffsets = path_col->get_offsets(); + size_t r_off = roffsets[index_check_const(i, path_const[pi]) - 1]; + size_t r_size = roffsets[index_check_const(i, path_const[pi])] - r_off; + const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); + String path(r_raw, r_size); Review Comment: unused variable path -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org