xiaokang commented on code in PR #21555:
URL: https://github.com/apache/doris/pull/21555#discussion_r1257406548


##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -503,6 +508,93 @@ struct JsonbExtractStringImpl {
 
 public:
     // for jsonb_extract_string
+    static Status vector_vector_v2(
+            FunctionContext* context, const ColumnString::Chars& ldata,
+            const ColumnString::Offsets& loffsets, const bool& json_data_const,
+            const std::vector<const ColumnString*>& rdata_columns, // here we 
can support more paths
+            const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+            ColumnString::Offsets& res_offsets, NullMap& null_map, bool& 
is_invalid_json_path) {
+        size_t input_rows_count = json_data_const ? rdata_columns.size() : 
loffsets.size();
+        res_offsets.resize(input_rows_count);
+
+        std::unique_ptr<JsonbWriter> writer;
+        if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+            writer.reset(new JsonbWriter());
+        }
+
+        std::unique_ptr<JsonbToJson> formater;
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map);
+                continue;
+            }
+            size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+            size_t l_size = loffsets[index_check_const(i, json_data_const)] - 
l_off;
+            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+            if (rdata_columns.size() == 1) { // just return origin value
+                const ColumnString* path_col = rdata_columns[0];
+                const ColumnString::Chars& rdata = path_col->get_chars();
+                const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                size_t r_off = roffsets[index_check_const(i, path_const[0]) - 
1];
+                size_t r_size = roffsets[index_check_const(i, path_const[0])] 
- r_off;
+                const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                inner_loop_impl(i, res_data, res_offsets, null_map, writer, 
formater, l_raw, l_size,
+                                r_raw, r_size, is_invalid_json_path);
+            } else { // will make array string to user
+                writer->reset();
+                writer->writeStartArray();
+                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+                    const ColumnString* path_col = rdata_columns[pi];
+                    const ColumnString::Chars& rdata = path_col->get_chars();
+                    const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                    size_t r_off = roffsets[index_check_const(i, 
path_const[pi]) - 1];
+                    size_t r_size = roffsets[index_check_const(i, 
path_const[pi])] - r_off;
+                    const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                    String path(r_raw, r_size);
+                    // doc is NOT necessary to be deleted since JsonbDocument 
will not allocate memory
+                    JsonbDocument* doc = JsonbDocument::createDocument(l_raw, 
l_size);
+                    if (UNLIKELY(!doc || !doc->getValue())) {
+                        writer->writeNull();
+                        continue;
+                    }
+                    // value is NOT necessary to be deleted since JsonbValue 
will not allocate memory
+                    JsonbValue* value =
+                            doc->getValue()->findPath(r_raw, r_size, 
is_invalid_json_path, nullptr);
+                    // if not valid json path , should return error message to 
user
+                    if (is_invalid_json_path) {
+                        return Status::InvalidArgument(
+                                "Json path error: {} for value: {}",
+                                
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+                                std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
+                                                 rdata.size()));
+                    }
+                    if (UNLIKELY(!value)) {
+                        writer->writeNull();
+                        continue;
+                    }
+                    writer->writeValue(value);

Review Comment:
   put it in else branch



##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -503,6 +508,93 @@ struct JsonbExtractStringImpl {
 
 public:
     // for jsonb_extract_string
+    static Status vector_vector_v2(
+            FunctionContext* context, const ColumnString::Chars& ldata,
+            const ColumnString::Offsets& loffsets, const bool& json_data_const,
+            const std::vector<const ColumnString*>& rdata_columns, // here we 
can support more paths
+            const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+            ColumnString::Offsets& res_offsets, NullMap& null_map, bool& 
is_invalid_json_path) {
+        size_t input_rows_count = json_data_const ? rdata_columns.size() : 
loffsets.size();
+        res_offsets.resize(input_rows_count);
+
+        std::unique_ptr<JsonbWriter> writer;
+        if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+            writer.reset(new JsonbWriter());
+        }
+
+        std::unique_ptr<JsonbToJson> formater;
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map);
+                continue;
+            }
+            size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+            size_t l_size = loffsets[index_check_const(i, json_data_const)] - 
l_off;
+            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+            if (rdata_columns.size() == 1) { // just return origin value
+                const ColumnString* path_col = rdata_columns[0];
+                const ColumnString::Chars& rdata = path_col->get_chars();
+                const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                size_t r_off = roffsets[index_check_const(i, path_const[0]) - 
1];
+                size_t r_size = roffsets[index_check_const(i, path_const[0])] 
- r_off;
+                const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                inner_loop_impl(i, res_data, res_offsets, null_map, writer, 
formater, l_raw, l_size,
+                                r_raw, r_size, is_invalid_json_path);
+            } else { // will make array string to user
+                writer->reset();
+                writer->writeStartArray();
+                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+                    const ColumnString* path_col = rdata_columns[pi];
+                    const ColumnString::Chars& rdata = path_col->get_chars();
+                    const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                    size_t r_off = roffsets[index_check_const(i, 
path_const[pi]) - 1];
+                    size_t r_size = roffsets[index_check_const(i, 
path_const[pi])] - r_off;
+                    const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                    String path(r_raw, r_size);

Review Comment:
   Use string_view to avoid copy if it's really used.



##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -503,6 +508,93 @@ struct JsonbExtractStringImpl {
 
 public:
     // for jsonb_extract_string
+    static Status vector_vector_v2(
+            FunctionContext* context, const ColumnString::Chars& ldata,
+            const ColumnString::Offsets& loffsets, const bool& json_data_const,
+            const std::vector<const ColumnString*>& rdata_columns, // here we 
can support more paths
+            const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+            ColumnString::Offsets& res_offsets, NullMap& null_map, bool& 
is_invalid_json_path) {
+        size_t input_rows_count = json_data_const ? rdata_columns.size() : 
loffsets.size();
+        res_offsets.resize(input_rows_count);
+
+        std::unique_ptr<JsonbWriter> writer;
+        if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+            writer.reset(new JsonbWriter());
+        }
+
+        std::unique_ptr<JsonbToJson> formater;
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map);
+                continue;
+            }
+            size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+            size_t l_size = loffsets[index_check_const(i, json_data_const)] - 
l_off;
+            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+            if (rdata_columns.size() == 1) { // just return origin value
+                const ColumnString* path_col = rdata_columns[0];
+                const ColumnString::Chars& rdata = path_col->get_chars();
+                const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                size_t r_off = roffsets[index_check_const(i, path_const[0]) - 
1];
+                size_t r_size = roffsets[index_check_const(i, path_const[0])] 
- r_off;
+                const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                inner_loop_impl(i, res_data, res_offsets, null_map, writer, 
formater, l_raw, l_size,
+                                r_raw, r_size, is_invalid_json_path);
+            } else { // will make array string to user
+                writer->reset();
+                writer->writeStartArray();
+                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+                    const ColumnString* path_col = rdata_columns[pi];
+                    const ColumnString::Chars& rdata = path_col->get_chars();
+                    const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                    size_t r_off = roffsets[index_check_const(i, 
path_const[pi]) - 1];
+                    size_t r_size = roffsets[index_check_const(i, 
path_const[pi])] - r_off;
+                    const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                    String path(r_raw, r_size);
+                    // doc is NOT necessary to be deleted since JsonbDocument 
will not allocate memory
+                    JsonbDocument* doc = JsonbDocument::createDocument(l_raw, 
l_size);
+                    if (UNLIKELY(!doc || !doc->getValue())) {
+                        writer->writeNull();
+                        continue;
+                    }
+                    // value is NOT necessary to be deleted since JsonbValue 
will not allocate memory
+                    JsonbValue* value =
+                            doc->getValue()->findPath(r_raw, r_size, 
is_invalid_json_path, nullptr);
+                    // if not valid json path , should return error message to 
user
+                    if (is_invalid_json_path) {
+                        return Status::InvalidArgument(
+                                "Json path error: {} for value: {}",
+                                
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+                                std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
+                                                 rdata.size()));
+                    }
+                    if (UNLIKELY(!value)) {
+                        writer->writeNull();
+                        continue;
+                    }
+                    writer->writeValue(value);
+                }
+                writer->writeEndArray();
+                if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+                    
StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
+                                                                 
writer->getOutput()->getSize()),
+                                                i, res_data, res_offsets);
+                } else {
+                    // transform jsonb to json
+                    if (!formater) {
+                        formater.reset(new JsonbToJson());
+                    }
+                    StringOP::push_value_string(
+                            
formater->to_json_string(writer->getOutput()->getBuffer(),

Review Comment:
   In this branch, writer is not initialized since ReturnType is not Jsonb. In 
fact, I think format to string is not necessary, since planner will 
automaticlly add cast from JSONB to  String if necessary.



##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -503,6 +508,93 @@ struct JsonbExtractStringImpl {
 
 public:
     // for jsonb_extract_string
+    static Status vector_vector_v2(
+            FunctionContext* context, const ColumnString::Chars& ldata,
+            const ColumnString::Offsets& loffsets, const bool& json_data_const,
+            const std::vector<const ColumnString*>& rdata_columns, // here we 
can support more paths
+            const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+            ColumnString::Offsets& res_offsets, NullMap& null_map, bool& 
is_invalid_json_path) {
+        size_t input_rows_count = json_data_const ? rdata_columns.size() : 
loffsets.size();
+        res_offsets.resize(input_rows_count);
+
+        std::unique_ptr<JsonbWriter> writer;
+        if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+            writer.reset(new JsonbWriter());
+        }
+
+        std::unique_ptr<JsonbToJson> formater;
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map);
+                continue;
+            }
+            size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+            size_t l_size = loffsets[index_check_const(i, json_data_const)] - 
l_off;
+            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+            if (rdata_columns.size() == 1) { // just return origin value
+                const ColumnString* path_col = rdata_columns[0];
+                const ColumnString::Chars& rdata = path_col->get_chars();
+                const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                size_t r_off = roffsets[index_check_const(i, path_const[0]) - 
1];
+                size_t r_size = roffsets[index_check_const(i, path_const[0])] 
- r_off;
+                const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                inner_loop_impl(i, res_data, res_offsets, null_map, writer, 
formater, l_raw, l_size,
+                                r_raw, r_size, is_invalid_json_path);
+            } else { // will make array string to user
+                writer->reset();
+                writer->writeStartArray();
+                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+                    const ColumnString* path_col = rdata_columns[pi];
+                    const ColumnString::Chars& rdata = path_col->get_chars();
+                    const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                    size_t r_off = roffsets[index_check_const(i, 
path_const[pi]) - 1];
+                    size_t r_size = roffsets[index_check_const(i, 
path_const[pi])] - r_off;
+                    const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                    String path(r_raw, r_size);

Review Comment:
   unused variable path



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to