This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new be55cb8dfc [Improve](jsonb_extract) support jsonb_extract multi parse 
path (#21555)
be55cb8dfc is described below

commit be55cb8dfc4da4fefc25eb0f538a71454580b5e9
Author: amory <wangqian...@selectdb.com>
AuthorDate: Wed Jul 12 21:37:36 2023 +0800

    [Improve](jsonb_extract) support jsonb_extract multi parse path (#21555)
    
    support jsonb_extract multi parse path
---
 be/src/vec/functions/function_jsonb.cpp            | 154 +++++++++++++++------
 gensrc/script/doris_builtins_functions.py          |   8 +-
 .../data/jsonb_p0/test_jsonb_load_and_function.out |  23 +++
 .../test_jsonb_load_unique_key_and_function.out    |  23 +++
 .../jsonb_p0/test_jsonb_load_and_function.groovy   |   3 +
 .../test_jsonb_load_unique_key_and_function.groovy |   4 +
 6 files changed, 171 insertions(+), 44 deletions(-)

diff --git a/be/src/vec/functions/function_jsonb.cpp 
b/be/src/vec/functions/function_jsonb.cpp
index 42bdbe4b85..d9e498b4cf 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -339,7 +339,7 @@ using FunctionJsonbParseNotnullErrorValue =
 using FunctionJsonbParseNotnullErrorInvalid =
         FunctionJsonbParseBase<NullalbeMode::NOT_NULL, 
JsonbParseErrorMode::RETURN_INVALID>;
 
-// func(json,string) -> nullable(type)
+// func(jsonb, [varchar, varchar, ...]) -> nullable(type)
 template <typename Impl>
 class FunctionJsonbExtract : public IFunction {
 public:
@@ -347,7 +347,8 @@ public:
     static constexpr auto alias = Impl::alias;
     static FunctionPtr create() { return 
std::make_shared<FunctionJsonbExtract>(); }
     String get_name() const override { return name; }
-    size_t get_number_of_arguments() const override { return 2; }
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
         return make_nullable(std::make_shared<typename Impl::ReturnType>());
     }
@@ -355,26 +356,32 @@ public:
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) override {
         auto null_map = ColumnUInt8::create(input_rows_count, 0);
-        DCHECK_EQ(arguments.size(), 2);
-        ColumnPtr argument_columns[2];
-        bool col_const[2];
-        for (int i = 0; i < 2; ++i) {
-            std::tie(argument_columns[i], col_const[i]) =
-                    
unpack_if_const(block.get_by_position(arguments[i]).column);
-            check_set_nullable(argument_columns[i], null_map, col_const[i]);
+        DCHECK_GE(arguments.size(), 2);
+
+        ColumnPtr jsonb_data_column;
+        bool jsonb_data_const = false;
+        // prepare jsonb data column
+        std::tie(jsonb_data_column, jsonb_data_const) =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        check_set_nullable(jsonb_data_column, null_map, jsonb_data_const);
+        auto& ldata = assert_cast<const 
ColumnString*>(jsonb_data_column.get())->get_chars();
+        auto& loffsets = assert_cast<const 
ColumnString*>(jsonb_data_column.get())->get_offsets();
+
+        // prepare parse path column prepare
+        std::vector<const ColumnString*> jsonb_path_columns;
+        std::vector<bool> path_const(arguments.size() - 1);
+        for (int i = 0; i < arguments.size() - 1; ++i) {
+            ColumnPtr path_column;
+            bool is_const = false;
+            std::tie(path_column, is_const) =
+                    unpack_if_const(block.get_by_position(arguments[i + 
1]).column);
+            path_const[i] = is_const;
+            check_set_nullable(path_column, null_map, path_const[i]);
+            jsonb_path_columns.push_back(assert_cast<const 
ColumnString*>(path_column.get()));
         }
 
         auto res = Impl::ColumnType::create();
 
-        auto jsonb_data_column = assert_cast<const 
ColumnString*>(argument_columns[0].get());
-        auto jsonb_path_column = assert_cast<const 
ColumnString*>(argument_columns[1].get());
-
-        auto& ldata = jsonb_data_column->get_chars();
-        auto& loffsets = jsonb_data_column->get_offsets();
-
-        auto& rdata = jsonb_path_column->get_chars();
-        auto& roffsets = jsonb_path_column->get_offsets();
-
         bool is_invalid_json_path = false;
 
         // execute Impl
@@ -382,36 +389,34 @@ public:
                       std::is_same_v<typename Impl::ReturnType, 
DataTypeJsonb>) {
             auto& res_data = res->get_chars();
             auto& res_offsets = res->get_offsets();
-            if (col_const[0]) {
-                Impl::scalar_vector(context, 
jsonb_data_column->get_data_at(0), rdata, roffsets,
-                                    res_data, res_offsets, 
null_map->get_data(),
-                                    is_invalid_json_path);
-            } else if (col_const[1]) {
-                Impl::vector_scalar(context, ldata, loffsets, 
jsonb_path_column->get_data_at(0),
-                                    res_data, res_offsets, 
null_map->get_data(),
-                                    is_invalid_json_path);
-            } else {
-                Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, 
res_data,
-                                    res_offsets, null_map->get_data(), 
is_invalid_json_path);
+            Status st = Impl::vector_vector_v2(
+                    context, ldata, loffsets, jsonb_data_const, 
jsonb_path_columns, path_const,
+                    res_data, res_offsets, null_map->get_data(), 
is_invalid_json_path);
+            if (st != Status::OK()) {
+                return st;
             }
         } else {
-            if (col_const[0]) {
+            // not support other extract type for now (e.g. int, double, ...)
+            DCHECK_EQ(jsonb_path_columns.size(), 1);
+            auto& rdata = jsonb_path_columns[0]->get_chars();
+            auto& roffsets = jsonb_path_columns[0]->get_offsets();
+            if (jsonb_data_const) {
                 Impl::scalar_vector(context, 
jsonb_data_column->get_data_at(0), rdata, roffsets,
                                     res->get_data(), null_map->get_data(), 
is_invalid_json_path);
-            } else if (col_const[1]) {
-                Impl::vector_scalar(context, ldata, loffsets, 
jsonb_path_column->get_data_at(0),
+            } else if (path_const[0]) {
+                Impl::vector_scalar(context, ldata, loffsets, 
jsonb_path_columns[0]->get_data_at(0),
                                     res->get_data(), null_map->get_data(), 
is_invalid_json_path);
             } else {
                 Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, 
res->get_data(),
                                     null_map->get_data(), 
is_invalid_json_path);
             }
-        }
-
-        if (is_invalid_json_path) {
-            return Status::InvalidArgument(
-                    "Json path error: {} for value: {}",
-                    JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
-                    std::string_view(reinterpret_cast<const 
char*>(rdata.data()), rdata.size()));
+            if (is_invalid_json_path) {
+                return Status::InvalidArgument(
+                        "Json path error: {} for value: {}",
+                        
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+                        std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
+                                         rdata.size()));
+            }
         }
 
         block.get_by_position(result).column =
@@ -503,6 +508,77 @@ private:
 
 public:
     // for jsonb_extract_string
+    static Status vector_vector_v2(
+            FunctionContext* context, const ColumnString::Chars& ldata,
+            const ColumnString::Offsets& loffsets, const bool& json_data_const,
+            const std::vector<const ColumnString*>& rdata_columns, // here we 
can support more paths
+            const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+            ColumnString::Offsets& res_offsets, NullMap& null_map, bool& 
is_invalid_json_path) {
+        size_t input_rows_count = json_data_const ? rdata_columns.size() : 
loffsets.size();
+        res_offsets.resize(input_rows_count);
+
+        auto writer = std::make_unique<JsonbWriter>();
+        std::unique_ptr<JsonbToJson> formater;
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map);
+                continue;
+            }
+            size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+            size_t l_size = loffsets[index_check_const(i, json_data_const)] - 
l_off;
+            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+            if (rdata_columns.size() == 1) { // just return origin value
+                const ColumnString* path_col = rdata_columns[0];
+                const ColumnString::Chars& rdata = path_col->get_chars();
+                const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                size_t r_off = roffsets[index_check_const(i, path_const[0]) - 
1];
+                size_t r_size = roffsets[index_check_const(i, path_const[0])] 
- r_off;
+                const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                inner_loop_impl(i, res_data, res_offsets, null_map, writer, 
formater, l_raw, l_size,
+                                r_raw, r_size, is_invalid_json_path);
+            } else { // will make array string to user
+                writer->reset();
+                writer->writeStartArray();
+                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+                    const ColumnString* path_col = rdata_columns[pi];
+                    const ColumnString::Chars& rdata = path_col->get_chars();
+                    const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
+                    size_t r_off = roffsets[index_check_const(i, 
path_const[pi]) - 1];
+                    size_t r_size = roffsets[index_check_const(i, 
path_const[pi])] - r_off;
+                    const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                    // doc is NOT necessary to be deleted since JsonbDocument 
will not allocate memory
+                    JsonbDocument* doc = JsonbDocument::createDocument(l_raw, 
l_size);
+                    if (UNLIKELY(!doc || !doc->getValue())) {
+                        writer->writeNull();
+                        continue;
+                    }
+                    // value is NOT necessary to be deleted since JsonbValue 
will not allocate memory
+                    JsonbValue* value =
+                            doc->getValue()->findPath(r_raw, r_size, 
is_invalid_json_path, nullptr);
+                    // if not valid json path , should return error message to 
user
+                    if (is_invalid_json_path) {
+                        return Status::InvalidArgument(
+                                "Json path error: {} for value: {}",
+                                
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+                                std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
+                                                 rdata.size()));
+                    }
+                    if (UNLIKELY(!value)) {
+                        writer->writeNull();
+                    } else {
+                        writer->writeValue(value);
+                    }
+                }
+                writer->writeEndArray();
+                
StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
+                                                             
writer->getOutput()->getSize()),
+                                            i, res_data, res_offsets);
+            }
+        } //for
+        return Status::OK();
+    }
+
     static void vector_vector(FunctionContext* context, const 
ColumnString::Chars& ldata,
                               const ColumnString::Offsets& loffsets,
                               const ColumnString::Chars& rdata,
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index ed17bf524a..bf34a7cf52 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1690,11 +1690,11 @@ visible_functions = {
 
         [['jsonb_exists_path'], 'BOOLEAN', ['JSONB', 'VARCHAR'], ''],
         [['jsonb_exists_path'], 'BOOLEAN', ['JSONB', 'STRING'], ''],
-        [['jsonb_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
+        [['jsonb_type'], 'STRING', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
         [['jsonb_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
 
-        [['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
-        [['jsonb_extract'], 'JSONB', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
+        [['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR', '...'], 
'ALWAYS_NULLABLE'],
+        [['jsonb_extract'], 'JSONB', ['JSONB', 'STRING', '...'], 
'ALWAYS_NULLABLE'],
         [['jsonb_extract_isnull'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 
'ALWAYS_NULLABLE'],
         [['jsonb_extract_isnull'], 'BOOLEAN', ['JSONB', 'STRING'], 
'ALWAYS_NULLABLE'],
         [['jsonb_extract_bool'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 
'ALWAYS_NULLABLE'],
@@ -1725,8 +1725,6 @@ visible_functions = {
         [['json_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
         [['json_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
 
-        [['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
-        [['jsonb_extract'], 'JSONB', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
         [['json_extract_isnull'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 
'ALWAYS_NULLABLE'],
         [['json_extract_isnull'], 'BOOLEAN', ['JSONB', 'STRING'], 
'ALWAYS_NULLABLE'],
         [['json_extract_bool'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 
'ALWAYS_NULLABLE'],
diff --git a/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out 
b/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out
index 00944828bd..e1c176273c 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out
+++ b/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out
@@ -709,6 +709,29 @@
 27     {"k1":"v1","k2":200}    \N
 28     {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}    \N
 
+-- !jsonb_extract_multipath --
+1      \N      \N
+2      null    [null,null,null,null]
+3      true    [true,null,null,null]
+4      false   [false,null,null,null]
+5      100     [100,null,null,null]
+6      10000   [10000,null,null,null]
+7      1000000000      [1000000000,null,null,null]
+8      1152921504606846976     [1152921504606846976,null,null,null]
+9      6.18    [6.18,null,null,null]
+10     "abcd"  ["abcd",null,null,null]
+11     {}      [{},{},null,{}]
+12     {"k1":"v31","k2":300}   
[{"k1":"v31","k2":300},{"k1":"v31","k2":300},"v31",{"k1":"v31","k2":300}]
+13     []      [[],null,null,null]
+14     [123,456]       [[123,456],null,null,123]
+15     ["abc","def"]   [["abc","def"],null,null,"abc"]
+16     [null,true,false,100,6.18,"abc"]        
[[null,true,false,100,6.18,"abc"],null,null,null]
+17     [{"k1":"v41","k2":400},1,"a",3.14]      
[[{"k1":"v41","k2":400},1,"a",3.14],null,null,{"k1":"v41","k2":400}]
+18     {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}   
[{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},"v31",{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}]
+26     \N      \N
+27     {"k1":"v1","k2":200}    
[{"k1":"v1","k2":200},{"k1":"v1","k2":200},"v1",{"k1":"v1","k2":200}]
+28     {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}    
[{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},null,{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}]
+
 -- !jsonb_extract_string_select --
 1      \N      \N
 2      null    null
diff --git 
a/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out 
b/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out
index 04946beaa1..632badf32e 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out
+++ b/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out
@@ -709,6 +709,29 @@
 27     {"k1":"v1","k2":200}    \N
 28     {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}    \N
 
+-- !jsonb_extract_multipath --
+1      \N      \N
+2      null    [null,null,null,null]
+3      true    [true,null,null,null]
+4      false   [false,null,null,null]
+5      100     [100,null,null,null]
+6      10000   [10000,null,null,null]
+7      1000000000      [1000000000,null,null,null]
+8      1152921504606846976     [1152921504606846976,null,null,null]
+9      6.18    [6.18,null,null,null]
+10     "abcd"  ["abcd",null,null,null]
+11     {}      [{},{},null,{}]
+12     {"k1":"v31","k2":300}   
[{"k1":"v31","k2":300},{"k1":"v31","k2":300},"v31",{"k1":"v31","k2":300}]
+13     []      [[],null,null,null]
+14     [123,456]       [[123,456],null,null,123]
+15     ["abc","def"]   [["abc","def"],null,null,"abc"]
+16     [null,true,false,100,6.18,"abc"]        
[[null,true,false,100,6.18,"abc"],null,null,null]
+17     [{"k1":"v41","k2":400},1,"a",3.14]      
[[{"k1":"v41","k2":400},1,"a",3.14],null,null,{"k1":"v41","k2":400}]
+18     {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}   
[{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},"v31",{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}]
+26     \N      \N
+27     {"k1":"v1","k2":200}    
[{"k1":"v1","k2":200},{"k1":"v1","k2":200},"v1",{"k1":"v1","k2":200}]
+28     {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}    
[{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},null,{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}]
+
 -- !jsonb_extract_string_select --
 1      \N      \N
 2      null    null
diff --git 
a/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy 
b/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy
index 6941f335c2..d58b991ef2 100644
--- a/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy
+++ b/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy
@@ -178,6 +178,9 @@ suite("test_jsonb_load_and_function", "p0") {
     qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-1]') FROM ${testTable} 
ORDER BY id"
     qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-10]') FROM ${testTable} 
ORDER BY id"
 
+    // jsonb_extract_multipath
+    qt_jsonb_extract_multipath "SELECT id, j, jsonb_extract(j, '\$', '\$.*', 
'\$.k1', '\$[0]') FROM ${testTable} ORDER BY id"
+
     // jsonb_extract_string
     qt_jsonb_extract_string_select "SELECT id, j, jsonb_extract_string(j, 
'\$') FROM ${testTable} ORDER BY id"
 
diff --git 
a/regression-test/suites/jsonb_p0/test_jsonb_load_unique_key_and_function.groovy
 
b/regression-test/suites/jsonb_p0/test_jsonb_load_unique_key_and_function.groovy
index eee42e3a69..5423ad33fc 100644
--- 
a/regression-test/suites/jsonb_p0/test_jsonb_load_unique_key_and_function.groovy
+++ 
b/regression-test/suites/jsonb_p0/test_jsonb_load_unique_key_and_function.groovy
@@ -171,6 +171,10 @@ suite("test_jsonb_unique_load_and_function", "p0") {
     qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-1]') FROM ${testTable} 
ORDER BY id"
     qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-10]') FROM ${testTable} 
ORDER BY id"
 
+
+    // jsonb_extract_multipath
+    qt_jsonb_extract_multipath "SELECT id, j, jsonb_extract(j, '\$', '\$.*', 
'\$.k1', '\$[0]') FROM ${testTable} ORDER BY id"
+
     // jsonb_extract_string
     qt_jsonb_extract_string_select "SELECT id, j, jsonb_extract_string(j, 
'\$') FROM ${testTable} ORDER BY id"
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to