github-actions[bot] commented on code in PR #36411:
URL: https://github.com/apache/doris/pull/36411#discussion_r1642691741


##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -431,6 +431,176 @@ class FunctionJsonbExtract : public IFunction {
     }
 };
 
+class FunctionJsonbKeys : public IFunction {
+public:
+    static constexpr auto name = "json_keys";
+    static constexpr auto alias = "jsonb_keys";
+    static FunctionPtr create() { return 
std::make_shared<FunctionJsonbKeys>(); }
+    String get_name() const override { return name; }
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return make_nullable(
+                
std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>())));
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override 
{
+        DCHECK_GE(arguments.size(), 1);
+
+        ColumnPtr jsonb_data_column = nullptr;
+        const NullMap* data_null_map = nullptr;
+        // prepare jsonb data column
+        jsonb_data_column = 
unpack_if_const(block.get_by_position(arguments[0]).column).first;
+        if (block.get_by_position(arguments[0]).column->is_nullable()) {
+            const auto* nullable = 
check_and_get_column<ColumnNullable>(jsonb_data_column);
+            jsonb_data_column = nullable->get_nested_column_ptr();
+            data_null_map = &nullable->get_null_map_data();
+        }
+        const ColumnString* col_from_string = 
check_and_get_column<ColumnString>(jsonb_data_column);
+
+        // prepare parse path column prepare, maybe we do not have path column
+        ColumnPtr jsonb_path_column = nullptr;
+        const ColumnString* jsonb_path_col = nullptr;
+        bool path_const = false;
+        const NullMap* path_null_map = nullptr;
+        if (arguments.size() == 2) {
+            // we have should have a ColumnString for path
+            std::tie(jsonb_path_column, path_const) =
+                    
unpack_if_const(block.get_by_position(arguments[1]).column);
+            if (block.get_by_position(arguments[1]).column->is_nullable()) {
+                const auto* nullable = 
check_and_get_column<ColumnNullable>(jsonb_path_column);
+                jsonb_path_column = nullable->get_nested_column_ptr();
+                path_null_map = &nullable->get_null_map_data();
+            }
+            jsonb_path_col = 
check_and_get_column<ColumnString>(jsonb_path_column);
+        } else if (arguments.size() > 2) {
+            // here has argument param error
+            return Status::InvalidArgument("json_keys should have 1 or 2 
arguments");
+        }
+
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+        NullMap& res_null_map = null_map->get_data();
+
+        auto dst_arr = ColumnArray::create(
+                ColumnNullable::create(ColumnString::create(), 
ColumnUInt8::create()),
+                ColumnArray::ColumnOffsets::create());
+        ColumnNullable& dst_nested_column = 
assert_cast<ColumnNullable&>(dst_arr->get_data());
+
+        Status st;
+        if (jsonb_path_column) {
+            if (path_const) {
+                st = inner_loop_impl<true, true>(input_rows_count, *dst_arr, 
dst_nested_column,
+                                                 res_null_map, 
*col_from_string, data_null_map,
+                                                 jsonb_path_col, 
path_null_map);
+            } else {
+                st = inner_loop_impl<true, false>(input_rows_count, *dst_arr, 
dst_nested_column,
+                                                  res_null_map, 
*col_from_string, data_null_map,
+                                                  jsonb_path_col, 
path_null_map);
+            }
+        } else {
+            st = inner_loop_impl<false, false>(input_rows_count, *dst_arr, 
dst_nested_column,
+                                               res_null_map, *col_from_string, 
data_null_map,
+                                               jsonb_path_col, path_null_map);
+        }
+        if (!st.ok()) {
+            return st;
+        }
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(dst_arr), 
std::move(null_map));
+        return st;
+    }
+
+private:
+    template <bool JSONB_PATH_PARAM, bool JSON_PATH_CONST>
+    static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, 
ColumnArray& dst_arr,
+                                                ColumnNullable& 
dst_nested_column,
+                                                NullMap& res_null_map,
+                                                const ColumnString& 
col_from_string,
+                                                const NullMap* 
jsonb_data_nullmap,
+                                                const ColumnString* 
jsonb_path_column,
+                                                const NullMap* path_null_map) {
+        // if path is const, we just need to parse it once
+        JsonbPath const_path;
+        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
+            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
+            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
+                return Status::InvalidArgument(
+                        "Json path error: {} for value: {}",
+                        
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+                        r_raw_ref.to_string());
+            }
+        }
+        const auto& ldata = col_from_string.get_chars();
+        const auto& loffsets = col_from_string.get_offsets();
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            // if jsonb data is null or path column is null , we should return 
null
+            if (jsonb_data_nullmap && (&jsonb_data_nullmap)[i]) {
+                res_null_map[i] = 1;
+                dst_arr.insert_default();
+                continue;
+            }
+            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
+                if (path_null_map && (&path_null_map)[i]) {
+                    res_null_map[i] = 1;
+                    dst_arr.insert_default();
+                    continue;
+                }
+            }
+            // extract jsonb keys
+            size_t l_off = loffsets[i - 1];
+            size_t l_size = loffsets[i] - l_off;
+            if (l_size == 0) {
+                res_null_map[i] = 1;
+                dst_arr.insert_default();
+                continue;
+            }
+            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+            JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
+            if (UNLIKELY(!doc || !doc->getValue())) {

Review Comment:
   warning: boolean expression can be simplified by DeMorgan's theorem [readability-simplify-boolean-expr]
   ```cpp
               if (UNLIKELY(!doc || !doc->getValue())) {
                   ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/common/compiler_util.h:35:** expanded from macro 'UNLIKELY'
   ```cpp
   #define UNLIKELY(expr) __builtin_expect(!!(expr), 0)
                                            ^
   ```
   
   </details>
   



##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -431,6 +431,176 @@
     }
 };
 
+class FunctionJsonbKeys : public IFunction {
+public:
+    static constexpr auto name = "json_keys";
+    static constexpr auto alias = "jsonb_keys";
+    static FunctionPtr create() { return 
std::make_shared<FunctionJsonbKeys>(); }
+    String get_name() const override { return name; }
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return make_nullable(
+                
std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>())));
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override 
{
+        DCHECK_GE(arguments.size(), 1);
+
+        ColumnPtr jsonb_data_column = nullptr;
+        const NullMap* data_null_map = nullptr;
+        // prepare jsonb data column
+        jsonb_data_column = 
unpack_if_const(block.get_by_position(arguments[0]).column).first;
+        if (block.get_by_position(arguments[0]).column->is_nullable()) {
+            const auto* nullable = 
check_and_get_column<ColumnNullable>(jsonb_data_column);
+            jsonb_data_column = nullable->get_nested_column_ptr();
+            data_null_map = &nullable->get_null_map_data();
+        }
+        const ColumnString* col_from_string = 
check_and_get_column<ColumnString>(jsonb_data_column);
+
+        // prepare parse path column prepare, maybe we do not have path column
+        ColumnPtr jsonb_path_column = nullptr;
+        const ColumnString* jsonb_path_col = nullptr;
+        bool path_const = false;
+        const NullMap* path_null_map = nullptr;
+        if (arguments.size() == 2) {
+            // we have should have a ColumnString for path
+            std::tie(jsonb_path_column, path_const) =
+                    
unpack_if_const(block.get_by_position(arguments[1]).column);
+            if (block.get_by_position(arguments[1]).column->is_nullable()) {
+                const auto* nullable = 
check_and_get_column<ColumnNullable>(jsonb_path_column);
+                jsonb_path_column = nullable->get_nested_column_ptr();
+                path_null_map = &nullable->get_null_map_data();
+            }
+            jsonb_path_col = 
check_and_get_column<ColumnString>(jsonb_path_column);
+        } else if (arguments.size() > 2) {
+            // here has argument param error
+            return Status::InvalidArgument("json_keys should have 1 or 2 
arguments");
+        }
+
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+        NullMap& res_null_map = null_map->get_data();
+
+        auto dst_arr = ColumnArray::create(
+                ColumnNullable::create(ColumnString::create(), 
ColumnUInt8::create()),
+                ColumnArray::ColumnOffsets::create());
+        ColumnNullable& dst_nested_column = 
assert_cast<ColumnNullable&>(dst_arr->get_data());
+
+        Status st;
+        if (jsonb_path_column) {
+            if (path_const) {
+                st = inner_loop_impl<true, true>(input_rows_count, *dst_arr, 
dst_nested_column,
+                                                 res_null_map, 
*col_from_string, data_null_map,
+                                                 jsonb_path_col, 
path_null_map);
+            } else {
+                st = inner_loop_impl<true, false>(input_rows_count, *dst_arr, 
dst_nested_column,
+                                                  res_null_map, 
*col_from_string, data_null_map,
+                                                  jsonb_path_col, 
path_null_map);
+            }
+        } else {
+            st = inner_loop_impl<false, false>(input_rows_count, *dst_arr, 
dst_nested_column,
+                                               res_null_map, *col_from_string, 
data_null_map,
+                                               jsonb_path_col, path_null_map);
+        }
+        if (!st.ok()) {
+            return st;
+        }
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(dst_arr), 
std::move(null_map));
+        return st;
+    }
+
+private:
+    template <bool JSONB_PATH_PARAM, bool JSON_PATH_CONST>
+    static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, 
ColumnArray& dst_arr,
+                                                ColumnNullable& 
dst_nested_column,
+                                                NullMap& res_null_map,
+                                                const ColumnString& 
col_from_string,
+                                                const NullMap* 
jsonb_data_nullmap,
+                                                const ColumnString* 
jsonb_path_column,
+                                                const NullMap* path_null_map) {
+        // if path is const, we just need to parse it once
+        JsonbPath const_path;
+        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
+            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
+            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
+                return Status::InvalidArgument(
+                        "Json path error: {} for value: {}",
+                        
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+                        r_raw_ref.to_string());
+            }
+        }
+        const auto& ldata = col_from_string.get_chars();
+        const auto& loffsets = col_from_string.get_offsets();
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            // if jsonb data is null or path column is null , we should return 
null
+            if (jsonb_data_nullmap && (&jsonb_data_nullmap)[i]) {
+                res_null_map[i] = 1;
+                dst_arr.insert_default();
+                continue;
+            }
+            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
+                if (path_null_map && (&path_null_map)[i]) {
+                    res_null_map[i] = 1;
+                    dst_arr.insert_default();
+                    continue;
+                }
+            }
+            // extract jsonb keys
+            size_t l_off = loffsets[i - 1];
+            size_t l_size = loffsets[i] - l_off;
+            if (l_size == 0) {
+                res_null_map[i] = 1;
+                dst_arr.insert_default();
+                continue;
+            }
+            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+            JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
+            if (UNLIKELY(!doc || !doc->getValue())) {
+                dst_arr.clear();
+                return Status::InvalidArgument("jsonb data is invalid");
+            }
+            JsonbValue* obj_val;
+            if constexpr (JSONB_PATH_PARAM) {
+                if constexpr (!JSON_PATH_CONST) {
+                    const ColumnString::Chars& rdata = 
jsonb_path_column->get_chars();
+                    const ColumnString::Offsets& roffsets = 
jsonb_path_column->get_offsets();
+                    size_t r_off = roffsets[i - 1];
+                    size_t r_size = roffsets[i] - r_off;
+                    const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
+                    JsonbPath path;
+                    if (!path.seek(r_raw, r_size)) {
+                        return Status::InvalidArgument(
+                                "Json path error: {} for value: {}",
+                                
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+                                std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
+                                                 rdata.size()));
+                    }
+                    obj_val = doc->getValue()->findValue(path, nullptr);
+                } else {
+                    obj_val = doc->getValue()->findValue(const_path, nullptr);
+                }
+            } else {
+                obj_val = doc->getValue();
+            }
+
+            if (!obj_val || !obj_val->isObject()) {
+                // if jsonb data is not object we should return null
+                res_null_map[i] = 1;
+                dst_arr.insert_default();
+                continue;
+            }
+            ObjectVal* obj = (ObjectVal*)obj_val;

Review Comment:
   warning: use auto when initializing with a cast to avoid duplicating the type name [modernize-use-auto]
   
   ```suggestion
               auto* obj = (ObjectVal*)obj_val;
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to