This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 358ed8caac2 [feature](hive)support hive catalog read json table. (#43469)
358ed8caac2 is described below

commit 358ed8caac231f88f0ce40d484ac5c2fdc626f8b
Author: daidai <changyu...@selectdb.com>
AuthorDate: Thu Nov 28 22:13:27 2024 +0800

[feature](hive)support hive catalog read json table. (#43469)

### What problem does this PR solve?

Problem Summary:

Support reading JSON-format Hive tables, such as:

```mysql
mysql> show create table basic_json_table;
CREATE TABLE `basic_json_table`(
  `id` int,
  `name` string,
  `age` tinyint,
  `salary` float,
  `is_active` boolean,
  `join_date` date,
  `last_login` timestamp,
  `height` double,
  `profile` binary,
  `rating` decimal(10,2))
ROW FORMAT SERDE
  'org.apache.hive.hcatalog.data.JsonSerDe'
```

Behavior changed:

To implement this feature, this PR modifies `new_json_reader`. Previously, `new_json_reader` could only insert data into `ColumnString`. To support inserting data into columns of other types, `DataTypeSerDe` is introduced to write values into the columns. To maintain compatibility with previous versions, the changes in this PR take effect only when reading Hive JSON tables.

Limitations:

1. Currently only queries are supported; writing is not supported.
2. Currently only the `ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'` scenario is supported. Properties specified in `WITH SERDEPROPERTIES` do not take effect in Doris.
3. Since Hive does not allow columns whose names differ only in case when creating a table in JSON format (including inside a struct), we convert the field names in the JSON data to lowercase when reading the JSON data file, and then match on the lowercase field names. For field names that collide after lowercasing, the value of the last field is used (consistent with Hive behavior); see the sketch after this message. Example:

```
create table json_table(
    column int
)ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe';

a.json:
{"column":1,"COLumn":2,"COLUMN":3}
{"column":10,"COLumn":20}
{"column":100}

in Hive: load a.json into table json_table
in Doris query:
---
3
20
100
---
```

TODO (in the next PR): merge `serde` and `json_reader`, because they have logical conflicts.

### Release note

The Hive catalog supports reading JSON-format tables.
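To make the matching rule in limitation 3 concrete, here is a minimal, self-contained sketch (a hypothetical illustration only, not the PR's actual reader code; `build_name_map` and its inputs are invented for this example). It folds every incoming field name to lowercase and keeps the index of the last occurrence, so duplicate names resolve to the final value, as Hive does:

```cpp
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Hypothetical sketch of the case-folding rule: lowercase each JSON field
// name and record the index of its LAST occurrence, so duplicated names
// resolve to the final value (consistent with Hive behavior).
std::unordered_map<std::string, size_t> build_name_map(
        const std::vector<std::pair<std::string, std::string>>& fields) {
    std::unordered_map<std::string, size_t> name_map;
    for (size_t i = 0; i < fields.size(); ++i) {
        std::string key = fields[i].first;
        std::transform(key.begin(), key.end(), key.begin(),
                       [](unsigned char c) { return std::tolower(c); });
        name_map[key] = i; // operator[] overwrites, so the last index wins
    }
    return name_map;
}

int main() {
    // {"column":1,"COLumn":2,"COLUMN":3} -> the value 3 is used
    std::vector<std::pair<std::string, std::string>> row = {
            {"column", "1"}, {"COLumn", "2"}, {"COLUMN", "3"}};
    auto name_map = build_name_map(row);
    std::cout << row[name_map.at("column")].second << std::endl; // prints 3
}
```

In the reader itself the lookup goes through `_name_map`, alongside the existing `_fuzzy_parse` path (see the `new_json_reader.cpp` diff below); the sketch above only demonstrates the lowercase, last-one-wins resolution rule.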
--- .../vec/data_types/serde/data_type_array_serde.h | 2 + be/src/vec/data_types/serde/data_type_map_serde.h | 4 + .../data_types/serde/data_type_nullable_serde.h | 2 + be/src/vec/data_types/serde/data_type_serde.h | 12 +- .../vec/data_types/serde/data_type_struct_serde.h | 2 + be/src/vec/exec/format/json/new_json_reader.cpp | 559 +++++++++++++++------ be/src/vec/exec/format/json/new_json_reader.h | 32 +- be/src/vec/exec/scan/vfile_scanner.cpp | 4 +- .../scripts/create_preinstalled_scripts/run69.hql | 35 ++ .../scripts/create_preinstalled_scripts/run70.hql | 73 +++ .../scripts/create_preinstalled_scripts/run71.hql | 13 + .../json/json_all_complex_types/dt=dt1/000000_0 | 3 + .../json/json_all_complex_types/dt=dt2/000000_0 | 1 + .../json/json_all_complex_types/dt=dt3/000000_0 | 2 + .../preinstalled_data/json/json_load_data_table/1 | 13 + .../json/json_nested_complex_table/1 | 2 + .../json/json_nested_complex_table/2 | 1 + .../json/json_nested_complex_table/modify_2 | 2 + .../datasource/hive/HiveMetaStoreClientHelper.java | 3 + .../doris/datasource/hive/source/HiveScanNode.java | 41 +- .../hive/hive_json_basic_test.out | 115 +++++ .../hive/hive_json_basic_test.groovy | 71 +++ 22 files changed, 832 insertions(+), 160 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index 5b15f48f502..cdd21155760 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -101,6 +101,8 @@ public: nested_serde->set_return_object_as_string(value); } + virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; } + private: template <bool is_binary_format> Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result, diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h b/be/src/vec/data_types/serde/data_type_map_serde.h index 599533eb4d0..51c11300d44 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.h +++ b/be/src/vec/data_types/serde/data_type_map_serde.h @@ -96,6 +96,10 @@ public: value_serde->set_return_object_as_string(value); } + virtual DataTypeSerDeSPtrs get_nested_serdes() const override { + return {key_serde, value_serde}; + } + private: template <bool is_binary_format> Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result, diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 6051c7f722d..159db890540 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -99,6 +99,8 @@ public: int64_t row_num) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; + virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; } + private: template <bool is_binary_format> Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result, diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index f0e9eb27961..bec0fabfcca 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -100,6 +100,10 @@ class IColumn; class Arena; class IDataType; +class DataTypeSerDe; +using DataTypeSerDeSPtr = std::shared_ptr<DataTypeSerDe>; +using DataTypeSerDeSPtrs = std::vector<DataTypeSerDeSPtr>; + // 
Deserialize means read from different file format or memory format, // for example read from arrow, read from parquet. // Serialize means write the column cell or the total column into another @@ -337,6 +341,11 @@ public: Arena& mem_pool, int64_t row_num) const; virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const; + virtual DataTypeSerDeSPtrs get_nested_serdes() const { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "Method get_nested_serdes is not supported for this serde"); + } + protected: bool _return_object_as_string = false; // This parameter indicates what level the serde belongs to and is mainly used for complex types @@ -379,9 +388,6 @@ inline void checkArrowStatus(const arrow::Status& status, const std::string& col } } -using DataTypeSerDeSPtr = std::shared_ptr<DataTypeSerDe>; -using DataTypeSerDeSPtrs = std::vector<DataTypeSerDeSPtr>; - DataTypeSerDeSPtrs create_data_type_serdes( const std::vector<std::shared_ptr<const IDataType>>& types); DataTypeSerDeSPtrs create_data_type_serdes(const std::vector<SlotDescriptor*>& slots); diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h b/be/src/vec/data_types/serde/data_type_struct_serde.h index 2ec871838d7..fc77e26b71f 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.h +++ b/be/src/vec/data_types/serde/data_type_struct_serde.h @@ -172,6 +172,8 @@ public: } } + virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return elem_serdes_ptrs; } + private: std::optional<size_t> try_get_position_by_name(const String& name) const; diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index d726de3d998..d79e8652074 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -54,8 +54,11 @@ #include "util/slice.h" #include "util/uid_util.h" #include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_map.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" +#include "vec/columns/column_struct.h" #include "vec/common/assert_cast.h" #include "vec/common/typeid_cast.h" #include "vec/core/block.h" @@ -164,10 +167,18 @@ void NewJsonReader::_init_file_description() { } Status NewJsonReader::init_reader( - const std::unordered_map<std::string, VExprContextSPtr>& col_default_value_ctx) { + const std::unordered_map<std::string, VExprContextSPtr>& col_default_value_ctx, + bool is_load) { + _is_load = is_load; + // generate _col_default_value_map RETURN_IF_ERROR(_get_column_default_value(_file_slot_descs, col_default_value_ctx)); + //use serde insert data to column. + for (auto* slot_desc : _file_slot_descs) { + _serdes.emplace_back(slot_desc->get_data_type_ptr()->get_serde()); + } + // create decompressor. 
// _decompressor may be nullptr if this is not a compressed file RETURN_IF_ERROR(Decompressor::create_decompressor(_file_compress_type, &_decompressor)); @@ -390,6 +401,9 @@ Status NewJsonReader::_get_range_params() { if (_params.file_attributes.__isset.fuzzy_parse) { _fuzzy_parse = _params.file_attributes.fuzzy_parse; } + if (_range.table_format_params.table_format_type == "hive") { + _is_hive_table = true; + } return Status::OK(); } @@ -477,8 +491,8 @@ Status NewJsonReader::_vhandle_simple_json(RuntimeState* /*state*/, Block& block bool valid = false; if (_next_row >= _total_rows) { // parse json and generic document Status st = _parse_json(is_empty_row, eof); - if (st.is<DATA_QUALITY_ERROR>()) { - continue; // continue to read next + if (_is_load && st.is<DATA_QUALITY_ERROR>()) { + continue; // continue to read next (for load, after this , already append error to file.) } RETURN_IF_ERROR(st); if (*is_empty_row) { @@ -761,7 +775,20 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl _append_empty_skip_bitmap_value(block, cur_row_count); } - for (auto* slot_desc : slot_descs) { + if (_is_hive_table) { + //don't like _fuzzy_parse,each line read in must modify name_map once. + + for (int i = 0; i < objectValue.MemberCount(); ++i) { + auto it = objectValue.MemberBegin() + i; + std::string field_name(it->name.GetString(), it->name.GetStringLength()); + std::transform(field_name.begin(), field_name.end(), field_name.begin(), ::tolower); + + //Use the last value with the same name. + _name_map.emplace(field_name, i); + } + } + for (size_t slot_idx = 0; slot_idx < slot_descs.size(); ++slot_idx) { + auto* slot_desc = slot_descs[slot_idx]; if (!slot_desc->is_materialized()) { continue; } @@ -773,7 +800,7 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl auto* column_ptr = block.get_by_position(dest_index).column->assume_mutable().get(); rapidjson::Value::ConstMemberIterator it = objectValue.MemberEnd(); - if (_fuzzy_parse) { + if (_fuzzy_parse || _is_hive_table) { auto idx_it = _name_map.find(slot_desc->col_name()); if (idx_it != _name_map.end() && idx_it->second < objectValue.MemberCount()) { it = objectValue.MemberBegin() + idx_it->second; @@ -785,7 +812,8 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl if (it != objectValue.MemberEnd()) { const rapidjson::Value& value = it->value; - RETURN_IF_ERROR(_write_data_to_column(&value, slot_desc, column_ptr, valid)); + RETURN_IF_ERROR(_write_data_to_column(&value, slot_desc->type(), column_ptr, + slot_desc->col_name(), _serdes[slot_idx], valid)); if (!(*valid)) { return Status::OK(); } @@ -814,14 +842,15 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl column_ptr->insert_default(); } else { // not found, filling with default value - RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, valid)); + RETURN_IF_ERROR( + _fill_missing_column(slot_desc, _serdes[slot_idx], column_ptr, valid)); if (!(*valid)) { return Status::OK(); } } } } - if (!has_valid_value) { + if (!has_valid_value && _is_load) { // there is no valid value in json line but has filled with default value before // so remove this line in block string col_names; @@ -845,79 +874,188 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl } Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator value, - SlotDescriptor* slot_desc, IColumn* column_ptr, + const TypeDescriptor& type_desc, + 
vectorized::IColumn* column_ptr, + const std::string& column_name, DataTypeSerDeSPtr serde, bool* valid) { - const char* str_value = nullptr; - char tmp_buf[128] = {0}; - int32_t wbytes = 0; - std::string json_str; - ColumnNullable* nullable_column = nullptr; - if (slot_desc->is_nullable()) { + vectorized::IColumn* data_column_ptr = column_ptr; + DataTypeSerDeSPtr data_serde = serde; + + bool value_is_null = (value == nullptr) || (value->GetType() == rapidjson::Type::kNullType); + + if (column_ptr->is_nullable()) { nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr); - // kNullType will put 1 into the Null map, so there is no need to push 0 for kNullType. - if (value->GetType() != rapidjson::Type::kNullType) { + data_column_ptr = nullable_column->get_nested_column().get_ptr(); + data_serde = serde->get_nested_serdes()[0]; + + if (value_is_null) { + nullable_column->insert_default(); + *valid = true; + return Status::OK(); + } else { nullable_column->get_null_map_data().push_back(0); + } + + } else if (value_is_null) [[unlikely]] { + if (_is_load) { + RETURN_IF_ERROR(_append_error_msg( + *value, "Json value is null, but the column `{}` is not nullable.", column_name, + valid)); + return Status::OK(); + } else { - nullable_column->insert_default(); + return Status::DataQualityError( + "Json value is null, but the column `{}` is not nullable.", column_name); } - column_ptr = &nullable_column->get_nested_column(); } - switch (value->GetType()) { - case rapidjson::Type::kStringType: - str_value = value->GetString(); - wbytes = value->GetStringLength(); - break; - case rapidjson::Type::kNumberType: - if (value->IsUint()) { - wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", value->GetUint()); - } else if (value->IsInt()) { - wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", value->GetInt()); - } else if (value->IsUint64()) { - wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, value->GetUint64()); - } else if (value->IsInt64()) { - wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, value->GetInt64()); - } else if (value->IsFloat() || value->IsDouble()) { - auto* end = fmt::format_to(tmp_buf, "{}", value->GetDouble()); - wbytes = end - tmp_buf; + if (_is_load || !type_desc.is_complex_type()) { + if (value->IsString()) { + Slice slice {value->GetString(), value->GetStringLength()}; + RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, + _serde_options)); + } else { - return Status::InternalError<false>("It should not here."); + // We can `switch (value->GetType()) case: kNumberType`. + // Note that `if (value->IsInt())`, but column is FloatColumn. + // Or for any type, use `NewJsonReader::_print_json_value(*value)`. 
+ + const char* str_value = nullptr; + char tmp_buf[128] = {0}; + size_t wbytes = 0; + std::string json_str; + + switch (value->GetType()) { + case rapidjson::Type::kStringType: + str_value = value->GetString(); + wbytes = value->GetStringLength(); + break; + case rapidjson::Type::kNumberType: + if (value->IsUint()) { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", value->GetUint()); + } else if (value->IsInt()) { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", value->GetInt()); + } else if (value->IsUint64()) { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, value->GetUint64()); + } else if (value->IsInt64()) { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, value->GetInt64()); + } else if (value->IsFloat() || value->IsDouble()) { + auto* end = fmt::format_to(tmp_buf, "{}", value->GetDouble()); + wbytes = end - tmp_buf; + } else { + return Status::InternalError<false>("It should not here."); + } + str_value = tmp_buf; + break; + case rapidjson::Type::kFalseType: + wbytes = 1; + str_value = (char*)"0"; + break; + case rapidjson::Type::kTrueType: + wbytes = 1; + str_value = (char*)"1"; + break; + default: + // for other type, we convert it to string to save + json_str = NewJsonReader::_print_json_value(*value); + wbytes = json_str.size(); + str_value = json_str.c_str(); + break; + } + Slice slice {str_value, wbytes}; + RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, + _serde_options)); } - str_value = tmp_buf; - break; - case rapidjson::Type::kFalseType: - wbytes = 1; - str_value = (char*)"0"; - break; - case rapidjson::Type::kTrueType: - wbytes = 1; - str_value = (char*)"1"; - break; - case rapidjson::Type::kNullType: - if (!slot_desc->is_nullable()) { - RETURN_IF_ERROR(_append_error_msg( - *value, "Json value is null, but the column `{}` is not nullable.", - slot_desc->col_name(), valid)); - return Status::OK(); + } else if (type_desc.type == TYPE_STRUCT) { + if (!value->IsObject()) [[unlikely]] { + return Status::DataQualityError( + "Json value isn't object, but the column `{}` is struct.", column_name); } - // return immediately to prevent from repeatedly insert_data - *valid = true; - return Status::OK(); - default: - // for other type like array or object. we convert it to string to save - json_str = NewJsonReader::_print_json_value(*value); - wbytes = json_str.size(); - str_value = json_str.c_str(); - break; - } + auto sub_col_size = type_desc.children.size(); + const auto& struct_value = value->GetObject(); - // TODO: if the vexpr can support another 'slot_desc type' than 'TYPE_VARCHAR', - // we need use a function to support these types to insert data in columns. 
- DCHECK(slot_desc->type().type == TYPE_VARCHAR || slot_desc->type().type == TYPE_STRING) - << slot_desc->type().type << ", query id: " << print_id(_state->query_id()); - assert_cast<ColumnString*>(column_ptr)->insert_data(str_value, wbytes); + auto sub_serdes = data_serde->get_nested_serdes(); + auto struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr); + + std::map<std::string, size_t> sub_col_name_to_idx; + for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) { + sub_col_name_to_idx.emplace(type_desc.field_names[sub_col_idx], sub_col_idx); + } + + std::vector<rapidjson::Value::ConstValueIterator> sub_values(sub_col_size, nullptr); + for (const auto& sub : struct_value) { + if (!sub.name.IsString()) [[unlikely]] { + return Status::DataQualityError( + "Json file struct column `{}` subfield name isn't a String", column_name); + } + + auto sub_key_char = sub.name.GetString(); + auto sub_key_length = sub.name.GetStringLength(); + + std::string sub_key(sub_key_char, sub_key_length); + std::transform(sub_key.begin(), sub_key.end(), sub_key.begin(), ::tolower); + + if (sub_col_name_to_idx.find(sub_key) == sub_col_name_to_idx.end()) [[unlikely]] { + continue; + } + size_t sub_column_idx = sub_col_name_to_idx[sub_key]; + sub_values[sub_column_idx] = &sub.value; + } + + for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) { + auto sub_value = sub_values[sub_col_idx]; + + const auto& sub_col_type = type_desc.children[sub_col_idx]; + + RETURN_IF_ERROR(_write_data_to_column( + sub_value, sub_col_type, struct_column_ptr->get_column(sub_col_idx).get_ptr(), + column_name + "." + type_desc.field_names[sub_col_idx], sub_serdes[sub_col_idx], + valid)); + } + } else if (type_desc.type == TYPE_MAP) { + if (!value->IsObject()) [[unlikely]] { + return Status::DataQualityError("Json value isn't object, but the column `{}` is map.", + column_name); + } + const auto& object_value = value->GetObject(); + auto sub_serdes = data_serde->get_nested_serdes(); + auto map_column_ptr = assert_cast<ColumnMap*>(data_column_ptr); + + for (const auto& member_value : object_value) { + RETURN_IF_ERROR(_write_data_to_column( + &member_value.name, type_desc.children[0], + map_column_ptr->get_keys_ptr()->assume_mutable()->get_ptr(), + column_name + ".key", sub_serdes[0], valid)); + + RETURN_IF_ERROR(_write_data_to_column( + &member_value.value, type_desc.children[1], + map_column_ptr->get_values_ptr()->assume_mutable()->get_ptr(), + column_name + ".value", sub_serdes[1], valid)); + } + + auto& offsets = map_column_ptr->get_offsets(); + offsets.emplace_back(offsets.back() + object_value.MemberCount()); + } else if (type_desc.type == TYPE_ARRAY) { + if (!value->IsArray()) [[unlikely]] { + return Status::DataQualityError("Json value isn't array, but the column `{}` is array.", + column_name); + } + const auto& array_value = value->GetArray(); + auto sub_serdes = data_serde->get_nested_serdes(); + auto array_column_ptr = assert_cast<ColumnArray*>(data_column_ptr); + + for (const auto& sub_value : array_value) { + RETURN_IF_ERROR(_write_data_to_column(&sub_value, type_desc.children[0], + array_column_ptr->get_data().get_ptr(), + column_name + ".element", sub_serdes[0], valid)); + } + auto& offsets = array_column_ptr->get_offsets(); + offsets.emplace_back(offsets.back() + array_value.Size()); + } else { + return Status::InternalError("Not support load to complex column."); + } *valid = true; return Status::OK(); @@ -949,20 +1087,21 @@ Status 
NewJsonReader::_write_columns_by_jsonpath(rapidjson::Value& objectValue, // if json_values' size > 1, it means we just match an array, not a wrapped one, so no need to unwrap. json_values = &((*json_values)[0]); } - RETURN_IF_ERROR(_write_data_to_column(json_values, slot_descs[i], column_ptr, valid)); + RETURN_IF_ERROR(_write_data_to_column(json_values, slot_descs[i]->type(), column_ptr, + slot_descs[i]->col_name(), _serdes[i], valid)); if (!(*valid)) { return Status::OK(); } has_valid_value = true; } else { // not found, filling with default value - RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, valid)); + RETURN_IF_ERROR(_fill_missing_column(slot_desc, _serdes[i], column_ptr, valid)); if (!(*valid)) { return Status::OK(); } } } - if (!has_valid_value) { + if (!has_valid_value && _is_load) { // there is no valid value in json line but has filled with default value before // so remove this line in block for (int i = 0; i < block.columns(); ++i) { @@ -1112,7 +1251,7 @@ Status NewJsonReader::_simdjson_handle_simple_json(RuntimeState* /*state*/, Bloc // step2: get json value by json doc Status st = _get_json_value(&size, eof, &error, is_empty_row); - if (st.is<DATA_QUALITY_ERROR>()) { + if (_is_load && st.is<DATA_QUALITY_ERROR>()) { return Status::OK(); } RETURN_IF_ERROR(st); @@ -1386,28 +1525,42 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val for (auto field : *value) { std::string_view key = field.unescaped_key(); StringRef name_ref(key.data(), key.size()); + std::string key_string; + if (_is_hive_table) { + key_string = name_ref.to_string(); + std::transform(key_string.begin(), key_string.end(), key_string.begin(), ::tolower); + name_ref = StringRef(key_string); + } const size_t column_index = _column_index(name_ref, key_index++); if (UNLIKELY(ssize_t(column_index) < 0)) { // This key is not exist in slot desc, just ignore continue; } - if (_seen_columns[column_index]) { - continue; - } if (column_index == skip_bitmap_col_idx) { continue; } + if (_seen_columns[column_index]) { + if (_is_hive_table) { + //Since value can only be traversed once, + // we can only insert the original value first, then delete it, and then reinsert the new value + block.get_by_position(column_index).column->assume_mutable()->pop_back(1); + } else { + continue; + } + } simdjson::ondemand::value val = field.value(); auto* column_ptr = block.get_by_position(column_index).column->assume_mutable().get(); - RETURN_IF_ERROR( - _simdjson_write_data_to_column(val, slot_descs[column_index], column_ptr, valid)); + RETURN_IF_ERROR(_simdjson_write_data_to_column( + val, slot_descs[column_index]->type(), column_ptr, + slot_descs[column_index]->col_name(), _serdes[column_index], valid)); if (!(*valid)) { return Status::OK(); } _seen_columns[column_index] = true; has_valid_value = true; } - if (!has_valid_value) { + + if (!has_valid_value && _is_load) { string col_names; for (auto* slot_desc : slot_descs) { col_names.append(slot_desc->col_name() + ", "); @@ -1472,7 +1625,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val _process_skip_bitmap_mark(slot_desc, column_ptr, block, cur_row_count, valid); column_ptr->insert_default(); } else { - RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, valid)); + RETURN_IF_ERROR(_fill_missing_column(slot_desc, _serdes[i], column_ptr, valid)); if (!(*valid)) { return Status::OK(); } @@ -1482,12 +1635,6 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val 
DCHECK(column_ptr->size() == cur_row_count + 1); } -#ifndef NDEBUG - // Check all columns rows matched - for (size_t i = 0; i < block.columns(); ++i) { - DCHECK_EQ(block.get_by_position(i).column->size(), cur_row_count + 1); - } -#endif // There is at least one valid value here DCHECK(nullcount < block.columns()); *valid = true; @@ -1495,54 +1642,180 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val } Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& value, - SlotDescriptor* slot_desc, IColumn* column, - bool* valid) { - // write + const TypeDescriptor& type_desc, + vectorized::IColumn* column_ptr, + const std::string& column_name, + DataTypeSerDeSPtr serde, bool* valid) { ColumnNullable* nullable_column = nullptr; - IColumn* column_ptr = nullptr; - if (slot_desc->is_nullable()) { - nullable_column = assert_cast<ColumnNullable*>(column); - column_ptr = &nullable_column->get_nested_column(); - } - // TODO: if the vexpr can support another 'slot_desc type' than 'TYPE_VARCHAR', - // we need use a function to support these types to insert data in columns. - auto* column_string = assert_cast<ColumnString*>(column_ptr); - switch (value.type()) { - case simdjson::ondemand::json_type::null: { - if (column->is_nullable()) { - // insert_default already push 1 to null_map - nullable_column->insert_default(); + vectorized::IColumn* data_column_ptr = column_ptr; + DataTypeSerDeSPtr data_serde = serde; + + if (column_ptr->is_nullable()) { + nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr); + + data_column_ptr = nullable_column->get_nested_column().get_ptr(); + data_serde = serde->get_nested_serdes()[0]; + + // kNullType will put 1 into the Null map, so there is no need to push 0 for kNullType. + if (value.type() != simdjson::ondemand::json_type::null) { + nullable_column->get_null_map_data().push_back(0); } else { + nullable_column->insert_default(); + *valid = true; + return Status::OK(); + } + } else if (value.type() == simdjson::ondemand::json_type::null) [[unlikely]] { + if (_is_load) { RETURN_IF_ERROR(_append_error_msg( nullptr, "Json value is null, but the column `{}` is not nullable.", - slot_desc->col_name(), valid)); + column_name, valid)); return Status::OK(); - } - break; - } - case simdjson::ondemand::json_type::boolean: { - nullable_column->get_null_map_data().push_back(0); - if (value.get_bool()) { - column_string->insert_data("1", 1); } else { - column_string->insert_data("0", 1); + return Status::DataQualityError( + "Json value is null, but the column `{}` is not nullable.", column_name); } - break; } - default: { + + if (_is_load || !type_desc.is_complex_type()) { if (value.type() == simdjson::ondemand::json_type::string) { - auto* unescape_buffer = - reinterpret_cast<uint8_t*>(_simdjson_ondemand_unscape_padding_buffer.data()); - std::string_view unescaped_value = - _ondemand_json_parser->unescape(value.get_raw_json_string(), unescape_buffer); - nullable_column->get_null_map_data().push_back(0); - column_string->insert_data(unescaped_value.data(), unescaped_value.length()); - break; + std::string_view value_string = value.get_string(); + Slice slice {value_string.data(), value_string.size()}; + RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, + _serde_options)); + + } else { + // Maybe we can `switch (value->GetType()) case: kNumberType`. + // Note that `if (value->IsInt())`, but column is FloatColumn. 
+ std::string_view json_str = simdjson::to_json_string(value); + Slice slice {json_str.data(), json_str.size()}; + RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, + _serde_options)); + } + } else if (type_desc.type == TYPE_STRUCT) { + if (value.type() != simdjson::ondemand::json_type::object) [[unlikely]] { + return Status::DataQualityError( + "Json value isn't object, but the column `{}` is struct.", column_name); + } + + auto sub_col_size = type_desc.children.size(); + simdjson::ondemand::object struct_value = value.get_object(); + auto sub_serdes = data_serde->get_nested_serdes(); + auto struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr); + + std::map<std::string, size_t> sub_col_name_to_idx; + for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) { + sub_col_name_to_idx.emplace(type_desc.field_names[sub_col_idx], sub_col_idx); + } + vector<bool> has_value(sub_col_size, false); + for (simdjson::ondemand::field sub : struct_value) { + std::string_view sub_key_view = sub.unescaped_key(); + std::string sub_key(sub_key_view.data(), sub_key_view.length()); + std::transform(sub_key.begin(), sub_key.end(), sub_key.begin(), ::tolower); + + if (sub_col_name_to_idx.find(sub_key) == sub_col_name_to_idx.end()) [[unlikely]] { + continue; + } + size_t sub_column_idx = sub_col_name_to_idx[sub_key]; + auto sub_column_ptr = struct_column_ptr->get_column(sub_column_idx).get_ptr(); + + if (has_value[sub_column_idx]) [[unlikely]] { + // Since struct_value can only be traversed once, we can only insert + // the original value first, then delete it, and then reinsert the new value. + sub_column_ptr->pop_back(1); + } + has_value[sub_column_idx] = true; + + const auto& sub_col_type = type_desc.children[sub_column_idx]; + RETURN_IF_ERROR(_simdjson_write_data_to_column( + sub.value(), sub_col_type, sub_column_ptr, column_name + "." + sub_key, + sub_serdes[sub_column_idx], valid)); } - auto value_str = simdjson::to_json_string(value).value(); - nullable_column->get_null_map_data().push_back(0); - column_string->insert_data(value_str.data(), value_str.length()); - } + + //fill missing subcolumn + for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) { + if (has_value[sub_col_idx] == true) { + continue; + } + + auto sub_column_ptr = struct_column_ptr->get_column(sub_col_idx).get_ptr(); + if (sub_column_ptr->is_nullable()) { + sub_column_ptr->insert_default(); + continue; + } else [[unlikely]] { + return Status::DataQualityError( + "Json file structColumn miss field {} and this column isn't nullable.", + column_name + "." 
+ type_desc.field_names[sub_col_idx]); + } + } + } else if (type_desc.type == TYPE_MAP) { + if (value.type() != simdjson::ondemand::json_type::object) [[unlikely]] { + return Status::DataQualityError("Json value isn't object, but the column `{}` is map.", + column_name); + } + simdjson::ondemand::object object_value = value.get_object(); + + auto sub_serdes = data_serde->get_nested_serdes(); + auto map_column_ptr = assert_cast<ColumnMap*>(data_column_ptr); + + size_t field_count = 0; + for (simdjson::ondemand::field member_value : object_value) { + auto f = [](std::string_view key_view, const TypeDescriptor& type_desc, + vectorized::IColumn* column_ptr, DataTypeSerDeSPtr serde, + vectorized::DataTypeSerDe::FormatOptions serde_options, bool* valid) { + auto data_column_ptr = column_ptr; + auto data_serde = serde; + if (column_ptr->is_nullable()) { + auto nullable_column = static_cast<ColumnNullable*>(column_ptr); + + nullable_column->get_null_map_data().push_back(0); + data_column_ptr = nullable_column->get_nested_column().get_ptr(); + data_serde = serde->get_nested_serdes()[0]; + } + Slice slice(key_view.data(), key_view.length()); + + RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, + serde_options)); + return Status::OK(); + }; + + RETURN_IF_ERROR(f(member_value.unescaped_key(), type_desc.children[0], + map_column_ptr->get_keys_ptr()->assume_mutable()->get_ptr(), + sub_serdes[0], _serde_options, valid)); + + simdjson::ondemand::value field_value = member_value.value(); + RETURN_IF_ERROR(_simdjson_write_data_to_column( + field_value, type_desc.children[1], + map_column_ptr->get_values_ptr()->assume_mutable()->get_ptr(), + column_name + ".value", sub_serdes[1], valid)); + field_count++; + } + + auto& offsets = map_column_ptr->get_offsets(); + offsets.emplace_back(offsets.back() + field_count); + + } else if (type_desc.type == TYPE_ARRAY) { + if (value.type() != simdjson::ondemand::json_type::array) [[unlikely]] { + return Status::DataQualityError("Json value isn't array, but the column `{}` is array.", + column_name); + } + + simdjson::ondemand::array array_value = value.get_array(); + + auto sub_serdes = data_serde->get_nested_serdes(); + auto array_column_ptr = assert_cast<ColumnArray*>(data_column_ptr); + + int field_count = 0; + for (simdjson::ondemand::value sub_value : array_value) { + RETURN_IF_ERROR(_simdjson_write_data_to_column( + sub_value, type_desc.children[0], array_column_ptr->get_data().get_ptr(), + column_name + ".element", sub_serdes[0], valid)); + field_count++; + } + auto& offsets = array_column_ptr->get_offsets(); + offsets.emplace_back(offsets.back() + field_count); + + } else { + return Status::InternalError("Not support load to complex column."); } *valid = true; return Status::OK(); @@ -1750,13 +2023,14 @@ Status NewJsonReader::_simdjson_write_columns_by_jsonpath( has_valid_value = true; } else if (i >= _parsed_jsonpaths.size() || st.is<NOT_FOUND>()) { // not match in jsondata, filling with default value - RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, valid)); + RETURN_IF_ERROR(_fill_missing_column(slot_desc, _serdes[i], column_ptr, valid)); if (!(*valid)) { return Status::OK(); } } else { - RETURN_IF_ERROR( - _simdjson_write_data_to_column(json_value, slot_desc, column_ptr, valid)); + RETURN_IF_ERROR(_simdjson_write_data_to_column(json_value, slot_desc->type(), + column_ptr, slot_desc->col_name(), + _serdes[i], valid)); if (!(*valid)) { return Status::OK(); } @@ -1814,25 +2088,30 @@ Status 
NewJsonReader::_get_column_default_value( return Status::OK(); } -Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc, IColumn* column_ptr, - bool* valid) { - if (slot_desc->is_nullable()) { - auto* nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr); - column_ptr = &nullable_column->get_nested_column(); - auto col_value = _col_default_value_map.find(slot_desc->col_name()); - if (col_value == _col_default_value_map.end()) { +Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc, DataTypeSerDeSPtr serde, + IColumn* column_ptr, bool* valid) { + auto col_value = _col_default_value_map.find(slot_desc->col_name()); + if (col_value == _col_default_value_map.end()) { + if (slot_desc->is_nullable()) { + auto* nullable_column = static_cast<ColumnNullable*>(column_ptr); nullable_column->insert_default(); } else { - const std::string& v_str = col_value->second; - nullable_column->get_null_map_data().push_back(0); - assert_cast<ColumnString*>(column_ptr)->insert_data(v_str.c_str(), v_str.size()); + if (_is_load) { + RETURN_IF_ERROR(_append_error_msg( + nullptr, "The column `{}` is not nullable, but it's not found in jsondata.", + slot_desc->col_name(), valid)); + } else { + return Status::DataQualityError( + "The column `{}` is not nullable, but it's not found in jsondata.", + slot_desc->col_name()); + } } } else { - RETURN_IF_ERROR(_append_error_msg( - nullptr, "The column `{}` is not nullable, but it's not found in jsondata.", - slot_desc->col_name(), valid)); + const std::string& v_str = col_value->second; + Slice column_default_value {v_str}; + RETURN_IF_ERROR(serde->deserialize_one_cell_from_json(*column_ptr, column_default_value, + _serde_options)); } - *valid = true; return Status::OK(); } diff --git a/be/src/vec/exec/format/json/new_json_reader.h b/be/src/vec/exec/format/json/new_json_reader.h index 985bf999676..482a1ced747 100644 --- a/be/src/vec/exec/format/json/new_json_reader.h +++ b/be/src/vec/exec/format/json/new_json_reader.h @@ -87,7 +87,8 @@ public: ~NewJsonReader() override = default; Status init_reader(const std::unordered_map<std::string, vectorized::VExprContextSPtr>& - col_default_value_ctx); + col_default_value_ctx, + bool is_load); Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; Status get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type, std::unordered_set<std::string>* missing_cols) override; @@ -128,7 +129,8 @@ private: const std::vector<SlotDescriptor*>& slot_descs, bool* valid); Status _write_data_to_column(rapidjson::Value::ConstValueIterator value, - SlotDescriptor* slot_desc, vectorized::IColumn* column_ptr, + const TypeDescriptor& type_desc, vectorized::IColumn* column_ptr, + const std::string& column_name, DataTypeSerDeSPtr serde, bool* valid); Status _write_columns_by_jsonpath(rapidjson::Value& objectValue, @@ -177,8 +179,10 @@ private: const std::vector<SlotDescriptor*>& slot_descs, bool* valid); Status _simdjson_write_data_to_column(simdjson::ondemand::value& value, - SlotDescriptor* slot_desc, - vectorized::IColumn* column_ptr, bool* valid); + const TypeDescriptor& type_desc, + vectorized::IColumn* column_ptr, + const std::string& column_name, DataTypeSerDeSPtr serde, + bool* valid); Status _simdjson_write_columns_by_jsonpath(simdjson::ondemand::object* value, const std::vector<SlotDescriptor*>& slot_descs, @@ -196,8 +200,8 @@ private: const std::unordered_map<std::string, vectorized::VExprContextSPtr>& col_default_value_ctx); - Status _fill_missing_column(SlotDescriptor* 
slot_desc, vectorized::IColumn* column_ptr, - bool* valid); + Status _fill_missing_column(SlotDescriptor* slot_desc, DataTypeSerDeSPtr serde, + vectorized::IColumn* column_ptr, bool* valid); // fe will add skip_bitmap_col to _file_slot_descs iff the target olap table has skip_bitmap_col // and the current load is a flexible partial update @@ -292,6 +296,22 @@ private: std::unordered_map<std::string, std::string> _col_default_value_map; int32_t skip_bitmap_col_idx {-1}; + + bool _is_load = true; + //Used to indicate whether it is a stream load. When loading, only data will be inserted into columnString. + //If an illegal value is encountered during the load process, `_append_error_msg` should be called + //instead of directly returning `Status::DataQualityError` + + bool _is_hive_table = false; + // In hive : create table xxx ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; + // Hive will not allow you to create columns with the same name but different case, including field names inside + // structs, and will automatically convert uppercase names in create sql to lowercase.However, when Hive loads data + // to table, the column names in the data may be uppercase,and there may be multiple columns with + // the same name but different capitalization.We refer to the behavior of hive, convert all column names + // in the data to lowercase,and use the last one as the insertion value + + DataTypeSerDeSPtrs _serdes; + vectorized::DataTypeSerDe::FormatOptions _serde_options; }; } // namespace vectorized diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 982ac6072dc..3053adebbb5 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -965,8 +965,8 @@ Status VFileScanner::_get_next_reader() { _cur_reader = NewJsonReader::create_unique(_state, _profile, &_counter, *_params, range, _file_slot_descs, &_scanner_eof, _io_ctx.get()); - init_status = - ((NewJsonReader*)(_cur_reader.get()))->init_reader(_col_default_value_ctx); + init_status = ((NewJsonReader*)(_cur_reader.get())) + ->init_reader(_col_default_value_ctx, _is_load); break; } case TFileFormatType::FORMAT_AVRO: { diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run69.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run69.hql new file mode 100644 index 00000000000..adf0f7d56b2 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run69.hql @@ -0,0 +1,35 @@ +use `default`; + + +CREATE TABLE json_nested_complex_table ( + user_ID STRING, + user_PROFILE STRUCT< + name: STRING, + AGE: INT, + preferences: MAP< + STRING, + STRUCT< + preference_ID: INT, + preference_VALUES: ARRAY<STRING> + > + > + >, + activity_LOG ARRAY< + STRUCT< + activity_DATE: STRING, + activities: MAP< + STRING, + STRUCT< + `DETAILS`: STRING, + metrics: MAP<STRING, float> + > + > + > + > +) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' + +LOCATION + '/user/doris/preinstalled_data/json/json_nested_complex_table'; + + +msck repair table json_nested_complex_table; diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run70.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run70.hql new file mode 100644 index 00000000000..73df8cba557 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run70.hql @@ -0,0 +1,73 @@ +use `default`; + + +CREATE TABLE 
json_all_complex_types ( + `id` int, + `boolean_col` boolean, + `tinyint_col` tinyint, + `smallint_col` smallint, + `int_col` int, + `bigint_col` bigint, + `float_col` float, + `double_col` double, + `decimal_col1` decimal(9,0), + `decimal_col2` decimal(8,4), + `decimal_col3` decimal(18,6), + `decimal_col4` decimal(38,12), + `string_col` string, + `binary_col` binary, + `date_col` date, + `timestamp_col1` timestamp, + `timestamp_col2` timestamp, + `timestamp_col3` timestamp, + `char_col1` char(50), + `char_col2` char(100), + `char_col3` char(255), + `varchar_col1` varchar(50), + `varchar_col2` varchar(100), + `varchar_col3` varchar(255), + `t_map_string` map<string,string>, + `t_map_varchar` map<varchar(65535),varchar(65535)>, + `t_map_char` map<char(10),char(10)>, + `t_map_int` map<int,int>, + `t_map_bigint` map<bigint,bigint>, + `t_map_float` map<float,float>, + `t_map_double` map<double,double>, + `t_map_boolean` map<boolean,boolean>, + `t_map_decimal_precision_2` map<decimal(2,1),decimal(2,1)>, + `t_map_decimal_precision_4` map<decimal(4,2),decimal(4,2)>, + `t_map_decimal_precision_8` map<decimal(8,4),decimal(8,4)>, + `t_map_decimal_precision_17` map<decimal(17,8),decimal(17,8)>, + `t_map_decimal_precision_18` map<decimal(18,8),decimal(18,8)>, + `t_map_decimal_precision_38` map<decimal(38,16),decimal(38,16)>, + `t_array_string` array<string>, + `t_array_int` array<int>, + `t_array_bigint` array<bigint>, + `t_array_float` array<float>, + `t_array_double` array<double>, + `t_array_boolean` array<boolean>, + `t_array_varchar` array<varchar(65535)>, + `t_array_char` array<char(10)>, + `t_array_decimal_precision_2` array<decimal(2,1)>, + `t_array_decimal_precision_4` array<decimal(4,2)>, + `t_array_decimal_precision_8` array<decimal(8,4)>, + `t_array_decimal_precision_17` array<decimal(17,8)>, + `t_array_decimal_precision_18` array<decimal(18,8)>, + `t_array_decimal_precision_38` array<decimal(38,16)>, + `t_struct_bigint` struct<s_bigint:bigint>, + `t_complex` map<string,array<struct<s_int:int>>>, + `t_struct_nested` struct<struct_field:array<string>>, + `t_struct_null` struct<struct_field_null:string,struct_field_null2:string>, + `t_struct_non_nulls_after_nulls` struct<struct_non_nulls_after_nulls1:int,struct_non_nulls_after_nulls2:string>, + `t_nested_struct_non_nulls_after_nulls` struct<struct_field1:int,struct_field2:string,strict_field3:struct<nested_struct_field1:int,nested_struct_field2:string>>, + `t_map_null_value` map<string,string>, + `t_array_string_starting_with_nulls` array<string>, + `t_array_string_with_nulls_in_between` array<string>, + `t_array_string_ending_with_nulls` array<string>, + `t_array_string_all_nulls` array<string> + ) PARTITIONED BY (`dt` string) +ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' +LOCATION + '/user/doris/preinstalled_data/json/json_all_complex_types'; + +msck repair table json_all_complex_types; diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run71.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run71.hql new file mode 100644 index 00000000000..ec99e72d2f5 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run71.hql @@ -0,0 +1,13 @@ +use `default`; + + +CREATE TABLE json_load_data_table ( + `id` int, + `col1` int, + `col2` struct< col2a:int, col2b:string>, + `col3` map<int,string> +) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' +LOCATION + '/user/doris/preinstalled_data/json/json_load_data_table'; + 
+msck repair table json_load_data_table; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt1/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt1/000000_0 new file mode 100644 index 00000000000..5fe37cbc6f0 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt1/000000_0 @@ -0,0 +1,3 @@ +{"id":1,"boolean_col":true,"tinyint_col":127,"smallint_col":32767,"int_col":2147483647,"bigint_col":9223372036854775807,"float_col":123.45,"double_col":123456.789,"decimal_col1":123456789,"decimal_col2":1234.5678,"decimal_col3":123456.789012,"decimal_col4":123456789.012345678901,"string_col":"string_value","binary_col":"binary_value","date_col":"2024-03-20","timestamp_col1":"2024-03-20 12:00:00","timestamp_col2":"2024-03-20 12:00:00.123456789","timestamp_col3":"2024-03-20 12:00:00.123456 [...] +{"id":2,"boolean_col":false,"tinyint_col":58,"smallint_col":12345,"int_col":2147483000,"bigint_col":null,"float_col":789.56,"double_col":654321.123,"decimal_col1":987654321,"decimal_col2":5678.1234,"decimal_col3":987654.321098,"decimal_col4":987654321.098765432109,"string_col":"changed_string","binary_col":"new_binary_value","date_col":"2025-05-25","timestamp_col1":"2025-05-25 15:30:00","timestamp_col2":"2025-05-25 15:30:00.654321987","timestamp_col3":"2025-05-25 15:30:00.654321987","cha [...] +{"id":3,"boolean_col":false,"tinyint_col":-128,"smallint_col":-32768,"int_col":-2147483648,"bigint_col":-9223372036854775808,"float_col":-3.4028235E38,"double_col":-1.7976931348623157E308,"decimal_col1":-999999999,"decimal_col2":-9999.9999,"decimal_col3":-999999999.999999,"decimal_col4":null,"string_col":"min_string_value","binary_col":"xxxx","date_col":"2001-01-01","timestamp_col1":"2001-01-01 00:00:00","timestamp_col2":"2001-01-01 00:00:00","timestamp_col3":"2001-01-01 00:00:00","char_ [...] diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt2/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt2/000000_0 new file mode 100644 index 00000000000..0a823bee693 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt2/000000_0 @@ -0,0 +1 @@ +{"id":4,"boolean_col":null,"tinyint_col":null,"smallint_col":null,"int_col":null,"bigint_col":null,"float_col":123.45,"double_col":null,"decimal_col1":null,"decimal_col2":null,"decimal_col3":null,"decimal_col4":null,"string_col":null,"binary_col":null,"date_col":null,"timestamp_col1":null,"timestamp_col2":null,"timestamp_col3":null,"char_col1":null,"char_col2":null,"char_col3":null,"varchar_col1":null,"varchar_col2":null,"varchar_col3":null,"t_map_string":null,"t_map_varchar":null,"t_map [...] 
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt3/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt3/000000_0 new file mode 100644 index 00000000000..a5e46399fdd --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt3/000000_0 @@ -0,0 +1,2 @@ +{"id":5,"boolean_col":null,"tinyint_col":null,"smallint_col":null,"int_col":null,"bigint_col":null,"float_col":null,"double_col":null,"decimal_col1":null,"decimal_col2":null,"decimal_col3":null,"decimal_col4":null,"string_col":null,"binary_col":null,"date_col":null,"timestamp_col1":null,"timestamp_col2":null,"timestamp_col3":null,"char_col1":null,"char_col2":null,"char_col3":null,"varchar_col1":null,"varchar_col2":null,"varchar_col3":null,"t_map_string":null,"t_map_varchar":null,"t_map_c [...] +{"id":6,"boolean_col":null,"tinyint_col":null,"smallint_col":null,"int_col":null,"bigint_col":null,"float_col":null,"double_col":null,"decimal_col1":null,"decimal_col2":null,"decimal_col3":null,"decimal_col4":null,"string_col":null,"binary_col":null,"date_col":null,"timestamp_col1":null,"timestamp_col2":null,"timestamp_col3":null,"char_col1":null,"char_col2":null,"char_col3":null,"varchar_col1":null,"varchar_col2":null,"varchar_col3":null,"t_map_string":null,"t_map_varchar":null,"t_map_c [...] diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_load_data_table/1 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_load_data_table/1 new file mode 100644 index 00000000000..70d1265f98d --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_load_data_table/1 @@ -0,0 +1,13 @@ +{"id":1,"col1":10,"col2":{"col2a":10,"col2b":"string1"},"col3":{"1":"string10"}} +{"id":2,"col1":10,"col1":20,"col2":{"col2b":"string2","col2a":0,"Col2A":20},"col3":{"2":"string2"}} +{"id":3,"col1":10,"col1":20,"COL1":30,"COL2":{"col2a":30,"col2b":"string3"}} +{"id":4,"COL1":40,"col2":{"col2a":10,"col2b":"string4","new_col":"new_val","col2a":40},"col3":{"4":"string4"}} +{"id":5} +{"id":6,"col1":60,"col2":{"COL2a":60,"col2b":600},"col3":{"6":600}} +{"id":7,"col1":70,"col3":{"7":"string7"},"col2":{"col2b":"string7","col2a":70}} + + + + +{} +{"a":5} diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/1 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/1 new file mode 100644 index 00000000000..11342c441bc --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/1 @@ -0,0 +1,2 @@ +{"user_id":"user1","user_profile":{"name":"Alice","age":28,"preferences":{"sports":{"preference_id":101,"preference_values":["soccer","tennis"]},"music":{"preference_id":102,"preference_values":["rock","classical"]}}},"activity_log":[{"activity_date":"2024-08-01","activities":{"workout":{"details":"Morning run","metrics":{"duration":30.5,"calories":200.0}},"reading":{"details":"Read book on Hive","metrics":{"pages":50.0,"time":2.0}}}},{"activity_date":"2024-08-02","activities":{"travel": [...] 
+{"user_id":"user2","user_profile":{"name":"Bob","age":32,"preferences":{"books":{"preference_id":201,"preference_values":["fiction","non-fiction"]},"travel":{"preference_id":202,"preference_values":["beaches","mountains"]}}},"activity_log":[{"activity_date":"2024-08-01","activities":{"hiking":{"details":"Mountain trail","metrics":{"distance":10.0,"elevation":500.0}},"photography":{"details":"Wildlife photoshoot","metrics":{"photos_taken":100.0,"time":4.0}}}},{"activity_date":"2024-08-02" [...] diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/2 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/2 new file mode 100644 index 00000000000..e1b0befc7bc --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/2 @@ -0,0 +1 @@ +{"user_id":"user3","user_profile":{"name":"Carol","age":24,"preferences":{"food":{"preference_id":301,"preference_values":["vegan","desserts"]},"movies":{"preference_id":302,"preference_values":["action","comedy"]}}},"activity_log":[{"activity_date":"2024-08-01","activities":{"cooking":{"details":"Made vegan meal","metrics":{"time_spent":1.5,"calories":500.0}},"movie":{"details":"Watched action movie","metrics":{"duration":2.0,"rating":8.5}}}},{"activity_date":"2024-08-02","activities":{ [...] diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/modify_2 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/modify_2 new file mode 100644 index 00000000000..08f1586f3aa --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/modify_2 @@ -0,0 +1,2 @@ +{"user_ID":"user4","user_PROFILE":{"name":"Carol","age":24,"preferences":{"food":{"preference_ID":301,"preference_VALUES":["vegan","desserts"]},"movies":{"preference_ID":302,"preference_VALUES":["action","comedy"]}}},"activity_LOG":[{"activity_DATE":"2024-08-01","activities":{"cooking":{"DETAILS":"Made vegan meal","metrics":{"time_spent":1.5,"calories":500.0}},"movie":{"DETAILS":"Watched action movie","metrics":{"duration":2.0,"rating":8.5}}}},{"activity_DATE":"2024-08-02","activities":{ [...] 
+ diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java index 97032467cec..0f839d238b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java @@ -94,6 +94,9 @@ public class HiveMetaStoreClientHelper { private static final Pattern digitPattern = Pattern.compile("(\\d+)"); + public static final String HIVE_JSON_SERDE = "org.apache.hive.hcatalog.data.JsonSerDe"; + public static final String LEGACY_HIVE_JSON_SERDE = "org.apache.hadoop.hive.serde2.JsonSerDe"; + public enum HiveFileFormat { TEXT_FILE(0, "text"), PARQUET(1, "parquet"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index e710bdb935d..3a2a4d3eb5c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -364,14 +364,21 @@ public class HiveScanNode extends FileQueryScanNode { @Override public TFileFormatType getFileFormatType() throws UserException { TFileFormatType type = null; - String inputFormatName = hmsTable.getRemoteTable().getSd().getInputFormat(); + Table table = hmsTable.getRemoteTable(); + String inputFormatName = table.getSd().getInputFormat(); String hiveFormat = HiveMetaStoreClientHelper.HiveFileFormat.getFormat(inputFormatName); if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.PARQUET.getDesc())) { type = TFileFormatType.FORMAT_PARQUET; } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.ORC.getDesc())) { type = TFileFormatType.FORMAT_ORC; } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.TEXT_FILE.getDesc())) { - type = TFileFormatType.FORMAT_CSV_PLAIN; + String serDeLib = table.getSd().getSerdeInfo().getSerializationLib(); + if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_JSON_SERDE) + || serDeLib.equals(HiveMetaStoreClientHelper.LEGACY_HIVE_JSON_SERDE)) { + type = TFileFormatType.FORMAT_JSON; + } else { + type = TFileFormatType.FORMAT_CSV_PLAIN; + } } return type; } @@ -383,11 +390,12 @@ public class HiveScanNode extends FileQueryScanNode { @Override protected TFileAttributes getFileAttributes() throws UserException { - TFileTextScanRangeParams textParams = new TFileTextScanRangeParams(); + TFileAttributes fileAttributes = new TFileAttributes(); Table table = hmsTable.getRemoteTable(); // TODO: separate hive text table and OpenCsv table String serDeLib = table.getSd().getSerdeInfo().getSerializationLib(); if (serDeLib.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) { + TFileTextScanRangeParams textParams = new TFileTextScanRangeParams(); // set properties of LazySimpleSerDe // 1. set column separator textParams.setColumnSeparator(HiveProperties.getFieldDelimiter(table)); @@ -401,7 +409,10 @@ public class HiveScanNode extends FileQueryScanNode { HiveProperties.getEscapeDelimiter(table).ifPresent(d -> textParams.setEscape(d.getBytes()[0])); // 6. 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index e710bdb935d..3a2a4d3eb5c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -364,14 +364,21 @@ public class HiveScanNode extends FileQueryScanNode {
     @Override
     public TFileFormatType getFileFormatType() throws UserException {
         TFileFormatType type = null;
-        String inputFormatName = hmsTable.getRemoteTable().getSd().getInputFormat();
+        Table table = hmsTable.getRemoteTable();
+        String inputFormatName = table.getSd().getInputFormat();
         String hiveFormat = HiveMetaStoreClientHelper.HiveFileFormat.getFormat(inputFormatName);
         if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.PARQUET.getDesc())) {
             type = TFileFormatType.FORMAT_PARQUET;
         } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.ORC.getDesc())) {
             type = TFileFormatType.FORMAT_ORC;
         } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.TEXT_FILE.getDesc())) {
-            type = TFileFormatType.FORMAT_CSV_PLAIN;
+            String serDeLib = table.getSd().getSerdeInfo().getSerializationLib();
+            if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_JSON_SERDE)
+                    || serDeLib.equals(HiveMetaStoreClientHelper.LEGACY_HIVE_JSON_SERDE)) {
+                type = TFileFormatType.FORMAT_JSON;
+            } else {
+                type = TFileFormatType.FORMAT_CSV_PLAIN;
+            }
         }
         return type;
     }
@@ -383,11 +390,12 @@ public class HiveScanNode extends FileQueryScanNode {
 
     @Override
     protected TFileAttributes getFileAttributes() throws UserException {
-        TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
+        TFileAttributes fileAttributes = new TFileAttributes();
         Table table = hmsTable.getRemoteTable();
         // TODO: separate hive text table and OpenCsv table
         String serDeLib = table.getSd().getSerdeInfo().getSerializationLib();
         if (serDeLib.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) {
+            TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
             // set properties of LazySimpleSerDe
             // 1. set column separator
             textParams.setColumnSeparator(HiveProperties.getFieldDelimiter(table));
@@ -401,7 +409,10 @@
             HiveProperties.getEscapeDelimiter(table).ifPresent(d -> textParams.setEscape(d.getBytes()[0]));
             // 6. set null format
             textParams.setNullFormat(HiveProperties.getNullFormat(table));
+            fileAttributes.setTextParams(textParams);
+            fileAttributes.setHeaderType("");
         } else if (serDeLib.equals("org.apache.hadoop.hive.serde2.OpenCSVSerde")) {
+            TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
             // set properties of OpenCSVSerde
             // 1. set column separator
             textParams.setColumnSeparator(HiveProperties.getSeparatorChar(table));
@@ -411,17 +422,29 @@
             textParams.setEnclose(HiveProperties.getQuoteChar(table).getBytes()[0]);
             // 4. set escape char
             textParams.setEscape(HiveProperties.getEscapeChar(table).getBytes()[0]);
+            fileAttributes.setTextParams(textParams);
+            fileAttributes.setHeaderType("");
+            if (textParams.isSetEnclose()) {
+                fileAttributes.setTrimDoubleQuotes(true);
+            }
+        } else if (serDeLib.equals("org.apache.hive.hcatalog.data.JsonSerDe")) {
+            TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
+            textParams.setColumnSeparator("\t");
+            textParams.setLineDelimiter("\n");
+            fileAttributes.setTextParams(textParams);
+
+            fileAttributes.setJsonpaths("");
+            fileAttributes.setJsonRoot("");
+            fileAttributes.setNumAsString(true);
+            fileAttributes.setFuzzyParse(false);
+            fileAttributes.setReadJsonByLine(true);
+            fileAttributes.setStripOuterArray(false);
+            fileAttributes.setHeaderType("");
         } else {
             throw new UserException(
                     "unsupported hive table serde: " + serDeLib);
         }
-        TFileAttributes fileAttributes = new TFileAttributes();
-        fileAttributes.setTextParams(textParams);
-        fileAttributes.setHeaderType("");
-        if (textParams.isSet(TFileTextScanRangeParams._Fields.ENCLOSE)) {
-            fileAttributes.setTrimDoubleQuotes(true);
-        }
         return fileAttributes;
     }
diff --git a/regression-test/data/external_table_p0/hive/hive_json_basic_test.out b/regression-test/data/external_table_p0/hive/hive_json_basic_test.out
new file mode 100644
index 00000000000..9023f5d72b1
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/hive_json_basic_test.out
@@ -0,0 +1,115 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !q1 --
+1 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 [...]
+2 false 58 12345 2147483000 \N 789.56 654321.123 987654321 5678.1234 987654.321098 987654321.098765432109 changed_string new_binary_value 2025-05-25 2025-05-25T15:30 2025-05-25T15:30:00.654322 2025-05-25T15:30:00.654322 char_new_value1 char_new_value2 char_new_value3 [...]
+3 false -128 -32768 -2147483648 -9223372036854775808 -3.4028235e+38 -1.7976931348623157E308 -999999999 -9999.9999 -999999999.999999 \N min_string_value xxxx 2001-01-01 2001-01-01T00:00 2001-01-01T00:00 2001-01-01T00:00 char_min_value1 char_min_value2 char_min_value3 [...]
+4 \N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N dt2 +5 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N dt3 +6 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N dt3 + +-- !q2 -- +3 false -128 -32768 -2147483648 -9223372036854775808 -3.4028235e+38 -1.7976931348623157E308 -999999999 -9999.9999 -999999999.999999 \N min_string_value xxxx 2001-01-01 2001-01-01T00:00 2001-01-01T00:00 2001-01-01T00:00 char_min_value1 char_min_value2 char_min_value3 [...] + +-- !q3 -- +1 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 [...] + +-- !q4 -- +123.45 +789.56 +-3.4028235e+38 +123.45 + +-- !q5 -- +2 false 58 12345 2147483000 \N 789.56 654321.123 987654321 5678.1234 987654.321098 987654321.098765432109 changed_string new_binary_value 2025-05-25 2025-05-25T15:30 2025-05-25T15:30:00.654322 2025-05-25T15:30:00.654322 char_new_value1 char_new_value2 char_new_value3 [...] + +-- !q6 -- +user1 {"name":"Alice", "age":28, "preferences":{"sports":{"preference_id":101, "preference_values":["soccer", "tennis"]}, "music":{"preference_id":102, "preference_values":["rock", "classical"]}}} [{"activity_date":"2024-08-01", "activities":{"workout":{"details":"Morning run", "metrics":{"duration":30.5, "calories":200}}, "reading":{"details":"Read book on Hive", "metrics":{"pages":50, "time":2}}}}, {"activity_date":"2024-08-02", "activities":{"travel":{"details":"Flight to NY", "metric [...] +user2 {"name":"Bob", "age":32, "preferences":{"books":{"preference_id":201, "preference_values":["fiction", "non-fiction"]}, "travel":{"preference_id":202, "preference_values":["beaches", "mountains"]}}} [{"activity_date":"2024-08-01", "activities":{"hiking":{"details":"Mountain trail", "metrics":{"distance":10, "elevation":500}}, "photography":{"details":"Wildlife photoshoot", "metrics":{"photos_taken":100, "time":4}}}}, {"activity_date":"2024-08-02", "activities":{"workshop":{"details" [...] +user3 {"name":"Carol", "age":24, "preferences":{"food":{"preference_id":301, "preference_values":["vegan", "desserts"]}, "movies":{"preference_id":302, "preference_values":["action", "comedy"]}}} [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength train [...] +user4 {"name":"Carol", "age":24, "preferences":{"food":{"preference_id":301, "preference_values":["vegan", "desserts"]}, "movies":{"preference_id":302, "preference_values":["action", "comedy"]}}} [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength train [...] 
+ +-- !q7 -- +user1 [{"activity_date":"2024-08-01", "activities":{"workout":{"details":"Morning run", "metrics":{"duration":30.5, "calories":200}}, "reading":{"details":"Read book on Hive", "metrics":{"pages":50, "time":2}}}}, {"activity_date":"2024-08-02", "activities":{"travel":{"details":"Flight to NY", "metrics":{"distance":500, "time":3}}, "meeting":{"details":"Project meeting", "metrics":{"duration":1.5, "participants":5}}}}] +user2 [{"activity_date":"2024-08-01", "activities":{"hiking":{"details":"Mountain trail", "metrics":{"distance":10, "elevation":500}}, "photography":{"details":"Wildlife photoshoot", "metrics":{"photos_taken":100, "time":4}}}}, {"activity_date":"2024-08-02", "activities":{"workshop":{"details":"Photography workshop", "metrics":{"duration":3, "participants":15}}, "shopping":{"details":"Bought camera gear", "metrics":{"items":5, "cost":1500}}}}] +user3 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength training", "metrics":{"duration":1, "calories":300}}, "shopping":{"details":"Bought groceries", "metrics":{"items":10, "cost":100}}}}] +user4 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength training", "metrics":{"duration":1, "calories":300}}, "shopping":{"details":"Bought groceries", "metrics":{"items":10, "cost":100}}}}] + +-- !q8 -- +\N \N \N \N +\N \N \N \N +1 10 {"col2a":10, "col2b":"string1"} {1:"string10"} +2 20 {"col2a":20, "col2b":"string2"} {2:"string2"} +3 30 {"col2a":30, "col2b":"string3"} \N +4 40 {"col2a":40, "col2b":"string4"} {4:"string4"} +5 \N \N \N +6 60 {"col2a":60, "col2b":"600"} {6:"600"} +7 70 {"col2a":70, "col2b":"string7"} {7:"string7"} + +-- !q9 -- +\N \N +\N \N +\N 5 +10 1 +20 2 +30 3 +40 4 +60 6 +70 7 + +-- !q1 -- +1 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 [...] +2 false 58 12345 2147483000 \N 789.56 654321.123 987654321 5678.1234 987654.321098 987654321.098765432109 changed_string new_binary_value 2025-05-25 2025-05-25T15:30 2025-05-25T15:30:00.654322 2025-05-25T15:30:00.654322 char_new_value1 char_new_value2 char_new_value3 [...] +3 false -128 -32768 -2147483648 -9223372036854775808 -3.4028235e+38 -1.7976931348623157E308 -999999999 -9999.9999 -999999999.999999 \N min_string_value xxxx 2001-01-01 2001-01-01T00:00 2001-01-01T00:00 2001-01-01T00:00 char_min_value1 char_min_value2 char_min_value3 [...] 
+4 \N \N \N \N \N 123.45 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N {1:10} \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N [1.2345, 2.3456] \N \N \N \N \N \N \N \N \N \N [null, "value1", "value2"] \N \N \N dt2 +5 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N dt3 +6 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N dt3 + +-- !q2 -- +3 false -128 -32768 -2147483648 -9223372036854775808 -3.4028235e+38 -1.7976931348623157E308 -999999999 -9999.9999 -999999999.999999 \N min_string_value xxxx 2001-01-01 2001-01-01T00:00 2001-01-01T00:00 2001-01-01T00:00 char_min_value1 char_min_value2 char_min_value3 [...] + +-- !q3 -- +1 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 [...] + +-- !q4 -- +123.45 +789.56 +-3.4028235e+38 +123.45 + +-- !q5 -- +2 false 58 12345 2147483000 \N 789.56 654321.123 987654321 5678.1234 987654.321098 987654321.098765432109 changed_string new_binary_value 2025-05-25 2025-05-25T15:30 2025-05-25T15:30:00.654322 2025-05-25T15:30:00.654322 char_new_value1 char_new_value2 char_new_value3 [...] + +-- !q6 -- +user1 {"name":"Alice", "age":28, "preferences":{"sports":{"preference_id":101, "preference_values":["soccer", "tennis"]}, "music":{"preference_id":102, "preference_values":["rock", "classical"]}}} [{"activity_date":"2024-08-01", "activities":{"workout":{"details":"Morning run", "metrics":{"duration":30.5, "calories":200}}, "reading":{"details":"Read book on Hive", "metrics":{"pages":50, "time":2}}}}, {"activity_date":"2024-08-02", "activities":{"travel":{"details":"Flight to NY", "metric [...] +user2 {"name":"Bob", "age":32, "preferences":{"books":{"preference_id":201, "preference_values":["fiction", "non-fiction"]}, "travel":{"preference_id":202, "preference_values":["beaches", "mountains"]}}} [{"activity_date":"2024-08-01", "activities":{"hiking":{"details":"Mountain trail", "metrics":{"distance":10, "elevation":500}}, "photography":{"details":"Wildlife photoshoot", "metrics":{"photos_taken":100, "time":4}}}}, {"activity_date":"2024-08-02", "activities":{"workshop":{"details" [...] +user3 {"name":"Carol", "age":24, "preferences":{"food":{"preference_id":301, "preference_values":["vegan", "desserts"]}, "movies":{"preference_id":302, "preference_values":["action", "comedy"]}}} [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength train [...] +user4 {"name":"Carol", "age":24, "preferences":{"food":{"preference_id":301, "preference_values":["vegan", "desserts"]}, "movies":{"preference_id":302, "preference_values":["action", "comedy"]}}} [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength train [...] 
+
+-- !q7 --
+user1 [{"activity_date":"2024-08-01", "activities":{"workout":{"details":"Morning run", "metrics":{"duration":30.5, "calories":200}}, "reading":{"details":"Read book on Hive", "metrics":{"pages":50, "time":2}}}}, {"activity_date":"2024-08-02", "activities":{"travel":{"details":"Flight to NY", "metrics":{"distance":500, "time":3}}, "meeting":{"details":"Project meeting", "metrics":{"duration":1.5, "participants":5}}}}]
+user2 [{"activity_date":"2024-08-01", "activities":{"hiking":{"details":"Mountain trail", "metrics":{"distance":10, "elevation":500}}, "photography":{"details":"Wildlife photoshoot", "metrics":{"photos_taken":100, "time":4}}}}, {"activity_date":"2024-08-02", "activities":{"workshop":{"details":"Photography workshop", "metrics":{"duration":3, "participants":15}}, "shopping":{"details":"Bought camera gear", "metrics":{"items":5, "cost":1500}}}}]
+user3 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength training", "metrics":{"duration":1, "calories":300}}, "shopping":{"details":"Bought groceries", "metrics":{"items":10, "cost":100}}}}]
+user4 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength training", "metrics":{"duration":1, "calories":300}}, "shopping":{"details":"Bought groceries", "metrics":{"items":10, "cost":100}}}}]
+
+-- !q8 --
+\N \N \N \N
+\N \N \N \N
+1 10 {"col2a":10, "col2b":"string1"} {1:"string10"}
+2 20 {"col2a":20, "col2b":"string2"} {2:"string2"}
+3 30 {"col2a":30, "col2b":"string3"} \N
+4 40 {"col2a":40, "col2b":"string4"} {4:"string4"}
+5 \N \N \N
+6 60 {"col2a":60, "col2b":"600"} {6:"600"}
+7 70 {"col2a":70, "col2b":"string7"} {7:"string7"}
+
+-- !q9 --
+\N \N
+\N \N
+\N 5
+10 1
+20 2
+30 3
+40 4
+60 6
+70 7
+
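Each `-- !qN --` block above is the expected result of the correspondingly named `qt_qN` statement in the suite below; the whole block appears twice because the suite loops over both hive2 and hive3. Outside the regression framework, the same tables can be queried over Doris's MySQL-protocol endpoint with plain JDBC. A hedged sketch, assuming a local FE on the default query port 9030, a passwordless root user, MySQL Connector/J on the classpath, and the catalog name the suite creates for the hive2 run:

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Ad-hoc check of the json_load_data_table results via JDBC; connection
// details are assumptions, the catalog and table names come from the suite.
public class HiveJsonAdHocQuery {
    public static void main(String[] args) throws Exception {
        String url = "jdbc:mysql://127.0.0.1:9030/"; // assumed FE query port
        try (Connection conn = DriverManager.getConnection(url, "root", "");
             Statement stmt = conn.createStatement()) {
            stmt.execute("SWITCH hive2_hive_json_basic_test"); // catalog created by the suite
            stmt.execute("USE `default`");
            try (ResultSet rs = stmt.executeQuery(
                    "SELECT col1, id FROM json_load_data_table ORDER BY id")) {
                while (rs.next()) {
                    System.out.println(rs.getString(1) + "\t" + rs.getString(2));
                }
            }
        }
    }
}
```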
diff --git a/regression-test/suites/external_table_p0/hive/hive_json_basic_test.groovy b/regression-test/suites/external_table_p0/hive/hive_json_basic_test.groovy
new file mode 100644
index 00000000000..9d05e1a4c74
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/hive_json_basic_test.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("hive_json_basic_test", "p0,external,hive,external_docker,external_docker_hive") {
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable Hive test.")
+        return
+    }
+
+    for (String hivePrefix : ["hive2", "hive3"]) {
+        String catalog_name = "${hivePrefix}_hive_json_basic_test"
+        try {
+            String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+            String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+            String broker_name = "hdfs"
+
+            sql """drop catalog if exists ${catalog_name}"""
+            sql """create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hive.metastore.uris'='thrift://${externalEnvIp}:${hms_port}'
+            );"""
+            sql """use `${catalog_name}`.`default`"""
+
+            String tb1 = """json_all_complex_types"""
+            String tb2 = """json_nested_complex_table"""
+            String tb3 = """json_load_data_table"""
+
+            def tables = sql """ show tables """
+            logger.info("tables = ${tables}")
+
+            qt_q1 """ select * from ${tb1} order by id """
+            qt_q2 """ select * from ${tb1} where tinyint_col < 0 order by id """
+            qt_q3 """ select * from ${tb1} where bigint_col > 0 order by id """
+            qt_q4 """ select float_col from ${tb1} where float_col is not null order by id """
+            qt_q5 """ select * from ${tb1} where id = 2 order by id """
+
+            qt_q6 """ select * from ${tb2} order by user_id"""
+            qt_q7 """ select user_id,activity_log from ${tb2} order by user_id"""
+
+            order_qt_q8 """ select * from ${tb3} order by id """
+            order_qt_q9 """ select col1,id from ${tb3} order by id """
+        } finally {
+            // clean up the catalog even when a statement above fails
+            sql """drop catalog if exists ${catalog_name}"""
+        }
+    }
+}

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org