hubgeter commented on code in PR #43469:
URL: https://github.com/apache/doris/pull/43469#discussion_r1836001942


##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -756,7 +770,23 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& 
objectValue, Block& bl
         _append_empty_skip_bitmap_value(block, cur_row_count);
     }
 
-    for (auto* slot_desc : slot_descs) {
+    if (_is_hive_table) {
+        //don't like _fuzzy_parse,each line read in must modify name_map once.
+        for (auto* v : slot_descs) {

Review Comment:
   Here we not only need to deal with the same column names, but also the order 
of the table column names and the json data. I think I can simplify this double 
for loop by using a layer of for loop plus map.



##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -840,79 +871,145 @@ Status 
NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
 }
 
 Status 
NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator value,
-                                            SlotDescriptor* slot_desc, 
IColumn* column_ptr,
+                                            const TypeDescriptor& type_desc,
+                                            vectorized::IColumn* column_ptr,
+                                            const std::string& column_name, 
DataTypeSerDeSPtr serde,
                                             bool* valid) {
-    const char* str_value = nullptr;
-    char tmp_buf[128] = {0};
-    int32_t wbytes = 0;
-    std::string json_str;
-
     ColumnNullable* nullable_column = nullptr;
-    if (slot_desc->is_nullable()) {
+    vectorized::IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+    vectorized::DataTypeSerDe::FormatOptions _options;

Review Comment:
   yeah , i will fix it.



##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -840,79 +871,145 @@ Status 
NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
 }
 
 Status 
NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator value,
-                                            SlotDescriptor* slot_desc, 
IColumn* column_ptr,
+                                            const TypeDescriptor& type_desc,
+                                            vectorized::IColumn* column_ptr,
+                                            const std::string& column_name, 
DataTypeSerDeSPtr serde,
                                             bool* valid) {
-    const char* str_value = nullptr;
-    char tmp_buf[128] = {0};
-    int32_t wbytes = 0;
-    std::string json_str;
-
     ColumnNullable* nullable_column = nullptr;
-    if (slot_desc->is_nullable()) {
+    vectorized::IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+    vectorized::DataTypeSerDe::FormatOptions _options;
+
+    bool value_is_null = (value == nullptr) || (value->GetType() == 
rapidjson::Type::kNullType);
+
+    if (column_ptr->is_nullable()) {
         nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
-        // kNullType will put 1 into the Null map, so there is no need to push 
0 for kNullType.
-        if (value->GetType() != rapidjson::Type::kNullType) {
+        data_column_ptr = nullable_column->get_nested_column().get_ptr();
+        data_serde = serde->get_nested_serdes()[0];
+
+        if (value_is_null) {
+            nullable_column->insert_default();
+            return Status::OK();
+        } else {
             nullable_column->get_null_map_data().push_back(0);
+        }
+
+    } else if (value_is_null) [[unlikely]] {
+        if (_is_load) {
+            RETURN_IF_ERROR(_append_error_msg(
+                    *value, "Json value is null, but the column `{}` is not 
nullable.", column_name,
+                    valid));
+            return Status::OK();
+
         } else {
-            nullable_column->insert_default();
+            return Status::DataQualityError(
+                    "Json value is null, but the column `{}` is not 
nullable.", column_name);
         }
-        column_ptr = &nullable_column->get_nested_column();
     }
 
-    switch (value->GetType()) {
-    case rapidjson::Type::kStringType:
-        str_value = value->GetString();
-        wbytes = value->GetStringLength();
-        break;
-    case rapidjson::Type::kNumberType:
-        if (value->IsUint()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", 
value->GetUint());
-        } else if (value->IsInt()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", value->GetInt());
-        } else if (value->IsUint64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, 
value->GetUint64());
-        } else if (value->IsInt64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, 
value->GetInt64());
-        } else if (value->IsFloat() || value->IsDouble()) {
-            auto* end = fmt::format_to(tmp_buf, "{}", value->GetDouble());
-            wbytes = end - tmp_buf;
+    if (_is_load || !type_desc.is_complex_type()) {
+        if (value->IsString()) {
+            Slice slice {value->GetString(), value->GetStringLength()};
+            RETURN_IF_ERROR(
+                    
data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, _options));
+
         } else {
-            return Status::InternalError<false>("It should not here.");
+            // Maybe we can `switch (value->GetType()) case: kNumberType`.
+            // Note that `if (value->IsInt())`, but column is FloatColumn.
+            auto json_str = NewJsonReader::_print_json_value(*value);
+            Slice slice {json_str.c_str(), json_str.size()};
+            RETURN_IF_ERROR(
+                    
data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, _options));
         }
-        str_value = tmp_buf;
-        break;
-    case rapidjson::Type::kFalseType:
-        wbytes = 1;
-        str_value = (char*)"0";
-        break;
-    case rapidjson::Type::kTrueType:
-        wbytes = 1;
-        str_value = (char*)"1";
-        break;
-    case rapidjson::Type::kNullType:
-        if (!slot_desc->is_nullable()) {
-            RETURN_IF_ERROR(_append_error_msg(
-                    *value, "Json value is null, but the column `{}` is not 
nullable.",
-                    slot_desc->col_name(), valid));
-            return Status::OK();
+    } else if (type_desc.type == TYPE_STRUCT) {

Review Comment:
   i need check `_is_load` and  `type_desc` , It seems that there is no way to 
use `switch`.



##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -840,79 +871,145 @@ Status 
NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
 }
 
 Status 
NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator value,
-                                            SlotDescriptor* slot_desc, 
IColumn* column_ptr,
+                                            const TypeDescriptor& type_desc,
+                                            vectorized::IColumn* column_ptr,
+                                            const std::string& column_name, 
DataTypeSerDeSPtr serde,
                                             bool* valid) {
-    const char* str_value = nullptr;
-    char tmp_buf[128] = {0};
-    int32_t wbytes = 0;
-    std::string json_str;
-
     ColumnNullable* nullable_column = nullptr;
-    if (slot_desc->is_nullable()) {
+    vectorized::IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+    vectorized::DataTypeSerDe::FormatOptions _options;
+
+    bool value_is_null = (value == nullptr) || (value->GetType() == 
rapidjson::Type::kNullType);
+
+    if (column_ptr->is_nullable()) {
         nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
-        // kNullType will put 1 into the Null map, so there is no need to push 
0 for kNullType.
-        if (value->GetType() != rapidjson::Type::kNullType) {
+        data_column_ptr = nullable_column->get_nested_column().get_ptr();
+        data_serde = serde->get_nested_serdes()[0];
+
+        if (value_is_null) {
+            nullable_column->insert_default();
+            return Status::OK();
+        } else {
             nullable_column->get_null_map_data().push_back(0);
+        }
+
+    } else if (value_is_null) [[unlikely]] {
+        if (_is_load) {
+            RETURN_IF_ERROR(_append_error_msg(
+                    *value, "Json value is null, but the column `{}` is not 
nullable.", column_name,
+                    valid));
+            return Status::OK();
+
         } else {
-            nullable_column->insert_default();
+            return Status::DataQualityError(
+                    "Json value is null, but the column `{}` is not 
nullable.", column_name);
         }
-        column_ptr = &nullable_column->get_nested_column();
     }
 
-    switch (value->GetType()) {
-    case rapidjson::Type::kStringType:
-        str_value = value->GetString();
-        wbytes = value->GetStringLength();
-        break;
-    case rapidjson::Type::kNumberType:
-        if (value->IsUint()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", 
value->GetUint());
-        } else if (value->IsInt()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", value->GetInt());
-        } else if (value->IsUint64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, 
value->GetUint64());
-        } else if (value->IsInt64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, 
value->GetInt64());
-        } else if (value->IsFloat() || value->IsDouble()) {
-            auto* end = fmt::format_to(tmp_buf, "{}", value->GetDouble());
-            wbytes = end - tmp_buf;
+    if (_is_load || !type_desc.is_complex_type()) {
+        if (value->IsString()) {
+            Slice slice {value->GetString(), value->GetStringLength()};
+            RETURN_IF_ERROR(
+                    
data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, _options));
+
         } else {
-            return Status::InternalError<false>("It should not here.");
+            // Maybe we can `switch (value->GetType()) case: kNumberType`.

Review Comment:
   I'm not sure if using `switch (value->GetType())` to get the string behavior 
would improve performance, but I might give it a try.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to