morningman commented on code in PR #43469:
URL: https://github.com/apache/doris/pull/43469#discussion_r1835707840


##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -840,79 +871,145 @@ Status 
NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
 }
 
 Status 
NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator value,
-                                            SlotDescriptor* slot_desc, 
IColumn* column_ptr,
+                                            const TypeDescriptor& type_desc,
+                                            vectorized::IColumn* column_ptr,
+                                            const std::string& column_name, 
DataTypeSerDeSPtr serde,
                                             bool* valid) {
-    const char* str_value = nullptr;
-    char tmp_buf[128] = {0};
-    int32_t wbytes = 0;
-    std::string json_str;
-
     ColumnNullable* nullable_column = nullptr;
-    if (slot_desc->is_nullable()) {
+    vectorized::IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+    vectorized::DataTypeSerDe::FormatOptions _options;

Review Comment:
   Looks like this `_options` can be created only once at reuse it in each call.



##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -840,79 +871,145 @@ Status 
NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
 }
 
 Status 
NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator value,
-                                            SlotDescriptor* slot_desc, 
IColumn* column_ptr,
+                                            const TypeDescriptor& type_desc,
+                                            vectorized::IColumn* column_ptr,
+                                            const std::string& column_name, 
DataTypeSerDeSPtr serde,
                                             bool* valid) {
-    const char* str_value = nullptr;
-    char tmp_buf[128] = {0};
-    int32_t wbytes = 0;
-    std::string json_str;
-
     ColumnNullable* nullable_column = nullptr;
-    if (slot_desc->is_nullable()) {
+    vectorized::IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+    vectorized::DataTypeSerDe::FormatOptions _options;
+
+    bool value_is_null = (value == nullptr) || (value->GetType() == 
rapidjson::Type::kNullType);
+
+    if (column_ptr->is_nullable()) {
         nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
-        // kNullType will put 1 into the Null map, so there is no need to push 
0 for kNullType.
-        if (value->GetType() != rapidjson::Type::kNullType) {
+        data_column_ptr = nullable_column->get_nested_column().get_ptr();
+        data_serde = serde->get_nested_serdes()[0];
+
+        if (value_is_null) {
+            nullable_column->insert_default();
+            return Status::OK();
+        } else {
             nullable_column->get_null_map_data().push_back(0);
+        }
+
+    } else if (value_is_null) [[unlikely]] {
+        if (_is_load) {
+            RETURN_IF_ERROR(_append_error_msg(
+                    *value, "Json value is null, but the column `{}` is not 
nullable.", column_name,
+                    valid));
+            return Status::OK();
+
         } else {
-            nullable_column->insert_default();
+            return Status::DataQualityError(
+                    "Json value is null, but the column `{}` is not 
nullable.", column_name);
         }
-        column_ptr = &nullable_column->get_nested_column();
     }
 
-    switch (value->GetType()) {
-    case rapidjson::Type::kStringType:
-        str_value = value->GetString();
-        wbytes = value->GetStringLength();
-        break;
-    case rapidjson::Type::kNumberType:
-        if (value->IsUint()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", 
value->GetUint());
-        } else if (value->IsInt()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", value->GetInt());
-        } else if (value->IsUint64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, 
value->GetUint64());
-        } else if (value->IsInt64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, 
value->GetInt64());
-        } else if (value->IsFloat() || value->IsDouble()) {
-            auto* end = fmt::format_to(tmp_buf, "{}", value->GetDouble());
-            wbytes = end - tmp_buf;
+    if (_is_load || !type_desc.is_complex_type()) {
+        if (value->IsString()) {
+            Slice slice {value->GetString(), value->GetStringLength()};
+            RETURN_IF_ERROR(
+                    
data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, _options));
+
         } else {
-            return Status::InternalError<false>("It should not here.");
+            // Maybe we can `switch (value->GetType()) case: kNumberType`.
+            // Note that `if (value->IsInt())`, but column is FloatColumn.
+            auto json_str = NewJsonReader::_print_json_value(*value);
+            Slice slice {json_str.c_str(), json_str.size()};
+            RETURN_IF_ERROR(
+                    
data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, _options));
         }
-        str_value = tmp_buf;
-        break;
-    case rapidjson::Type::kFalseType:
-        wbytes = 1;
-        str_value = (char*)"0";
-        break;
-    case rapidjson::Type::kTrueType:
-        wbytes = 1;
-        str_value = (char*)"1";
-        break;
-    case rapidjson::Type::kNullType:
-        if (!slot_desc->is_nullable()) {
-            RETURN_IF_ERROR(_append_error_msg(
-                    *value, "Json value is null, but the column `{}` is not 
nullable.",
-                    slot_desc->col_name(), valid));
-            return Status::OK();
+    } else if (type_desc.type == TYPE_STRUCT) {

Review Comment:
   Can we use `switch ... case` for following logic?



##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -756,7 +770,23 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& 
objectValue, Block& bl
         _append_empty_skip_bitmap_value(block, cur_row_count);
     }
 
-    for (auto* slot_desc : slot_descs) {
+    if (_is_hive_table) {
+        //don't like _fuzzy_parse,each line read in must modify name_map once.
+        for (auto* v : slot_descs) {

Review Comment:
   I think it will seriously impact the performance.
   What if we set a session variable to control it? And by default, in most 
cases, there is no duplicate column name in json value.



##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -840,79 +871,145 @@ Status 
NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
 }
 
 Status 
NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator value,
-                                            SlotDescriptor* slot_desc, 
IColumn* column_ptr,
+                                            const TypeDescriptor& type_desc,
+                                            vectorized::IColumn* column_ptr,
+                                            const std::string& column_name, 
DataTypeSerDeSPtr serde,
                                             bool* valid) {
-    const char* str_value = nullptr;
-    char tmp_buf[128] = {0};
-    int32_t wbytes = 0;
-    std::string json_str;
-
     ColumnNullable* nullable_column = nullptr;
-    if (slot_desc->is_nullable()) {
+    vectorized::IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+    vectorized::DataTypeSerDe::FormatOptions _options;
+
+    bool value_is_null = (value == nullptr) || (value->GetType() == 
rapidjson::Type::kNullType);
+
+    if (column_ptr->is_nullable()) {
         nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
-        // kNullType will put 1 into the Null map, so there is no need to push 
0 for kNullType.
-        if (value->GetType() != rapidjson::Type::kNullType) {
+        data_column_ptr = nullable_column->get_nested_column().get_ptr();
+        data_serde = serde->get_nested_serdes()[0];
+
+        if (value_is_null) {
+            nullable_column->insert_default();
+            return Status::OK();
+        } else {
             nullable_column->get_null_map_data().push_back(0);
+        }
+
+    } else if (value_is_null) [[unlikely]] {
+        if (_is_load) {
+            RETURN_IF_ERROR(_append_error_msg(
+                    *value, "Json value is null, but the column `{}` is not 
nullable.", column_name,
+                    valid));
+            return Status::OK();
+
         } else {
-            nullable_column->insert_default();
+            return Status::DataQualityError(
+                    "Json value is null, but the column `{}` is not 
nullable.", column_name);
         }
-        column_ptr = &nullable_column->get_nested_column();
     }
 
-    switch (value->GetType()) {
-    case rapidjson::Type::kStringType:
-        str_value = value->GetString();
-        wbytes = value->GetStringLength();
-        break;
-    case rapidjson::Type::kNumberType:
-        if (value->IsUint()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", 
value->GetUint());
-        } else if (value->IsInt()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", value->GetInt());
-        } else if (value->IsUint64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, 
value->GetUint64());
-        } else if (value->IsInt64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, 
value->GetInt64());
-        } else if (value->IsFloat() || value->IsDouble()) {
-            auto* end = fmt::format_to(tmp_buf, "{}", value->GetDouble());
-            wbytes = end - tmp_buf;
+    if (_is_load || !type_desc.is_complex_type()) {
+        if (value->IsString()) {
+            Slice slice {value->GetString(), value->GetStringLength()};
+            RETURN_IF_ERROR(
+                    
data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, _options));
+
         } else {
-            return Status::InternalError<false>("It should not here.");
+            // Maybe we can `switch (value->GetType()) case: kNumberType`.

Review Comment:
   Why not using `switch (value->GetType())`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to