This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new bf0250ac345 [fix](serde)fix the bug in DataTypeNullableSerDe.deserialize_column_from_fixed_json (#41217) (#41963)
bf0250ac345 is described below

commit bf0250ac345beac2364fa4d483ce421733778638
Author: daidai <2017501...@qq.com>
AuthorDate: Thu Oct 17 17:41:17 2024 +0800

    [fix](serde)fix the bug in DataTypeNullableSerDe.deserialize_column_from_fixed_json (#41217) (#41963)
    
    Backport of #41217 to branch-3.0.
---
 be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp |  6 ++++++
 be/src/vec/data_types/serde/data_type_datev2_serde.cpp     |  6 ++++++
 be/src/vec/data_types/serde/data_type_decimal_serde.cpp    |  6 ++++++
 be/src/vec/data_types/serde/data_type_nullable_serde.cpp   |  5 ++++-
 be/src/vec/data_types/serde/data_type_number_serde.cpp     |  6 ++++++
 be/src/vec/data_types/serde/data_type_serde.h              | 11 ++++++++++-
 be/src/vec/data_types/serde/data_type_string_serde.h       |  6 ++++++
 be/src/vec/exec/format/parquet/vparquet_group_reader.cpp   |  1 +
 8 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
index e57af914d43..e8238af4eee 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
@@ -252,6 +252,9 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const 
std::string& timezone,
 Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json(
         IColumn& column, Slice& slice, int rows, int* num_deserialized,
         const FormatOptions& options) const {
+    if (rows < 1) [[unlikely]] {
+        return Status::OK();
+    }
     Status st = deserialize_one_cell_from_json(column, slice, options);
     if (!st.ok()) {
         return st;
@@ -264,6 +267,9 @@ Status 
DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json(
 
 void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(IColumn& 
column,
                                                                       int 
times) const {
+    if (times < 1) [[unlikely]] {
+        return;
+    }
     auto& col = static_cast<ColumnVector<UInt64>&>(column);
     auto sz = col.size();
     UInt64 val = col.get_element(sz - 1);
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
index f2d595b87c4..95109ee408c 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
@@ -178,6 +178,9 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const 
std::string& timezone, con
 Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& 
column, Slice& slice,
                                                                int rows, int* 
num_deserialized,
                                                                const 
FormatOptions& options) const {
+    if (rows < 1) [[unlikely]] {
+        return Status::OK();
+    }
     Status st = deserialize_one_cell_from_json(column, slice, options);
     if (!st.ok()) {
         return st;
@@ -189,6 +192,9 @@ Status 
DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column,
 
 void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(IColumn& 
column,
                                                                   int times) 
const {
+    if (times < 1) [[unlikely]] {
+        return;
+    }
     auto& col = static_cast<ColumnVector<UInt32>&>(column);
     auto sz = col.size();
     UInt32 val = col.get_element(sz - 1);
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp 
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index e979211d6d7..acb09ee773e 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -280,6 +280,9 @@ template <typename T>
 Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json(
         IColumn& column, Slice& slice, int rows, int* num_deserialized,
         const FormatOptions& options) const {
+    if (rows < 1) [[unlikely]] {
+        return Status::OK();
+    }
     Status st = deserialize_one_cell_from_json(column, slice, options);
     if (!st.ok()) {
         return st;
@@ -293,6 +296,9 @@ Status 
DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json(
 template <typename T>
 void DataTypeDecimalSerDe<T>::insert_column_last_value_multiple_times(IColumn& 
column,
                                                                       int 
times) const {
+    if (times < 1) [[unlikely]] {
+        return;
+    }
     auto& col = static_cast<ColumnDecimal<T>&>(column);
     auto sz = col.size();
 
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp 
b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index 1af85bd040d..3b46e0e784f 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -131,12 +131,15 @@ Status 
DataTypeNullableSerDe::deserialize_column_from_hive_text_vector(
 Status DataTypeNullableSerDe::deserialize_column_from_fixed_json(
         IColumn& column, Slice& slice, int rows, int* num_deserialized,
         const FormatOptions& options) const {
+    if (rows < 1) [[unlikely]] {
+        return Status::OK();
+    }
     auto& col = static_cast<ColumnNullable&>(column);
     Status st = deserialize_one_cell_from_json(column, slice, options);
     if (!st.ok()) {
         return st;
     }
-    if (rows - 1 != 0) {
+    if (rows > 1) {
         auto& null_map = col.get_null_map_data();
         auto& nested_column = col.get_nested_column();
 
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp 
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index 299779ea267..efa41e346bf 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -228,6 +228,9 @@ template <typename T>
 Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json(
         IColumn& column, Slice& slice, int rows, int* num_deserialized,
         const FormatOptions& options) const {
+    if (rows < 1) [[unlikely]] {
+        return Status::OK();
+    }
     Status st = deserialize_one_cell_from_json(column, slice, options);
     if (!st.ok()) {
         return st;
@@ -241,6 +244,9 @@ Status 
DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json(
 template <typename T>
 void DataTypeNumberSerDe<T>::insert_column_last_value_multiple_times(IColumn& 
column,
                                                                      int 
times) const {
+    if (times < 1) [[unlikely]] {
+        return;
+    }
     auto& col = static_cast<ColumnVector<T>&>(column);
     auto sz = col.size();
     T val = col.get_element(sz - 1);
diff --git a/be/src/vec/data_types/serde/data_type_serde.h 
b/be/src/vec/data_types/serde/data_type_serde.h
index 262f9cae6a8..46236faa926 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -243,17 +243,26 @@ public:
     virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& 
slice, int rows,
                                                       int* num_deserialized,
                                                       const FormatOptions& 
options) const {
+        //In this function implementation, we need to consider the case where 
rows is 0, 1, and other larger integers.
+        if (rows < 1) [[unlikely]] {
+            return Status::OK();
+        }
         Status st = deserialize_one_cell_from_json(column, slice, options);
         if (!st.ok()) {
             *num_deserialized = 0;
             return st;
         }
-        insert_column_last_value_multiple_times(column, rows - 1);
+        if (rows > 1) [[likely]] {
+            insert_column_last_value_multiple_times(column, rows - 1);
+        }
         *num_deserialized = rows;
         return Status::OK();
     }
     // Insert the last value to the end of this column multiple times.
     virtual void insert_column_last_value_multiple_times(IColumn& column, int 
times) const {
+        if (times < 1) [[unlikely]] {
+            return;
+        }
         //If you try to simplify this operation by using 
`column.insert_many_from(column, column.size() - 1, rows - 1);`
         // you are likely to get incorrect data results.
         MutableColumnPtr dum_col = column.clone_empty();
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h 
b/be/src/vec/data_types/serde/data_type_string_serde.h
index fe09ff615f4..583772c5825 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -218,6 +218,9 @@ public:
     Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
                                               int* num_deserialized,
                                               const FormatOptions& options) 
const override {
+        if (rows < 1) [[unlikely]] {
+            return Status::OK();
+        }
         Status st = deserialize_one_cell_from_json(column, slice, options);
         if (!st.ok()) {
             return st;
@@ -229,6 +232,9 @@ public:
     }
 
     void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override {
+        if (times < 1) [[unlikely]] {
+            return;
+        }
         auto& col = static_cast<ColumnString&>(column);
         auto sz = col.size();
 
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 08ecb601f39..37e82774c39 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -654,6 +654,7 @@ Status RowGroupReader::_fill_partition_columns(
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
         int num_deserialized = 0;
+        // Be careful when reading empty rows from parquet row groups.
         if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, 
rows,
                                                             &num_deserialized,
                                                             
_text_formatOptions) != Status::OK()) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to