This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 607c0b82a9c [opt](serde)Optimize the filling of fixed values into 
block columns without repeated deserialization. (#37377) (#38245) (#38810)
607c0b82a9c is described below

commit 607c0b82a9c6b9f39418d921c986ed44186ff3a4
Author: daidai <2017501...@qq.com>
AuthorDate: Mon Aug 5 09:13:08 2024 +0800

    [opt](serde)Optimize the filling of fixed values into block columns 
without repeated deserialization. (#37377) (#38245) (#38810)
    
    ## Proposed changes
    pick pr: #38575  and fix this pr bug :  #38245
---
 .../serde/data_type_datetimev2_serde.cpp           |  21 +++++
 .../data_types/serde/data_type_datetimev2_serde.h  |   5 +
 .../data_types/serde/data_type_datev2_serde.cpp    |  21 +++++
 .../vec/data_types/serde/data_type_datev2_serde.h  |   6 ++
 .../data_types/serde/data_type_decimal_serde.cpp   |  26 ++++++
 .../vec/data_types/serde/data_type_decimal_serde.h |   6 ++
 .../data_types/serde/data_type_nullable_serde.cpp  |  22 +++++
 .../data_types/serde/data_type_nullable_serde.h    |   3 +
 .../data_types/serde/data_type_number_serde.cpp    |  22 +++++
 .../vec/data_types/serde/data_type_number_serde.h  |   6 ++
 be/src/vec/data_types/serde/data_type_serde.h      |  21 +++++
 .../vec/data_types/serde/data_type_string_serde.h  |  25 +++++
 be/src/vec/exec/format/orc/vorc_reader.cpp         |   9 +-
 .../exec/format/parquet/vparquet_group_reader.cpp  |   9 +-
 be/src/vec/exec/scan/vfile_scanner.cpp             |   9 +-
 .../scripts/create_preinstalled_scripts/run65.hql  |  28 ++++++
 .../partition_col2=1/000000_0_copy_10              | Bin 0 -> 738 bytes
 .../partition_col2=1/000000_0_copy_11              | Bin 0 -> 806 bytes
 .../partition_col2=1/000000_0_copy_12              | Bin 0 -> 936 bytes
 .../partition_col2=1/000000_0_copy_13              | Bin 0 -> 1153 bytes
 .../partition_col2=1/000000_0_copy_10              | Bin 0 -> 3193 bytes
 .../partition_col2=1/000000_0_copy_11              | Bin 0 -> 5703 bytes
 .../partition_col2=1/000000_0_copy_9               | Bin 0 -> 1687 bytes
 .../hive/test_hive_opt_fill_partition.out          | 101 +++++++++++++++++++++
 .../hive/test_hive_opt_fill_partition.groovy       |  74 +++++++++++++++
 25 files changed, 396 insertions(+), 18 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
index 63a199199a0..850ac5766fc 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
@@ -247,4 +247,25 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const 
std::string& timezone,
     return Status::OK();
 }
 
+// Fills `column` with `rows` copies of the single text value held in `slice`.
+// The value is deserialized exactly once; the resulting cell is then replicated
+// `rows - 1` times. A request for fewer than one row appends nothing.
+// On return `*num_deserialized` is `rows` on success and 0 otherwise.
+Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json(
+        IColumn& column, Slice& slice, int rows, int* num_deserialized,
+        const FormatOptions& options) const {
+    // Without this guard a zero/negative `rows` would still append one cell and
+    // then hand a negative repeat count to the replicate helper.
+    if (rows < 1) {
+        *num_deserialized = 0;
+        return Status::OK();
+    }
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        // Mirror the base-class default: report zero rows on failure.
+        *num_deserialized = 0;
+        return st;
+    }
+
+    DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(column, rows - 1);
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
+// Appends `times` more copies of the last element currently stored in `column`.
+// Does nothing when `times` is less than one. When `times >= 1` the column must
+// already hold at least one element, because element `size() - 1` is read.
+void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(IColumn& column,
+                                                                      int times) const {
+    // A negative count must never reach insert_many_vals().
+    // NOTE(review): its count parameter looks to be unsigned -- confirm.
+    if (times < 1) {
+        return;
+    }
+    auto& col = static_cast<ColumnVector<UInt64>&>(column);
+    auto sz = col.size();
+    UInt64 val = col.get_element(sz - 1);
+    col.insert_many_vals(val, times);
+}
+
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h 
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
index 00b05f5fcd6..ef4aa6843a0 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
@@ -77,6 +77,11 @@ public:
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const 
override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) 
const override;
+    void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override;
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, 
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
index eb9122dd240..f2d595b87c4 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
@@ -175,5 +175,26 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const 
std::string& timezone, con
     return Status::OK();
 }
 
+// Fills `column` with `rows` copies of the single text value held in `slice`.
+// The value is deserialized exactly once; the resulting cell is then replicated
+// `rows - 1` times. A request for fewer than one row appends nothing.
+// On return `*num_deserialized` is `rows` on success and 0 otherwise.
+Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column, Slice& slice,
+                                                               int rows, int* num_deserialized,
+                                                               const FormatOptions& options) const {
+    // Without this guard a zero/negative `rows` would still append one cell and
+    // then hand a negative repeat count to the replicate helper.
+    if (rows < 1) {
+        *num_deserialized = 0;
+        return Status::OK();
+    }
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        // Mirror the base-class default: report zero rows on failure.
+        *num_deserialized = 0;
+        return st;
+    }
+    DataTypeDateV2SerDe::insert_column_last_value_multiple_times(column, rows - 1);
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
+// Appends `times` more copies of the last element currently stored in `column`.
+// Does nothing when `times` is less than one. When `times >= 1` the column must
+// already hold at least one element, because element `size() - 1` is read.
+void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(IColumn& column,
+                                                                  int times) const {
+    // A negative count must never reach insert_many_vals().
+    // NOTE(review): its count parameter looks to be unsigned -- confirm.
+    if (times < 1) {
+        return;
+    }
+    auto& col = static_cast<ColumnVector<UInt32>&>(column);
+    auto sz = col.size();
+    UInt32 val = col.get_element(sz - 1);
+
+    col.insert_many_vals(val, times);
+}
+
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h 
b/be/src/vec/data_types/serde/data_type_datev2_serde.h
index 9a8b050eeba..52e4cec364e 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h
@@ -74,6 +74,12 @@ public:
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const 
override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) 
const override;
+
+    void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override;
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, 
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp 
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index a59fdedbfe6..e979211d6d7 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -275,6 +275,32 @@ Status DataTypeDecimalSerDe<T>::write_column_to_orc(const 
std::string& timezone,
     }
     return Status::OK();
 }
+// Fills `column` with `rows` copies of the single text value held in `slice`.
+// The value is deserialized exactly once; the resulting cell is then replicated
+// `rows - 1` times. A request for fewer than one row appends nothing.
+// On return `*num_deserialized` is `rows` on success and 0 otherwise.
+template <typename T>
+Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json(
+        IColumn& column, Slice& slice, int rows, int* num_deserialized,
+        const FormatOptions& options) const {
+    // Without this guard a zero/negative `rows` would still append one cell.
+    if (rows < 1) {
+        *num_deserialized = 0;
+        return Status::OK();
+    }
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        // Mirror the base-class default: report zero rows on failure.
+        *num_deserialized = 0;
+        return st;
+    }
+
+    DataTypeDecimalSerDe::insert_column_last_value_multiple_times(column, rows - 1);
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
+// Appends `times` more copies of the last element currently stored in `column`.
+// Does nothing when `times` is less than one. When `times >= 1` the column must
+// already hold at least one element, because element `size() - 1` is read.
+template <typename T>
+void DataTypeDecimalSerDe<T>::insert_column_last_value_multiple_times(IColumn& column,
+                                                                      int times) const {
+    // Return before reading the last element when there is nothing to append;
+    // otherwise an empty column would be indexed at `size() - 1`.
+    if (times < 1) {
+        return;
+    }
+    auto& col = static_cast<ColumnDecimal<T>&>(column);
+    auto sz = col.size();
+
+    T val = col.get_element(sz - 1);
+    for (int i = 0; i < times; i++) {
+        col.insert_value(val);
+    }
+}
 
 template class DataTypeDecimalSerDe<Decimal32>;
 template class DataTypeDecimalSerDe<Decimal64>;
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h 
b/be/src/vec/data_types/serde/data_type_decimal_serde.h
index 55e68699f01..484c6686bc5 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.h
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h
@@ -114,6 +114,12 @@ public:
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const 
override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) 
const override;
+
+    void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override;
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, 
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp 
b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index faa3c8eb1f4..014e8b0d5eb 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -127,6 +127,28 @@ Status 
DataTypeNullableSerDe::deserialize_column_from_hive_text_vector(
     return Status::OK();
 }
 
+// Fills the nullable `column` with `rows` copies of the single text value held
+// in `slice`. One cell is deserialized; its null flag and its nested value are
+// then each replicated `rows - 1` times. A request for fewer than one row
+// appends nothing. On return `*num_deserialized` is `rows` on success and 0
+// otherwise.
+Status DataTypeNullableSerDe::deserialize_column_from_fixed_json(
+        IColumn& column, Slice& slice, int rows, int* num_deserialized,
+        const FormatOptions& options) const {
+    // `rows - 1 != 0` alone would let zero/negative counts through and compute a
+    // wrapped-around null-map size below, so reject them up front.
+    if (rows < 1) {
+        *num_deserialized = 0;
+        return Status::OK();
+    }
+    auto& col = static_cast<ColumnNullable&>(column);
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        // Mirror the base-class default: report zero rows on failure.
+        *num_deserialized = 0;
+        return st;
+    }
+    if (rows > 1) {
+        auto& null_map = col.get_null_map_data();
+        auto& nested_column = col.get_nested_column();
+
+        uint8_t val = null_map.back();
+        size_t new_sz = null_map.size() + rows - 1;
+        // Null-map counterpart of insert_column_last_value_multiple_times():
+        // repeat the last null flag so both sub-columns stay the same length.
+        null_map.resize_fill(new_sz, val);
+        nested_serde->insert_column_last_value_multiple_times(nested_column, rows - 1);
+    }
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
 Status DataTypeNullableSerDe::deserialize_one_cell_from_json(IColumn& column, 
Slice& slice,
                                                              const 
FormatOptions& options) const {
     auto& null_column = assert_cast<ColumnNullable&>(column);
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h 
b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index 09d2fbde409..7b4841dcbdf 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -47,6 +47,9 @@ public:
                                                int* num_deserialized,
                                                const FormatOptions& options) 
const override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) 
const override;
     Status deserialize_one_cell_from_hive_text(
             IColumn& column, Slice& slice, const FormatOptions& options,
             int hive_text_complex_type_delimiter_level = 1) const override;
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp 
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index 0ba338ce399..299779ea267 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -224,6 +224,28 @@ void 
DataTypeNumberSerDe<T>::read_column_from_arrow(IColumn& column,
     const auto* raw_data = reinterpret_cast<const T*>(buffer->data()) + start;
     col_data.insert(raw_data, raw_data + row_count);
 }
+// Fills `column` with `rows` copies of the single text value held in `slice`.
+// The value is deserialized exactly once; the resulting cell is then replicated
+// `rows - 1` times. A request for fewer than one row appends nothing.
+// On return `*num_deserialized` is `rows` on success and 0 otherwise.
+template <typename T>
+Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json(
+        IColumn& column, Slice& slice, int rows, int* num_deserialized,
+        const FormatOptions& options) const {
+    // Without this guard a zero/negative `rows` would still append one cell and
+    // then hand a negative repeat count to the replicate helper.
+    if (rows < 1) {
+        *num_deserialized = 0;
+        return Status::OK();
+    }
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        // Mirror the base-class default: report zero rows on failure.
+        *num_deserialized = 0;
+        return st;
+    }
+
+    DataTypeNumberSerDe::insert_column_last_value_multiple_times(column, rows - 1);
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
+// Appends `times` more copies of the last element currently stored in `column`.
+// Does nothing when `times` is less than one. When `times >= 1` the column must
+// already hold at least one element, because element `size() - 1` is read.
+template <typename T>
+void DataTypeNumberSerDe<T>::insert_column_last_value_multiple_times(IColumn& column,
+                                                                     int times) const {
+    // A negative count must never reach insert_many_vals().
+    // NOTE(review): its count parameter looks to be unsigned -- confirm.
+    if (times < 1) {
+        return;
+    }
+    auto& col = static_cast<ColumnVector<T>&>(column);
+    auto sz = col.size();
+    T val = col.get_element(sz - 1);
+    col.insert_many_vals(val, times);
+}
 
 template <typename T>
 template <bool is_binary_format>
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h 
b/be/src/vec/data_types/serde/data_type_number_serde.h
index c66bc994605..18ba2fb26c7 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -70,6 +70,12 @@ public:
                                                int* num_deserialized,
                                                const FormatOptions& options) 
const override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) 
const override;
+
+    void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override;
+
     Status write_column_to_pb(const IColumn& column, PValues& result, int 
start,
                               int end) const override;
     Status read_column_from_pb(IColumn& column, const PValues& arg) const 
override;
diff --git a/be/src/vec/data_types/serde/data_type_serde.h 
b/be/src/vec/data_types/serde/data_type_serde.h
index 77663e1d43a..1f6e24aef3f 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -234,6 +234,27 @@ public:
     virtual Status deserialize_column_from_json_vector(IColumn& column, 
std::vector<Slice>& slices,
                                                        int* num_deserialized,
                                                        const FormatOptions& 
options) const = 0;
+    // Deserialize a fixed value: parse `slice` once, then repeat the resulting cell so
+    // that `rows` rows in total are appended to `column`.
+    // `*num_deserialized` is set to `rows` on success and to 0 when parsing fails or
+    // when `rows` is less than one (in which case nothing is appended).
+    virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
+                                                      int* num_deserialized,
+                                                      const FormatOptions& options) const {
+        // Guard first: otherwise one cell would be appended even for rows == 0 and a
+        // negative repeat count would be passed on.
+        if (rows < 1) {
+            *num_deserialized = 0;
+            return Status::OK();
+        }
+        Status st = deserialize_one_cell_from_json(column, slice, options);
+        if (!st.ok()) {
+            *num_deserialized = 0;
+            return st;
+        }
+        insert_column_last_value_multiple_times(column, rows - 1);
+        *num_deserialized = rows;
+        return Status::OK();
+    }
+    // Insert the last value to the end of this column `times` more times.
+    // Does nothing when `times` is less than one; otherwise the column must be non-empty
+    // because row `size() - 1` is used as the source.
+    virtual void insert_column_last_value_multiple_times(IColumn& column, int times) const {
+        if (times < 1) {
+            return;
+        }
+        // If you try to simplify this operation by using
+        // `column.insert_many_from(column, column.size() - 1, times);`
+        // you are likely to get incorrect data results, so the last value is first copied
+        // into a temporary one-row column and replicated from there.
+        MutableColumnPtr dum_col = column.clone_empty();
+        dum_col->insert_from(column, column.size() - 1);
+        column.insert_many_from(*dum_col.get(), 0, times);
+    }
 
     virtual Status deserialize_one_cell_from_hive_text(
             IColumn& column, Slice& slice, const FormatOptions& options,
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h 
b/be/src/vec/data_types/serde/data_type_string_serde.h
index 24f99a12e67..d3161c88706 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -172,6 +172,31 @@ public:
         }
         return Status::OK();
     }
+
+    // Fills `column` with `rows` copies of the single text value held in `slice`.
+    // The value is deserialized exactly once; the resulting cell is then replicated
+    // `rows - 1` times. A request for fewer than one row appends nothing.
+    // On return `*num_deserialized` is `rows` on success and 0 otherwise.
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) const override {
+        // Without this guard a zero/negative `rows` would still append one cell and
+        // then hand a negative repeat count to the replicate helper.
+        if (rows < 1) {
+            *num_deserialized = 0;
+            return Status::OK();
+        }
+        Status st = deserialize_one_cell_from_json(column, slice, options);
+        if (!st.ok()) {
+            // Mirror the base-class default: report zero rows on failure.
+            *num_deserialized = 0;
+            return st;
+        }
+
+        DataTypeStringSerDeBase::insert_column_last_value_multiple_times(column, rows - 1);
+        *num_deserialized = rows;
+        return Status::OK();
+    }
+
+    // Appends `times` more copies of the last string currently stored in `column`.
+    // Does nothing when `times` is less than one. When `times >= 1` the column must
+    // already hold at least one row, because row `size() - 1` is read.
+    void insert_column_last_value_multiple_times(IColumn& column, int times) const override {
+        // A negative `times` would otherwise be converted to an enormous vector size.
+        if (times < 1) {
+            return;
+        }
+        auto& col = static_cast<ColumnString&>(column);
+        auto sz = col.size();
+
+        StringRef ref = col.get_data_at(sz - 1);
+        // Copy the bytes out of the column before inserting.
+        // NOTE(review): presumably because growing the column could invalidate `ref` -- confirm.
+        String str(ref.data, ref.size);
+        std::vector<StringRef> refs(times, {str.data(), str.size()});
+
+        col.insert_many_strings(refs.data(), refs.size());
+    }
+
     Status read_column_from_pb(IColumn& column, const PValues& arg) const 
override {
         auto& column_dest = assert_cast<ColumnType&>(column);
         column_dest.reserve(column_dest.size() + arg.string_value_size());
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 7a820845ed0..4bc52d76959 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -943,13 +943,10 @@ Status OrcReader::_fill_partition_columns(
         auto& [value, slot_desc] = kv.second;
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
-        vector<Slice> slices(rows);
-        for (int i = 0; i < rows; i++) {
-            slices[i] = {value.data(), value.size()};
-        }
         int num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, 
&num_deserialized,
-                                                             
_text_formatOptions) != Status::OK()) {
+        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, 
rows,
+                                                            &num_deserialized,
+                                                            
_text_formatOptions) != Status::OK()) {
             return Status::InternalError("Failed to fill partition column: 
{}={}",
                                          slot_desc->col_name(), value);
         }
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 5e824f34817..9ec1235be1d 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -631,13 +631,10 @@ Status RowGroupReader::_fill_partition_columns(
         auto& [value, slot_desc] = kv.second;
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
-        vector<Slice> slices(rows);
-        for (int i = 0; i < rows; i++) {
-            slices[i] = {value.data(), value.size()};
-        }
         int num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, 
&num_deserialized,
-                                                             
_text_formatOptions) != Status::OK()) {
+        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, 
rows,
+                                                            &num_deserialized,
+                                                            
_text_formatOptions) != Status::OK()) {
             return Status::InternalError("Failed to fill partition column: 
{}={}",
                                          slot_desc->col_name(), value);
         }
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp 
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 1f7e2df0f34..afb0fd4298e 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -499,13 +499,10 @@ Status VFileScanner::_fill_columns_from_path(size_t rows) 
{
         auto& [value, slot_desc] = kv.second;
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
-        vector<Slice> slices(rows);
-        for (int i = 0; i < rows; i++) {
-            slices[i] = {value.data(), value.size()};
-        }
         int num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, 
&num_deserialized,
-                                                             
_text_formatOptions) != Status::OK()) {
+        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, 
rows,
+                                                            &num_deserialized,
+                                                            
_text_formatOptions) != Status::OK()) {
             return Status::InternalError("Failed to fill partition column: 
{}={}",
                                          slot_desc->col_name(), value);
         }
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run65.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run65.hql
new file mode 100644
index 00000000000..2c17d743d5c
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run65.hql
@@ -0,0 +1,28 @@
+-- Creates the two partitioned Hive tables (one ORC, one Parquet) over the
+-- pre-installed data directories and registers their existing partitions.
+use default;
+
+
+-- ORC table partitioned by (partition_col1, partition_col2).
+CREATE TABLE orc_partition_multi_stripe (
+    col1 STRING,
+    col2 INT,
+    col3 DOUBLE
+) PARTITIONED BY (
+    partition_col1 STRING,
+    partition_col2 INT
+)
+STORED AS ORC
+LOCATION '/user/doris/preinstalled_data/orc_table/orc_partition_multi_stripe';
+;
+-- Add the partition directories already present under the table location to the metastore.
+msck repair table orc_partition_multi_stripe;
+
+-- Parquet table with the same schema and partition columns as the ORC table above.
+CREATE TABLE parquet_partition_multi_row_group (
+    col1 STRING,
+    col2 INT,
+    col3 DOUBLE
+) PARTITIONED BY (
+    partition_col1 STRING,
+    partition_col2 INT
+)
+STORED AS PARQUET
+LOCATION 
'/user/doris/preinstalled_data/parquet_table/parquet_partition_multi_row_group';
+;
+-- Add the partition directories already present under the table location to the metastore.
+msck repair table parquet_partition_multi_row_group;
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_10
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_10
new file mode 100644
index 00000000000..46ebfc96e7e
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_10
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_11
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_11
new file mode 100644
index 00000000000..77c18939ffc
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_11
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_12
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_12
new file mode 100644
index 00000000000..e1327b620f2
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_12
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_13
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_13
new file mode 100644
index 00000000000..0302b81ef34
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_13
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_10
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_10
new file mode 100644
index 00000000000..fbe3d0ce52a
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_10
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_11
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_11
new file mode 100644
index 00000000000..cb71b631472
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_11
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_9
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_9
new file mode 100644
index 00000000000..f3c7d4fe72d
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_9
 differ
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_opt_fill_partition.out 
b/regression-test/data/external_table_p0/hive/test_hive_opt_fill_partition.out
new file mode 100644
index 00000000000..7979586e459
--- /dev/null
+++ 
b/regression-test/data/external_table_p0/hive/test_hive_opt_fill_partition.out
@@ -0,0 +1,101 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !parquet_1 --
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+
+-- !parquet_2 --
+1792
+
+-- !parquet_3 --
+1792
+
+-- !parquet_4 --
+1792
+
+-- !parquet_5 --
+1792
+
+-- !parquet_6 --
+1792
+
+-- !parquet_7 --
+word   1792
+
+-- !parquet_8 --
+hello  1792
+
+-- !parquet_9 --
+1      1792
+
+-- !parquet_10 --
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+
+-- !parquet_11 --
+1792
+
+-- !parquet_12 --
+1792
+
+-- !parquet_13 --
+1792
+
+-- !parquet_14 --
+0
+
+-- !orc_1 --
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+
+-- !orc_2 --
+7680
+
+-- !orc_3 --
+7680
+
+-- !orc_4 --
+7680
+
+-- !orc_5 --
+7680
+
+-- !orc_6 --
+7680
+
+-- !orc_7 --
+word   7680
+
+-- !orc_8 --
+hello  7680
+
+-- !orc_9 --
+1      7680
+
+-- !orc_10 --
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+word   2       2.3     hello   1
+
+-- !orc_11 --
+7680
+
+-- !orc_12 --
+7680
+
+-- !orc_13 --
+7680
+
+-- !orc_14 --
+0
+
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_opt_fill_partition.groovy
 
b/regression-test/suites/external_table_p0/hive/test_hive_opt_fill_partition.groovy
new file mode 100644
index 00000000000..0bb5249e262
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/hive/test_hive_opt_fill_partition.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+// Regression suite for filling partition columns on the Hive tables
+// `parquet_partition_multi_row_group` and `orc_partition_multi_stripe`.
+// It runs only when `enableHiveTest` is "true" in the regression config. Each `qt_*`
+// query is compared against the entry with the same tag in the accompanying .out file.
+suite("test_hive_opt_fill_partition", "p0,external,hive,external_docker,external_docker_hive") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String hivePrefix  ="hive3";
+        setHivePrefix(hivePrefix)
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+        String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort")
+
+        // Recreate the HMS catalog so the suite always starts from a clean definition.
+        String catalog_name = "test_hive_opt_fill_partition"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+        create catalog if not exists ${catalog_name} properties (
+            'type'='hms',
+            'hadoop.username' = 'hadoop',
+            'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hmsPort}'
+        );
+        """
+
+        sql """ switch ${catalog_name} """
+        sql """ use `default` """
+
+        // Parquet table: data columns, then both partition columns.
+        qt_parquet_1 """ select * from parquet_partition_multi_row_group limit 5; """
+        qt_parquet_2 """ select count(col1) from parquet_partition_multi_row_group ; """
+        qt_parquet_3 """ select count(col2) from parquet_partition_multi_row_group ; """
+        qt_parquet_4 """ select count(col3) from parquet_partition_multi_row_group ; """
+        qt_parquet_5 """ select count(partition_col1) from parquet_partition_multi_row_group ; """
+        // Previously an exact duplicate of parquet_5; count the second partition column instead.
+        // The expected result is unchanged because every row has partition_col2 = 1 (see parquet_9).
+        qt_parquet_6 """ select count(partition_col2) from parquet_partition_multi_row_group ; """
+        qt_parquet_7 """ select col1,count(*) from parquet_partition_multi_row_group group by col1;  """
+        qt_parquet_8 """ select partition_col1,count(*) from parquet_partition_multi_row_group group by partition_col1; """
+        qt_parquet_9 """ select partition_col2,count(*) from parquet_partition_multi_row_group group by partition_col2; """
+        qt_parquet_10 """ select * from parquet_partition_multi_row_group  where col1 = 'word' limit 5; """
+        qt_parquet_11 """ select count(*) from parquet_partition_multi_row_group  where col2 != 100; """
+        qt_parquet_12 """ select count(*) from parquet_partition_multi_row_group  where partition_col1 = 'hello' limit 5; """
+        qt_parquet_13 """ select count(*) from parquet_partition_multi_row_group  where partition_col2 = 1 limit 5; """
+        qt_parquet_14 """ select count(*) from parquet_partition_multi_row_group  where partition_col2 != 1 ; """
+
+
+        // ORC table: same set of checks as for Parquet.
+        qt_orc_1 """ select * from orc_partition_multi_stripe limit 5; """
+        qt_orc_2 """ select count(col1) from orc_partition_multi_stripe ; """
+        qt_orc_3 """ select count(col2) from orc_partition_multi_stripe ; """
+        qt_orc_4 """ select count(col3) from orc_partition_multi_stripe ; """
+        qt_orc_5 """ select count(partition_col1) from orc_partition_multi_stripe ; """
+        // Previously an exact duplicate of orc_5; count the second partition column instead.
+        // The expected result is unchanged because every row has partition_col2 = 1 (see orc_9).
+        qt_orc_6 """ select count(partition_col2) from orc_partition_multi_stripe ; """
+        qt_orc_7 """ select col1,count(*) from orc_partition_multi_stripe group by col1;  """
+        qt_orc_8 """ select partition_col1,count(*) from orc_partition_multi_stripe group by partition_col1; """
+        qt_orc_9 """ select partition_col2,count(*) from orc_partition_multi_stripe group by partition_col2; """
+        qt_orc_10 """ select * from orc_partition_multi_stripe  where col1 = 'word' limit 5; """
+        qt_orc_11 """ select count(*) from orc_partition_multi_stripe  where col2 != 100; """
+        qt_orc_12 """ select count(*) from orc_partition_multi_stripe  where partition_col1 = 'hello' limit 5; """
+        qt_orc_13 """ select count(*) from orc_partition_multi_stripe  where partition_col2 = 1 limit 5; """
+        qt_orc_14 """ select count(*) from orc_partition_multi_stripe  where partition_col2 != 1 ; """
+
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to