This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 607c0b82a9c [opt](serde)Optimize the filling of fixed values into block columns without repeated deserialization. (#37377) (#38245) (#38810) 607c0b82a9c is described below commit 607c0b82a9c6b9f39418d921c986ed44186ff3a4 Author: daidai <2017501...@qq.com> AuthorDate: Mon Aug 5 09:13:08 2024 +0800 [opt](serde)Optimize the filling of fixed values into block columns without repeated deserialization. (#37377) (#38245) (#38810) ## Proposed changes pick pr: #38575 and fix this pr bug : #38245 --- .../serde/data_type_datetimev2_serde.cpp | 21 +++++ .../data_types/serde/data_type_datetimev2_serde.h | 5 + .../data_types/serde/data_type_datev2_serde.cpp | 21 +++++ .../vec/data_types/serde/data_type_datev2_serde.h | 6 ++ .../data_types/serde/data_type_decimal_serde.cpp | 26 ++++++ .../vec/data_types/serde/data_type_decimal_serde.h | 6 ++ .../data_types/serde/data_type_nullable_serde.cpp | 22 +++++ .../data_types/serde/data_type_nullable_serde.h | 3 + .../data_types/serde/data_type_number_serde.cpp | 22 +++++ .../vec/data_types/serde/data_type_number_serde.h | 6 ++ be/src/vec/data_types/serde/data_type_serde.h | 21 +++++ .../vec/data_types/serde/data_type_string_serde.h | 25 +++++ be/src/vec/exec/format/orc/vorc_reader.cpp | 9 +- .../exec/format/parquet/vparquet_group_reader.cpp | 9 +- be/src/vec/exec/scan/vfile_scanner.cpp | 9 +- .../scripts/create_preinstalled_scripts/run65.hql | 28 ++++++ .../partition_col2=1/000000_0_copy_10 | Bin 0 -> 738 bytes .../partition_col2=1/000000_0_copy_11 | Bin 0 -> 806 bytes .../partition_col2=1/000000_0_copy_12 | Bin 0 -> 936 bytes .../partition_col2=1/000000_0_copy_13 | Bin 0 -> 1153 bytes .../partition_col2=1/000000_0_copy_10 | Bin 0 -> 3193 bytes .../partition_col2=1/000000_0_copy_11 | Bin 0 -> 5703 bytes .../partition_col2=1/000000_0_copy_9 | Bin 0 -> 1687 bytes .../hive/test_hive_opt_fill_partition.out | 101 +++++++++++++++++++++ .../hive/test_hive_opt_fill_partition.groovy | 74 +++++++++++++++ 25 files changed, 396 insertions(+), 18 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index 63a199199a0..850ac5766fc 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -247,4 +247,25 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone, return Status::OK(); } +Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json( + IColumn& column, Slice& slice, int rows, int* num_deserialized, + const FormatOptions& options) const { + Status st = deserialize_one_cell_from_json(column, slice, options); + if (!st.ok()) { + return st; + } + + DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(column, rows - 1); + *num_deserialized = rows; + return Status::OK(); +} + +void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(IColumn& column, + int times) const { + auto& col = static_cast<ColumnVector<UInt64>&>(column); + auto sz = col.size(); + UInt64 val = col.get_element(sz - 1); + col.insert_many_vals(val, times); +} + } // namespace doris::vectorized diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index 00b05f5fcd6..ef4aa6843a0 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -77,6 +77,11 @@ public: int start, int end, std::vector<StringRef>& buffer_list) const override; + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, + int* num_deserialized, + const FormatOptions& options) const override; + void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + private: template <bool is_binary_format> Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result, diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp index eb9122dd240..f2d595b87c4 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -175,5 +175,26 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, con return Status::OK(); } +Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column, Slice& slice, + int rows, int* num_deserialized, + const FormatOptions& options) const { + Status st = deserialize_one_cell_from_json(column, slice, options); + if (!st.ok()) { + return st; + } + DataTypeDateV2SerDe::insert_column_last_value_multiple_times(column, rows - 1); + *num_deserialized = rows; + return Status::OK(); +} + +void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(IColumn& column, + int times) const { + auto& col = static_cast<ColumnVector<UInt32>&>(column); + auto sz = col.size(); + UInt32 val = col.get_element(sz - 1); + + col.insert_many_vals(val, times); +} + } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index 9a8b050eeba..52e4cec364e 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -74,6 +74,12 @@ public: int start, int end, std::vector<StringRef>& buffer_list) const override; + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, + int* num_deserialized, + const FormatOptions& options) const override; + + void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + private: template <bool is_binary_format> Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result, diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index a59fdedbfe6..e979211d6d7 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -275,6 +275,32 @@ Status DataTypeDecimalSerDe<T>::write_column_to_orc(const std::string& timezone, } return Status::OK(); } +template <typename T> + +Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json( + IColumn& column, Slice& slice, int rows, int* num_deserialized, + const FormatOptions& options) const { + Status st = deserialize_one_cell_from_json(column, slice, options); + if (!st.ok()) { + return st; + } + + DataTypeDecimalSerDe::insert_column_last_value_multiple_times(column, rows - 1); + *num_deserialized = rows; + return Status::OK(); +} + +template <typename T> +void DataTypeDecimalSerDe<T>::insert_column_last_value_multiple_times(IColumn& column, + int times) const { + auto& col = static_cast<ColumnDecimal<T>&>(column); + auto sz = col.size(); + + T val = col.get_element(sz - 1); + for (int i = 0; i < times; i++) { + col.insert_value(val); + } +} template class DataTypeDecimalSerDe<Decimal32>; template class DataTypeDecimalSerDe<Decimal64>; diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 55e68699f01..484c6686bc5 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -114,6 +114,12 @@ public: int start, int end, std::vector<StringRef>& buffer_list) const override; + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, + int* num_deserialized, + const FormatOptions& options) const override; + + void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + private: template <bool is_binary_format> Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result, diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index faa3c8eb1f4..014e8b0d5eb 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -127,6 +127,28 @@ Status DataTypeNullableSerDe::deserialize_column_from_hive_text_vector( return Status::OK(); } +Status DataTypeNullableSerDe::deserialize_column_from_fixed_json( + IColumn& column, Slice& slice, int rows, int* num_deserialized, + const FormatOptions& options) const { + auto& col = static_cast<ColumnNullable&>(column); + Status st = deserialize_one_cell_from_json(column, slice, options); + if (!st.ok()) { + return st; + } + if (rows - 1 != 0) { + auto& null_map = col.get_null_map_data(); + auto& nested_column = col.get_nested_column(); + + uint8_t val = null_map.back(); + size_t new_sz = null_map.size() + rows - 1; + null_map.resize_fill(new_sz, + val); // data_type_nullable::insert_column_last_value_multiple_times() + nested_serde->insert_column_last_value_multiple_times(nested_column, rows - 1); + } + *num_deserialized = rows; + return Status::OK(); +} + Status DataTypeNullableSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& null_column = assert_cast<ColumnNullable&>(column); diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 09d2fbde409..7b4841dcbdf 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -47,6 +47,9 @@ public: int* num_deserialized, const FormatOptions& options) const override; + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, + int* num_deserialized, + const FormatOptions& options) const override; Status deserialize_one_cell_from_hive_text( IColumn& column, Slice& slice, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const override; diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index 0ba338ce399..299779ea267 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -224,6 +224,28 @@ void DataTypeNumberSerDe<T>::read_column_from_arrow(IColumn& column, const auto* raw_data = reinterpret_cast<const T*>(buffer->data()) + start; col_data.insert(raw_data, raw_data + row_count); } +template <typename T> +Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json( + IColumn& column, Slice& slice, int rows, int* num_deserialized, + const FormatOptions& options) const { + Status st = deserialize_one_cell_from_json(column, slice, options); + if (!st.ok()) { + return st; + } + + DataTypeNumberSerDe::insert_column_last_value_multiple_times(column, rows - 1); + *num_deserialized = rows; + return Status::OK(); +} + +template <typename T> +void DataTypeNumberSerDe<T>::insert_column_last_value_multiple_times(IColumn& column, + int times) const { + auto& col = static_cast<ColumnVector<T>&>(column); + auto sz = col.size(); + T val = col.get_element(sz - 1); + col.insert_many_vals(val, times); +} template <typename T> template <bool is_binary_format> diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index c66bc994605..18ba2fb26c7 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -70,6 +70,12 @@ public: int* num_deserialized, const FormatOptions& options) const override; + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, + int* num_deserialized, + const FormatOptions& options) const override; + + void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 77663e1d43a..1f6e24aef3f 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -234,6 +234,27 @@ public: virtual Status deserialize_column_from_json_vector(IColumn& column, std::vector<Slice>& slices, int* num_deserialized, const FormatOptions& options) const = 0; + // deserialize fixed values.Repeatedly insert the value row times into the column. + virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, + int* num_deserialized, + const FormatOptions& options) const { + Status st = deserialize_one_cell_from_json(column, slice, options); + if (!st.ok()) { + *num_deserialized = 0; + return st; + } + insert_column_last_value_multiple_times(column, rows - 1); + *num_deserialized = rows; + return Status::OK(); + } + // Insert the last value to the end of this column multiple times. + virtual void insert_column_last_value_multiple_times(IColumn& column, int times) const { + //If you try to simplify this operation by using `column.insert_many_from(column, column.size() - 1, rows - 1);` + // you are likely to get incorrect data results. + MutableColumnPtr dum_col = column.clone_empty(); + dum_col->insert_from(column, column.size() - 1); + column.insert_many_from(*dum_col.get(), 0, times); + } virtual Status deserialize_one_cell_from_hive_text( IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index 24f99a12e67..d3161c88706 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -172,6 +172,31 @@ public: } return Status::OK(); } + + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, + int* num_deserialized, + const FormatOptions& options) const override { + Status st = deserialize_one_cell_from_json(column, slice, options); + if (!st.ok()) { + return st; + } + + DataTypeStringSerDeBase::insert_column_last_value_multiple_times(column, rows - 1); + *num_deserialized = rows; + return Status::OK(); + } + + void insert_column_last_value_multiple_times(IColumn& column, int times) const override { + auto& col = static_cast<ColumnString&>(column); + auto sz = col.size(); + + StringRef ref = col.get_data_at(sz - 1); + String str(ref.data, ref.size); + std::vector<StringRef> refs(times, {str.data(), str.size()}); + + col.insert_many_strings(refs.data(), refs.size()); + } + Status read_column_from_pb(IColumn& column, const PValues& arg) const override { auto& column_dest = assert_cast<ColumnType&>(column); column_dest.reserve(column_dest.size() + arg.string_value_size()); diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 7a820845ed0..4bc52d76959 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -943,13 +943,10 @@ Status OrcReader::_fill_partition_columns( auto& [value, slot_desc] = kv.second; auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); - vector<Slice> slices(rows); - for (int i = 0; i < rows; i++) { - slices[i] = {value.data(), value.size()}; - } int num_deserialized = 0; - if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, &num_deserialized, - _text_formatOptions) != Status::OK()) { + if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, + &num_deserialized, + _text_formatOptions) != Status::OK()) { return Status::InternalError("Failed to fill partition column: {}={}", slot_desc->col_name(), value); } diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 5e824f34817..9ec1235be1d 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -631,13 +631,10 @@ Status RowGroupReader::_fill_partition_columns( auto& [value, slot_desc] = kv.second; auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); - vector<Slice> slices(rows); - for (int i = 0; i < rows; i++) { - slices[i] = {value.data(), value.size()}; - } int num_deserialized = 0; - if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, &num_deserialized, - _text_formatOptions) != Status::OK()) { + if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, + &num_deserialized, + _text_formatOptions) != Status::OK()) { return Status::InternalError("Failed to fill partition column: {}={}", slot_desc->col_name(), value); } diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 1f7e2df0f34..afb0fd4298e 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -499,13 +499,10 @@ Status VFileScanner::_fill_columns_from_path(size_t rows) { auto& [value, slot_desc] = kv.second; auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); - vector<Slice> slices(rows); - for (int i = 0; i < rows; i++) { - slices[i] = {value.data(), value.size()}; - } int num_deserialized = 0; - if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, &num_deserialized, - _text_formatOptions) != Status::OK()) { + if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, + &num_deserialized, + _text_formatOptions) != Status::OK()) { return Status::InternalError("Failed to fill partition column: {}={}", slot_desc->col_name(), value); } diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run65.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run65.hql new file mode 100644 index 00000000000..2c17d743d5c --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run65.hql @@ -0,0 +1,28 @@ +use default; + + +CREATE TABLE orc_partition_multi_stripe ( + col1 STRING, + col2 INT, + col3 DOUBLE +) PARTITIONED BY ( + partition_col1 STRING, + partition_col2 INT +) +STORED AS ORC +LOCATION '/user/doris/preinstalled_data/orc_table/orc_partition_multi_stripe'; +; +msck repair table orc_partition_multi_stripe; + +CREATE TABLE parquet_partition_multi_row_group ( + col1 STRING, + col2 INT, + col3 DOUBLE +) PARTITIONED BY ( + partition_col1 STRING, + partition_col2 INT +) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_partition_multi_row_group'; +; +msck repair table parquet_partition_multi_row_group; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_10 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_10 new file mode 100644 index 00000000000..46ebfc96e7e Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_10 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_11 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_11 new file mode 100644 index 00000000000..77c18939ffc Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_11 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_12 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_12 new file mode 100644 index 00000000000..e1327b620f2 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_12 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_13 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_13 new file mode 100644 index 00000000000..0302b81ef34 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_partition_multi_stripe/partition_col1=hello/partition_col2=1/000000_0_copy_13 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_10 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_10 new file mode 100644 index 00000000000..fbe3d0ce52a Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_10 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_11 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_11 new file mode 100644 index 00000000000..cb71b631472 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_11 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_9 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_9 new file mode 100644 index 00000000000..f3c7d4fe72d Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_partition_multi_row_group/partition_col1=hello/partition_col2=1/000000_0_copy_9 differ diff --git a/regression-test/data/external_table_p0/hive/test_hive_opt_fill_partition.out b/regression-test/data/external_table_p0/hive/test_hive_opt_fill_partition.out new file mode 100644 index 00000000000..7979586e459 --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_opt_fill_partition.out @@ -0,0 +1,101 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !parquet_1 -- +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 + +-- !parquet_2 -- +1792 + +-- !parquet_3 -- +1792 + +-- !parquet_4 -- +1792 + +-- !parquet_5 -- +1792 + +-- !parquet_6 -- +1792 + +-- !parquet_7 -- +word 1792 + +-- !parquet_8 -- +hello 1792 + +-- !parquet_9 -- +1 1792 + +-- !parquet_10 -- +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 + +-- !parquet_11 -- +1792 + +-- !parquet_12 -- +1792 + +-- !parquet_13 -- +1792 + +-- !parquet_14 -- +0 + +-- !orc_1 -- +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 + +-- !orc_2 -- +7680 + +-- !orc_3 -- +7680 + +-- !orc_4 -- +7680 + +-- !orc_5 -- +7680 + +-- !orc_6 -- +7680 + +-- !orc_7 -- +word 7680 + +-- !orc_8 -- +hello 7680 + +-- !orc_9 -- +1 7680 + +-- !orc_10 -- +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 +word 2 2.3 hello 1 + +-- !orc_11 -- +7680 + +-- !orc_12 -- +7680 + +-- !orc_13 -- +7680 + +-- !orc_14 -- +0 + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_opt_fill_partition.groovy b/regression-test/suites/external_table_p0/hive/test_hive_opt_fill_partition.groovy new file mode 100644 index 00000000000..0bb5249e262 --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_opt_fill_partition.groovy @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_hive_opt_fill_partition", "p0,external,hive,external_docker,external_docker_hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String hivePrefix ="hive3"; + setHivePrefix(hivePrefix) + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") + + String catalog_name = "test_hive_opt_fill_partition" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hmsPort}' + ); + """ + + sql """ switch ${catalog_name} """ + sql """ use `default` """ + + qt_parquet_1 """ select * from parquet_partition_multi_row_group limit 5; """ + qt_parquet_2 """ select count(col1) from parquet_partition_multi_row_group ; """ + qt_parquet_3 """ select count(col2) from parquet_partition_multi_row_group ; """ + qt_parquet_4 """ select count(col3) from parquet_partition_multi_row_group ; """ + qt_parquet_5 """ select count(partition_col1) from parquet_partition_multi_row_group ; """ + qt_parquet_6 """ select count(partition_col1) from parquet_partition_multi_row_group ; """ + qt_parquet_7 """ select col1,count(*) from parquet_partition_multi_row_group group by col1; """ + qt_parquet_8 """ select partition_col1,count(*) from parquet_partition_multi_row_group group by partition_col1; """ + qt_parquet_9 """ select partition_col2,count(*) from parquet_partition_multi_row_group group by partition_col2; """ + qt_parquet_10 """ select * from parquet_partition_multi_row_group where col1 = 'word' limit 5; """ + qt_parquet_11 """ select count(*) from parquet_partition_multi_row_group where col2 != 100; """ + qt_parquet_12 """ select count(*) from parquet_partition_multi_row_group where partition_col1 = 'hello' limit 5; """ + qt_parquet_13 """ select count(*) from parquet_partition_multi_row_group where partition_col2 = 1 limit 5; """ + qt_parquet_14 """ select count(*) from parquet_partition_multi_row_group where partition_col2 != 1 ; """ + + + qt_orc_1 """ select * from orc_partition_multi_stripe limit 5; """ + qt_orc_2 """ select count(col1) from orc_partition_multi_stripe ; """ + qt_orc_3 """ select count(col2) from orc_partition_multi_stripe ; """ + qt_orc_4 """ select count(col3) from orc_partition_multi_stripe ; """ + qt_orc_5 """ select count(partition_col1) from orc_partition_multi_stripe ; """ + qt_orc_6 """ select count(partition_col1) from orc_partition_multi_stripe ; """ + qt_orc_7 """ select col1,count(*) from orc_partition_multi_stripe group by col1; """ + qt_orc_8 """ select partition_col1,count(*) from orc_partition_multi_stripe group by partition_col1; """ + qt_orc_9 """ select partition_col2,count(*) from orc_partition_multi_stripe group by partition_col2; """ + qt_orc_10 """ select * from orc_partition_multi_stripe where col1 = 'word' limit 5; """ + qt_orc_11 """ select count(*) from orc_partition_multi_stripe where col2 != 100; """ + qt_orc_12 """ select count(*) from orc_partition_multi_stripe where partition_col1 = 'hello' limit 5; """ + qt_orc_13 """ select count(*) from orc_partition_multi_stripe where partition_col2 = 1 limit 5; """ + qt_orc_14 """ select count(*) from orc_partition_multi_stripe where partition_col2 != 1 ; """ + + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org