This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 34fafcaf05083f34ff7126ff870c531a53dd53a3 Author: lihangyu <15605149...@163.com> AuthorDate: Fri Apr 12 10:33:47 2024 +0800 [Feature](Variant) support aggregation model for Variant type (#33493) refactor use `insert_from` to replace `replace_column_data` for variable lengths columns --- be/src/olap/tablet_schema.h | 21 +- be/src/vec/columns/column_array.h | 16 +- be/src/vec/columns/column_map.h | 18 +- be/src/vec/columns/column_object.cpp | 21 +- be/src/vec/columns/column_object.h | 10 +- be/src/vec/columns/column_string.h | 29 +-- be/src/vec/columns/column_struct.h | 14 +- be/src/vec/common/schema_util.cpp | 7 +- be/src/vec/common/schema_util.h | 4 +- be/src/vec/exec/scan/new_olap_scanner.cpp | 2 +- be/src/vec/olap/block_reader.cpp | 4 +- be/src/vec/olap/vertical_block_reader.cpp | 4 +- regression-test/data/variant_p0/agg.out | 78 +++++++ .../data/variant_p0/compaction/test_compaction.out | 104 ++++++++++ regression-test/data/variant_p0/desc.out | 230 ++++++++++----------- regression-test/suites/variant_p0/agg.groovy | 63 ++++++ .../variant_p0/compaction/test_compaction.groovy | 11 +- 17 files changed, 422 insertions(+), 214 deletions(-) diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index dae7c860518..33942035d29 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -127,7 +127,10 @@ public: int frac() const { return _frac; } inline bool visible() const { return _visible; } - void set_aggregation_method(FieldAggregationMethod agg) { _aggregation = agg; } + void set_aggregation_method(FieldAggregationMethod agg) { + _aggregation = agg; + _aggregation_name = get_string_by_aggregation_type(agg); + } /** * Add a sub column. @@ -426,6 +429,22 @@ public: return str; } + string dump_full_schema() const { + string str = "["; + for (auto p : _cols) { + if (str.size() > 1) { + str += ", "; + } + ColumnPB col_pb; + p->to_schema_pb(&col_pb); + str += "("; + str += col_pb.ShortDebugString(); + str += ")"; + } + str += "]"; + return str; + } + vectorized::Block create_block_by_cids(const std::vector<uint32_t>& cids); std::shared_ptr<TabletSchema> copy_without_extracted_columns(); diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 3176f7a45c6..118e7ab05c6 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -217,23 +217,11 @@ public: const uint32_t* indices_end) override; void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { - DCHECK(size() > self_row); - const auto& r = assert_cast<const ColumnArray&>(rhs); - const size_t nested_row_size = r.size_at(row); - const size_t r_nested_start_off = r.offset_at(row); - - // we should clear data because we call resize() before replace_column_data() - if (self_row == 0) { - data->clear(); - } - get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size; - // we make sure call replace_column_data() by order so, here we just insert data for nested - data->insert_range_from(r.get_data(), r_nested_start_off, nested_row_size); + LOG(FATAL) << "Method replace_column_data is not supported for " << get_name(); } void replace_column_data_default(size_t self_row = 0) override { - DCHECK(size() > self_row); - get_offsets()[self_row] = get_offsets()[self_row - 1]; + LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name(); } void clear() override { diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index ed5a692defd..2cdfcae8c73 100644 --- 
a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -135,25 +135,11 @@ public: } void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { - DCHECK(size() > self_row); - const auto& r = assert_cast<const ColumnMap&>(rhs); - const size_t nested_row_size = r.size_at(row); - const size_t r_key_nested_start_off = r.offset_at(row); - const size_t r_val_nested_start_off = r.offset_at(row); - - if (self_row == 0) { - keys_column->clear(); - values_column->clear(); - } - get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size; - // here we use batch size to avoid many virtual call in nested column - keys_column->insert_range_from(r.get_keys(), r_key_nested_start_off, nested_row_size); - values_column->insert_range_from(r.get_values(), r_val_nested_start_off, nested_row_size); + LOG(FATAL) << "Method replace_column_data is not supported for " << get_name(); } void replace_column_data_default(size_t self_row = 0) override { - DCHECK(size() > self_row); - get_offsets()[self_row] = get_offsets()[self_row - 1]; + LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name(); } ColumnArray::Offsets64& ALWAYS_INLINE get_offsets() { diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index b88887b580f..c596717194f 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -732,18 +732,7 @@ Field ColumnObject::operator[](size_t n) const { } void ColumnObject::get(size_t n, Field& res) const { - if (!is_finalized()) { - const_cast<ColumnObject*>(this)->finalize(); - } - auto& map = res.get<VariantMap&>(); - for (const auto& entry : subcolumns) { - auto it = map.try_emplace(entry->path.get_path()).first; - if (WhichDataType(remove_nullable(entry->data.data_types.back())).is_json()) { - // JsonbFiled is special case - it->second = JsonbField(); - } - entry->data.data.back()->get(n, it->second); - } + res = (*this)[n]; } Status ColumnObject::try_insert_indices_from(const IColumn& src, const int* indices_begin, @@ -1513,4 +1502,12 @@ Status ColumnObject::sanitize() const { return Status::OK(); } +void ColumnObject::replace_column_data(const IColumn& col, size_t row, size_t self_row) { + LOG(FATAL) << "Method replace_column_data is not supported for " << get_name(); +} + +void ColumnObject::replace_column_data_default(size_t self_row) { + LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name(); +} + } // namespace doris::vectorized diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index a940b4a8811..26b2c66a755 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -456,13 +456,11 @@ public: LOG(FATAL) << "should not call the method in column object"; } - void replace_column_data(const IColumn&, size_t row, size_t self_row) override { - LOG(FATAL) << "should not call the method in column object"; - } + bool is_variable_length() const override { return true; } - void replace_column_data_default(size_t self_row) override { - LOG(FATAL) << "should not call the method in column object"; - } + void replace_column_data(const IColumn&, size_t row, size_t self_row) override; + + void replace_column_data_default(size_t self_row) override; void get_indices_of_non_default_rows(Offsets64&, size_t, size_t) const override { LOG(FATAL) << "should not call the method in column object"; diff --git a/be/src/vec/columns/column_string.h 
b/be/src/vec/columns/column_string.h index b858ab86bf3..405ada3b48d 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -545,37 +545,12 @@ public: } void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { - // we check this column size and self_row because we need to make sure when we call - // replace_column_data() with a batch column data. - // and this column data is cleared at the every beginning. - // next we replace column one by one. - DCHECK(size() > self_row); - const auto& r = assert_cast<const ColumnString&>(rhs); - auto data = r.get_data_at(row); - - if (!self_row) { - // self_row == 0 means we first call replace_column_data() with batch column data. so we - // should clean last batch column data. - chars.clear(); - offsets[self_row] = data.size; - } else { - offsets[self_row] = offsets[self_row - 1] + data.size; - check_chars_length(offsets[self_row], self_row); - } - - chars.insert(data.data, data.data + data.size); + LOG(FATAL) << "Method replace_column_data is not supported for " << get_name(); } // should replace according to 0,1,2... ,size,0,1,2... void replace_column_data_default(size_t self_row = 0) override { - DCHECK(size() > self_row); - - if (!self_row) { - chars.clear(); - offsets[self_row] = 0; - } else { - offsets[self_row] = offsets[self_row - 1]; - } + LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name(); } void compare_internal(size_t rhs_row_id, const IColumn& rhs, int nan_direction_hint, diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 34d1e3ecf0e..2ca4fdec015 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -88,6 +88,8 @@ public: MutableColumnPtr clone_resized(size_t size) const override; size_t size() const override { return columns.at(0)->size(); } + bool is_variable_length() const override { return true; } + Field operator[](size_t n) const override; void get(size_t n, Field& res) const override; @@ -131,19 +133,11 @@ public: return append_data_by_selector_impl<ColumnStruct>(res, selector); } void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { - DCHECK(size() > self_row); - const auto& r = assert_cast<const ColumnStruct&>(rhs); - - for (size_t idx = 0; idx < columns.size(); ++idx) { - columns[idx]->replace_column_data(r.get_column(idx), row, self_row); - } + LOG(FATAL) << "Method replace_column_data is not supported for " << get_name(); } void replace_column_data_default(size_t self_row = 0) override { - DCHECK(size() > self_row); - for (size_t idx = 0; idx < columns.size(); ++idx) { - columns[idx]->replace_column_data_default(self_row); - } + LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name(); } void insert_range_from(const IColumn& src, size_t start, size_t length) override; diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 71cdece8333..5c7a2f8482a 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -369,7 +369,7 @@ void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas, update_least_schema_internal(subcolumns_types, common_schema, true, variant_col_unique_id); } -void inherit_tablet_index(TabletSchemaSPtr& schema) { +void inherit_root_attributes(TabletSchemaSPtr& schema) { std::unordered_map<int32_t, TabletIndex> variants_index_meta; // Get all variants tablet index metas if exist for (const 
auto& col : schema->columns()) { @@ -392,6 +392,7 @@ void inherit_tablet_index(TabletSchemaSPtr& schema) { // above types are not supported in bf col.set_is_bf_column(schema->column(col.parent_unique_id()).is_bf_column()); } + col.set_aggregation_method(schema->column(col.parent_unique_id()).aggregation()); auto it = variants_index_meta.find(col.parent_unique_id()); // variant has no index meta, ignore if (it == variants_index_meta.end()) { @@ -467,7 +468,7 @@ Status get_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas, update_least_sparse_column(schemas, output_schema, unique_id, path_set); } - inherit_tablet_index(output_schema); + inherit_root_attributes(output_schema); if (check_schema_size && output_schema->columns().size() > config::variant_max_merged_tablet_schema_size) { return Status::DataQualityError("Reached max column size limit {}", @@ -710,7 +711,7 @@ void rebuild_schema_and_block(const TabletSchemaSPtr& original, VLOG_DEBUG << "set root_path : " << full_root_path.get_path(); } - vectorized::schema_util::inherit_tablet_index(flush_schema); + vectorized::schema_util::inherit_root_attributes(flush_schema); } // --------------------------- diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index e1bb037f70e..e6ed60480f5 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -116,8 +116,8 @@ void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas, TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id, const std::unordered_set<PathInData, PathInData::Hash>& path_set); -// inherit index info from it's parent column -void inherit_tablet_index(TabletSchemaSPtr& schema); +// inherit attributes like index/agg info from it's parent column +void inherit_root_attributes(TabletSchemaSPtr& schema); // Rebuild schema from original schema by extend dynamic columns generated from ColumnObject. // Block consists of two parts, dynamic part of columns and static part of columns. 
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index e69cb94a0df..e816237984e 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -445,7 +445,7 @@ Status NewOlapScanner::_init_variant_columns() { } } } - schema_util::inherit_tablet_index(tablet_schema); + schema_util::inherit_root_attributes(tablet_schema); return Status::OK(); } diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp index 3d4c4d4b318..beb0bb7c8cc 100644 --- a/be/src/vec/olap/block_reader.cpp +++ b/be/src/vec/olap/block_reader.cpp @@ -475,10 +475,10 @@ size_t BlockReader::_copy_agg_data() { auto& dst_column = _stored_data_columns[idx]; if (_stored_has_variable_length_tag[idx]) { //variable length type should replace ordered + dst_column->clear(); for (size_t i = 0; i < copy_size; i++) { auto& ref = _stored_row_ref[i]; - dst_column->replace_column_data(*ref.block->get_by_position(idx).column, - ref.row_pos, i); + dst_column->insert_from(*ref.block->get_by_position(idx).column, ref.row_pos); } } else { for (auto& it : _temp_ref_map) { diff --git a/be/src/vec/olap/vertical_block_reader.cpp b/be/src/vec/olap/vertical_block_reader.cpp index 5a08e4ca3f2..c472e678abd 100644 --- a/be/src/vec/olap/vertical_block_reader.cpp +++ b/be/src/vec/olap/vertical_block_reader.cpp @@ -329,10 +329,10 @@ size_t VerticalBlockReader::_copy_agg_data() { auto& dst_column = _stored_data_columns[idx]; if (_stored_has_variable_length_tag[idx]) { //variable length type should replace ordered + dst_column->clear(); for (size_t i = 0; i < copy_size; i++) { auto& ref = _stored_row_ref[i]; - dst_column->replace_column_data(*ref.block->get_by_position(idx).column, - ref.row_pos, i); + dst_column->insert_from(*ref.block->get_by_position(idx).column, ref.row_pos); } } else { for (auto& it : _temp_ref_map) { diff --git a/regression-test/data/variant_p0/agg.out b/regression-test/data/variant_p0/agg.out new file mode 100644 index 00000000000..958e3d41a7b --- /dev/null +++ b/regression-test/data/variant_p0/agg.out @@ -0,0 +1,78 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql1 -- + +-- !sql2 -- +1 \N {"c":[{"a":1}]} +1022 \N {"f":17034,"g":1.111} +1029 \N {"c":1} +1999 \N {"c":1} + +-- !sql3 -- +1 {"a":1,"b":{"c":[{"a":1}]}} +2 [2] +3 3 +4 "4" +5 5 + +-- !sql4 -- +{"c":[{"a":1}]} [{"a":1}] \N +{"f":17034,"g":1.111} \N \N +{"c":1} 1 \N +{"c":1} 1 \N + +-- !sql5 -- + +-- !sql6 -- +{"c":[{"a":1}]} +{"f":17034,"g":1.111} +{"c":1} +{"c":1} + +-- !sql7 -- +1 {"a":1,"b":{"c":[{"a":1}]}} 59 +1022 {"a":1,"b":{"f":17034,"g":1.111}} 12 +1029 \N 12 +1999 {"a":1,"b":{"c":1}} 11 + +-- !sql8 -- +1 {"a":1,"b":{"c":[{"a":1}]}} 59 +2 [2] 2 +3 3 3 +4 "4" 4 +5 5 5 +6 "[6]" 6 +7 7 7 +8 8.11111 8 +9 "9999" 9 +10 1000000 10 +11 [123] 11 +12 [123.2] 12 +1022 {"a":1,"b":{"f":17034,"g":1.111}} 12 +1029 \N 12 +1999 {"a":1,"b":{"c":1}} 11 +19921 {"a":1,"d":10} 11 + +-- !sql9 -- +0 {"a":11245,"f":["123456"]} +1 {"a":11245,"f":["123456"]} +2 {"a":11245,"f":["123456"]} +3 {"a":11245,"f":["123456"]} +4 {"a":11245,"f":["123456"]} +5 {"a":11245,"f":["123456"]} +6 {"a":11245,"f":["123456"]} +7 {"a":11245,"f":["123456"]} +8 {"a":11245,"f":["123456"]} +9 {"a":11245,"f":["123456"]} + +-- !sql9 -- +1025 {"a":11245,"y":11111111} +1026 {"a":11245,"y":11111111} +1027 {"a":11245,"y":11111111} +1028 {"a":11245,"y":11111111} +1029 {"a":11245,"y":11111111} +1030 {"a":11245,"y":11111111} +1031 {"a":11245,"y":11111111} +1032 {"a":11245,"y":11111111} +1033 {"a":11245,"y":11111111} +1034 {"a":11245,"y":11111111} + diff --git a/regression-test/data/variant_p0/compaction/test_compaction.out b/regression-test/data/variant_p0/compaction/test_compaction.out index 50943aef7b6..57a5c142fbb 100644 --- a/regression-test/data/variant_p0/compaction/test_compaction.out +++ b/regression-test/data/variant_p0/compaction/test_compaction.out @@ -279,3 +279,107 @@ {"c":[{"a":1}]} [{"a":1}] {"c":1} 1 +-- !sql_1 -- +1 {"x":[1]} +2 {"a":"1"} +3 {"x":[3]} +4 {"y":1} +5 {"z":2.0} +6 {"x":111} +7 {"m":1} +8 {"l":2} +9 {"g":1.11} +10 {"z":1.1111} +11 {"sala":0} +12 {"dddd":0.1} +13 {"a":1} +14 {"a":[[[1]]]} +15 {"a":1} +16 {"a":"1223"} +17 {"a":[1]} +18 {"a":["1",2,1.1]} +19 {"a":1,"b":{"c":1}} +20 {"a":1,"b":{"c":[{"a":1}]}} +21 {"a":1,"b":{"c":[{"a":1}]}} +22 {"a":1,"b":{"c":[{"a":1}]}} +1022 {"a":1,"b":10} +1029 {"a":1,"b":{"c":1}} +1999 {"a":1,"b":{"c":1}} +19921 {"a":1,"b":10} + +-- !sql_2 -- +14 [null] +17 [1] +18 [1, 2, null] + +-- !sql_3 -- +19 1 {"c":1} +20 1 {"c":[{"a":1}]} +21 1 {"c":[{"a":1}]} +22 1 {"c":[{"a":1}]} +1029 1 {"c":1} +1999 1 {"c":1} + +-- !sql_5 -- +10 \N +{"c":1} 1 +{"c":1} 1 +10 \N +{"c":[{"a":1}]} [{"a":1}] +{"c":[{"a":1}]} [{"a":1}] +{"c":[{"a":1}]} [{"a":1}] +{"c":1} 1 +{} \N +{} \N + +-- !sql_11 -- +1 {"x":[1]} +2 {"a":"1"} +3 {"x":[3]} +4 {"y":1} +5 {"z":2.0} +6 {"x":111} +7 {"m":1} +8 {"l":2} +9 {"g":1.11} +10 {"z":1.1111} +11 {"sala":0} +12 {"dddd":0.1} +13 {"a":1} +14 {"a":[[[1]]]} +15 {"a":1} +16 {"a":"1223"} +17 {"a":[1]} +18 {"a":["1",2,1.1]} +19 {"a":1,"b":{"c":1}} +20 {"a":1,"b":{"c":[{"a":1}]}} +21 {"a":1,"b":{"c":[{"a":1}]}} +22 {"a":1,"b":{"c":[{"a":1}]}} +1022 {"a":1,"b":10} +1029 {"a":1,"b":{"c":1}} +1999 {"a":1,"b":{"c":1}} +19921 {"a":1,"b":10} + +-- !sql_22 -- +14 [null] +17 [1] +18 [1, 2, null] + +-- !sql_33 -- +19 1 {"c":1} +20 1 {"c":[{"a":1}]} +21 1 {"c":[{"a":1}]} +22 1 {"c":[{"a":1}]} +1029 1 {"c":1} +1999 1 {"c":1} + +-- !sql_55 -- +10 \N +{"c":1} 1 +{"c":1} 1 +10 \N +{"c":[{"a":1}]} [{"a":1}] +{"c":[{"a":1}]} [{"a":1}] +{"c":[{"a":1}]} [{"a":1}] +{"c":1} 1 + diff --git a/regression-test/data/variant_p0/desc.out 
b/regression-test/data/variant_p0/desc.out index ecf0925a9c9..b46b5f9b4b0 100644 --- a/regression-test/data/variant_p0/desc.out +++ b/regression-test/data/variant_p0/desc.out @@ -2,107 +2,107 @@ -- !sql_1 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.xxxx TEXT Yes false \N +v.a SMALLINT Yes false \N NONE +v.xxxx TEXT Yes false \N NONE -- !sql_2 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.ddd.aaa TINYINT Yes false \N -v.ddd.mxmxm JSON Yes false \N -v.xxxx TEXT Yes false \N +v.a SMALLINT Yes false \N NONE +v.ddd.aaa TINYINT Yes false \N NONE +v.ddd.mxmxm JSON Yes false \N NONE +v.xxxx TEXT Yes false \N NONE -- !sql_3 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N -v.xxxx TEXT Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE +v.xxxx TEXT Yes false \N NONE -- !sql_6_1 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.ddd.aaa TINYINT Yes false \N -v.ddd.mxmxm JSON Yes false \N -v.xxxx TEXT Yes false \N +v.a SMALLINT Yes false \N NONE +v.ddd.aaa TINYINT Yes false \N NONE +v.ddd.mxmxm JSON Yes false \N NONE +v.xxxx TEXT Yes false \N NONE -- !sql_6_2 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.xxxx TEXT Yes false \N +v.a SMALLINT Yes false \N NONE +v.xxxx TEXT Yes false \N NONE -- !sql_6_3 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE -- !sql_6 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N -v.ddd.aaa TINYINT Yes false \N -v.ddd.mxmxm JSON Yes false \N -v.xxxx TEXT Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE +v.ddd.aaa TINYINT Yes false \N NONE +v.ddd.mxmxm JSON Yes false \N NONE +v.xxxx TEXT Yes false \N NONE -- !sql_7 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N -v.xxxx TEXT Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE +v.xxxx TEXT Yes false \N NONE -- !sql_7_1 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.xxxx TEXT Yes false \N +v.a SMALLINT Yes false \N NONE +v.xxxx TEXT Yes false \N NONE -- !sql_7_2 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE -- !sql_7_3 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N -v.xxxx TEXT Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE +v.xxxx TEXT Yes false \N NONE -- !sql_8 -- k BIGINT Yes true \N v1 VARIANT Yes false \N 
NONE v2 VARIANT Yes false \N NONE v3 VARIANT Yes false \N NONE -v1.a SMALLINT Yes false \N -v1.b JSON Yes false \N -v1.c.c SMALLINT Yes false \N -v1.c.e DOUBLE Yes false \N -v1.oooo.xxxx.xxx TINYINT Yes false \N -v2.a SMALLINT Yes false \N -v2.xxxx TEXT Yes false \N -v3.a SMALLINT Yes false \N -v3.b JSON Yes false \N -v3.c.c SMALLINT Yes false \N -v3.c.e DOUBLE Yes false \N +v1.a SMALLINT Yes false \N NONE +v1.b JSON Yes false \N NONE +v1.c.c SMALLINT Yes false \N NONE +v1.c.e DOUBLE Yes false \N NONE +v1.oooo.xxxx.xxx TINYINT Yes false \N NONE +v2.a SMALLINT Yes false \N NONE +v2.xxxx TEXT Yes false \N NONE +v3.a SMALLINT Yes false \N NONE +v3.b JSON Yes false \N NONE +v3.c.c SMALLINT Yes false \N NONE +v3.c.e DOUBLE Yes false \N NONE -- !sql_9 -- k BIGINT Yes true \N @@ -111,88 +111,88 @@ v VARIANT Yes false \N NONE -- !sql_9_1 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N -v.oooo.xxxx.xxx TINYINT Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE +v.oooo.xxxx.xxx TINYINT Yes false \N NONE -- !sql_10 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.k1 TINYINT Yes false \N -v.k2 TEXT Yes false \N -v.k3 ARRAY<SMALLINT> Yes false [] -v.k4 DOUBLE Yes false \N -v.k5 JSON Yes false \N +v.k1 TINYINT Yes false \N NONE +v.k2 TEXT Yes false \N NONE +v.k3 ARRAY<SMALLINT> Yes false [] NONE +v.k4 DOUBLE Yes false \N NONE +v.k5 JSON Yes false \N NONE -- !sql_10_1 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE v2 VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N -v.k1 TINYINT Yes false \N -v.k2 TEXT Yes false \N -v.k3 ARRAY<SMALLINT> Yes false [] -v.k4 DOUBLE Yes false \N -v.k5 JSON Yes false \N -v.oooo.xxxx.xxx TINYINT Yes false \N -v2.a SMALLINT Yes false \N -v2.b JSON Yes false \N -v2.c.c SMALLINT Yes false \N -v2.c.e DOUBLE Yes false \N -v2.oooo.xxxx.xxx TINYINT Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE +v.k1 TINYINT Yes false \N NONE +v.k2 TEXT Yes false \N NONE +v.k3 ARRAY<SMALLINT> Yes false [] NONE +v.k4 DOUBLE Yes false \N NONE +v.k5 JSON Yes false \N NONE +v.oooo.xxxx.xxx TINYINT Yes false \N NONE +v2.a SMALLINT Yes false \N NONE +v2.b JSON Yes false \N NONE +v2.c.c SMALLINT Yes false \N NONE +v2.c.e DOUBLE Yes false \N NONE +v2.oooo.xxxx.xxx TINYINT Yes false \N NONE -- !sql_10_2 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N -v.k1 TINYINT Yes false \N -v.k2 TEXT Yes false \N -v.k3 ARRAY<SMALLINT> Yes false [] -v.k4 DOUBLE Yes false \N -v.k5 JSON Yes false \N -v.oooo.xxxx.xxx TINYINT Yes false \N -v2.a SMALLINT Yes false \N -v2.b JSON Yes false \N -v2.c.c SMALLINT Yes false \N -v2.c.e DOUBLE Yes false \N -v2.oooo.xxxx.xxx TINYINT Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE +v.k1 TINYINT Yes false \N NONE +v.k2 TEXT Yes false \N NONE +v.k3 ARRAY<SMALLINT> Yes false [] NONE +v.k4 DOUBLE Yes false \N NONE +v.k5 JSON Yes false \N NONE +v.oooo.xxxx.xxx TINYINT Yes false \N NONE +v2.a SMALLINT Yes false \N NONE +v2.b JSON Yes false \N NONE +v2.c.c SMALLINT Yes false \N NONE +v2.c.e DOUBLE Yes false \N NONE 
+v2.oooo.xxxx.xxx TINYINT Yes false \N NONE -- !sql_10_3 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE v3 VARIANT Yes false \N NONE -v.a SMALLINT Yes false \N -v.b JSON Yes false \N -v.c.c SMALLINT Yes false \N -v.c.e DOUBLE Yes false \N -v.k1 TINYINT Yes false \N -v.k2 TEXT Yes false \N -v.k3 ARRAY<SMALLINT> Yes false [] -v.k4 DOUBLE Yes false \N -v.k5 JSON Yes false \N -v.oooo.xxxx.xxx TINYINT Yes false \N -v3.a SMALLINT Yes false \N -v3.b JSON Yes false \N -v3.c.c SMALLINT Yes false \N -v3.c.e DOUBLE Yes false \N -v3.oooo.xxxx.xxx TINYINT Yes false \N +v.a SMALLINT Yes false \N NONE +v.b JSON Yes false \N NONE +v.c.c SMALLINT Yes false \N NONE +v.c.e DOUBLE Yes false \N NONE +v.k1 TINYINT Yes false \N NONE +v.k2 TEXT Yes false \N NONE +v.k3 ARRAY<SMALLINT> Yes false [] NONE +v.k4 DOUBLE Yes false \N NONE +v.k5 JSON Yes false \N NONE +v.oooo.xxxx.xxx TINYINT Yes false \N NONE +v3.a SMALLINT Yes false \N NONE +v3.b JSON Yes false \N NONE +v3.c.c SMALLINT Yes false \N NONE +v3.c.e DOUBLE Yes false \N NONE +v3.oooo.xxxx.xxx TINYINT Yes false \N NONE -- !sql_11 -- k BIGINT Yes true \N v VARIANT Yes false \N NONE -v.!@#^&*() TEXT Yes false \N -v.名字 TEXT Yes false \N -v.画像.丬文 TEXT Yes false \N -v.画像.地址 TEXT Yes false \N -v.金额 SMALLINT Yes false \N +v.!@#^&*() TEXT Yes false \N NONE +v.名字 TEXT Yes false \N NONE +v.画像.丬文 TEXT Yes false \N NONE +v.画像.地址 TEXT Yes false \N NONE +v.金额 SMALLINT Yes false \N NONE -- !sql_12 -- k BIGINT Yes true \N diff --git a/regression-test/suites/variant_p0/agg.groovy b/regression-test/suites/variant_p0/agg.groovy new file mode 100644 index 00000000000..b5010ee7f72 --- /dev/null +++ b/regression-test/suites/variant_p0/agg.groovy @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("regression_test_variant_agg"){ + sql """DROP TABLE IF EXISTS var_agg""" + sql """ + CREATE TABLE IF NOT EXISTS var_agg ( + k bigint, + v variant replace, + s bigint sum + ) + AGGREGATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1", "disable_auto_compaction" = "false"); + """ + sql """insert into var_agg values (1, '[1]', 1),(1, '{"a" : 1}', 1);""" + sql """insert into var_agg values (2, '[2]', 2),(1, '{"a" : [[[1]]]}', 2);""" + sql """insert into var_agg values (3, '3', 3),(1, '{"a" : 1}', 3), (1, '{"a" : [1]}', 3);""" + sql """insert into var_agg values (4, '"4"', 4),(1, '{"a" : "1223"}', 4);""" + sql """insert into var_agg values (5, '5', 5),(1, '{"a" : [1]}', 5);""" + sql """insert into var_agg values (6, '"[6]"', 6),(1, '{"a" : ["1", 2, 1.1]}', 6);""" + sql """insert into var_agg values (7, '7', 7),(1, '{"a" : 1, "b" : {"c" : 1}}', 7);""" + sql """insert into var_agg values (8, '8.11111', 8),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}', 8);""" + sql """insert into var_agg values (9, '"9999"', 9),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}', 9);""" + sql """insert into var_agg values (10, '1000000', 10),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}', 10);""" + sql """insert into var_agg values (11, '[123.0]', 11),(1999, '{"a" : 1, "b" : {"c" : 1}}', 11),(19921, '{"a" : 1, "d" : 10}', 11);""" + sql """insert into var_agg values (12, '[123.2]', 12),(1022, '{"a" : 1, "b" : {"f" : 17034, "g" :1.111 }}', 12),(1029, '{"a" : 1, "b" : {"c" : 1}}', 12);""" + qt_sql1 "select k, cast(v['a'] as array<int>) from var_agg where size(cast(v['a'] as array<int>)) > 0 order by k, cast(v['a'] as string) asc" + qt_sql2 "select k, cast(v as int), cast(v['b'] as string) from var_agg where length(cast(v['b'] as string)) > 4 order by k, cast(v as string), cast(v['b'] as string) " + qt_sql3 "select k, v from var_agg order by k, cast(v as string) limit 5" + qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k,cast(v as string) desc limit 10000;" + qt_sql5 "select v['b'] from var_agg where cast(v['b'] as int) > 0;" + qt_sql6 "select cast(v['b'] as string) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) " + qt_sql7 "select * from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) " + qt_sql8 "select * from var_agg order by 1, cast(2 as string), 3" + sql "alter table var_agg drop column s" + sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str + union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 11245, "x" : 42005}' as json_str from numbers("number" = "1024") limit 1024;""" + sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str + union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 11245, "y" : 11111111}' as json_str from numbers("number" = "2048") where number > 1024 limit 1024;""" + sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str + union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 11245, "c" : 1.11}' as json_str from numbers("number" = "1024") limit 1024;""" + sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str + union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 
11245, "e" : [123456]}' as json_str from numbers("number" = "1024") limit 1024;""" + sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str + union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 11245, "f" : ["123456"]}' as json_str from numbers("number" = "1024") limit 1024;""" + qt_sql9 "select * from var_agg order by cast(2 as string), 3, 1 limit 10" + qt_sql9 "select * from var_agg where k > 1024 order by cast(2 as string), 3, 1 limit 10" +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/compaction/test_compaction.groovy b/regression-test/suites/variant_p0/compaction/test_compaction.groovy index e5359f305fc..48d916e38e3 100644 --- a/regression-test/suites/variant_p0/compaction/test_compaction.groovy +++ b/regression-test/suites/variant_p0/compaction/test_compaction.groovy @@ -41,10 +41,14 @@ suite("test_compaction_variant") { } def create_table = { tableName, buckets="auto", key_type="DUPLICATE" -> sql "DROP TABLE IF EXISTS ${tableName}" + def var_def = "variant" + if (key_type == "AGGREGATE") { + var_def = "variant replace" + } sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( k bigint, - v variant + v ${var_def} ) ${key_type} KEY(`k`) DISTRIBUTED BY HASH(k) BUCKETS ${buckets} @@ -52,7 +56,8 @@ suite("test_compaction_variant") { """ } - def key_types = ["DUPLICATE", "UNIQUE"] + def key_types = ["DUPLICATE", "UNIQUE", "AGGREGATE"] + // def key_types = ["AGGREGATE"] for (int i = 0; i < key_types.size(); i++) { def tableName = "simple_variant_${key_types[i]}" // 1. simple cases @@ -62,7 +67,7 @@ suite("test_compaction_variant") { sql """insert into ${tableName} values (2, '{"a" : "1"}'),(14, '{"a" : [[[1]]]}');""" sql """insert into ${tableName} values (3, '{"x" : [3]}'),(15, '{"a" : 1}')""" sql """insert into ${tableName} values (4, '{"y": 1}'),(16, '{"a" : "1223"}');""" - sql """insert into ${tableName} values (5, '{"z" : 2}'),(17, '{"a" : [1]}');""" + sql """insert into ${tableName} values (5, '{"z" : 2.0}'),(17, '{"a" : [1]}');""" sql """insert into ${tableName} values (6, '{"x" : 111}'),(18, '{"a" : ["1", 2, 1.1]}');""" sql """insert into ${tableName} values (7, '{"m" : 1}'),(19, '{"a" : 1, "b" : {"c" : 1}}');""" sql """insert into ${tableName} values (8, '{"l" : 2}'),(20, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org