This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 341822ec05d [regression-test](Variant) add compaction case for variant and fix bugs (#28066)
341822ec05d is described below

commit 341822ec05dbac6f14491cf62f5eac4ddbe633cd
Author: lihangyu <15605149...@163.com>
AuthorDate: Fri Dec 8 12:18:46 2023 +0800

    [regression-test](Variant) add compaction case for variant and fix bugs (#28066)
---
 .../rowset/segment_v2/hierarchical_data_reader.cpp |   5 +-
 .../rowset/segment_v2/hierarchical_data_reader.h   |   7 +-
 be/src/olap/rowset/segment_v2/segment.cpp          |   6 +-
 be/src/vec/functions/function_cast.h               |  18 +-
 be/src/vec/json/path_in_data.cpp                   |  11 +-
 be/src/vec/json/path_in_data.h                     |   3 +-
 .../data/variant_p0/compaction/test_compaction.out | 281 +++++++++++++++++++++
 regression-test/data/variant_p0/load.out           |   6 +-
 .../variant_p0/compaction/test_compaction.groovy   | 143 +++++++++++
 regression-test/suites/variant_p0/load.groovy      |  10 +-
 10 files changed, 464 insertions(+), 26 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index 69e18cb14ca..09764321223 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -32,11 +32,12 @@ namespace doris {
 namespace segment_v2 {
 
 Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
+                                      vectorized::PathInData path,
                                       const SubcolumnColumnReaders::Node* node,
                                       const SubcolumnColumnReaders::Node* root,
                                       bool output_as_raw_json) {
     // A non-leaf node needs to be merged with the root
-    auto* stream_iter = new HierarchicalDataReader(node->path, output_as_raw_json);
+    auto* stream_iter = new HierarchicalDataReader(path, output_as_raw_json);
     std::vector<const SubcolumnColumnReaders::Node*> leaves;
     vectorized::PathsInData leaves_paths;
     SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths);
@@ -181,7 +182,7 @@ Status ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t nrows
     // since some other column may depend on it.
     vectorized::MutableColumnPtr extracted_column;
     RETURN_IF_ERROR(root.extract_root( // trim the root name, e.g. v.a.b -> a.b
-            _col.path_info().pop_front(), extracted_column));
+            _col.path_info().copy_pop_front(), extracted_column));
     if (variant.empty() || variant.is_null_root()) {
         variant.create_root(root.get_root_type(), std::move(extracted_column));
     } else {
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
index 9b8b25b26b0..58da2a43435 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
@@ -66,7 +66,7 @@ public:
     HierarchicalDataReader(const vectorized::PathInData& path, bool output_as_raw_json = false)
             : _path(path), _output_as_raw_json(output_as_raw_json) {}
 
-    static Status create(std::unique_ptr<ColumnIterator>* reader,
+    static Status create(std::unique_ptr<ColumnIterator>* reader, vectorized::PathInData path,
                          const SubcolumnColumnReaders::Node* target_node,
                          const SubcolumnColumnReaders::Node* root,
                          bool output_as_raw_json = false);
@@ -143,8 +143,9 @@ private:
         RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
             vectorized::MutableColumnPtr column = node.data.column->get_ptr();
-            bool add = container_variant.add_sub_column(node.path.pop_front(), std::move(column),
-                                                        node.data.type);
+            bool add = container_variant.add_sub_column(
+                    node.path.copy_pop_nfront(_path.get_parts().size()), std::move(column),
+                    node.data.type);
             if (!add) {
                 return Status::InternalError("Duplicated {}, type {}", node.path.get_path(),
                                              node.data.type->get_name());
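A note on the add_sub_column() change above: the key under which a subcolumn is registered in the merged container must be relative to the path the reader was created for. The old pop_front() stripped exactly one leading part, which produces the wrong key once the requested column path has more than one part. A minimal stand-alone sketch of the intended semantics -- Path and the copy_pop_nfront() helper below are illustrative stand-ins, not the real vectorized::PathInData:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    // Stand-in for vectorized::PathInData: a path is just its list of parts.
    using Path = std::vector<std::string>;

    // Mirrors PathInData::copy_pop_nfront: return a copy with the first n
    // parts dropped; the empty path when n covers every part.
    Path copy_pop_nfront(const Path& path, size_t n) {
        if (n >= path.size()) {
            return {};
        }
        return Path(path.begin() + n, path.end());
    }

    int main() {
        Path leaf = {"v", "b", "c"}; // subcolumn v.b.c stored on disk
        Path target = {"v", "b"};    // the reader was created for column v.b

        // New behavior: strip every part of the target path, key becomes "c".
        assert((copy_pop_nfront(leaf, target.size()) == Path {"c"}));
        // Old pop_front() behavior: always strip one part, key becomes "b.c",
        // which is wrong whenever the target path has more than one part.
        assert((copy_pop_nfront(leaf, 1) == Path {"b", "c"}));
        return 0;
    }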
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index f907bca674d..e3d1ce0c599 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -445,7 +445,8 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column,
     // Alter table operations should read the whole variant column, since they are not aware of
     // the subcolumns of a variant while rewriting rowsets.
     // This is slow, since it needs to read all subcolumns and merge them into a single column
-    RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root, output_as_raw_json));
+    RETURN_IF_ERROR(HierarchicalDataReader::create(iter, tablet_column.path_info(), node, root,
+                                                   output_as_raw_json));
     return Status::OK();
 }
@@ -475,7 +476,8 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column,
         iter->reset(it);
     } else if (node != nullptr && !node->children.empty()) {
         // Create reader with hierarchical data
-        RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root));
+        RETURN_IF_ERROR(
+                HierarchicalDataReader::create(iter, tablet_column.path_info(), node, root));
     } else {
         // If the file only contains columns `v.a` and `v` but the target path is `v.b`, only read and parse the root column
         if (root == nullptr) {
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index 301c5d3725b..48428333c77 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -2070,8 +2070,16 @@ private:
         if (!variant.is_finalized()) {
             variant.assume_mutable()->finalize();
         }
-
-        if (variant.is_scalar_variant()) {
+        // It's important to convert as many elements as possible in this context. For instance,
+        // if the root of this variant column is a number column, converting it to a number column
+        // is acceptable. However, if the destination type is a string and the root is not a
+        // scalar, then we should convert the entire tree to a string.
+        bool is_root_valuable =
+                variant.is_scalar_variant() ||
+                (!variant.is_null_root() &&
+                 !WhichDataType(remove_nullable(variant.get_root_type())).is_nothing() &&
+                 !WhichDataType(data_type_to).is_string());
+        if (is_root_valuable) {
             ColumnPtr nested = variant.get_root();
             auto nested_from_type = variant.get_root_type();
             // DCHECK(nested_from_type->is_nullable());
@@ -2104,12 +2112,6 @@ private:
             col_to = make_nullable(col_to, true);
         } else if (!data_type_to->is_nullable() &&
                    !WhichDataType(data_type_to).is_string()) {
-            // Could not cast to any other types when it hierarchical like '{"a" : 1}'
-            // TODO we should convert as many as possible here, for examle
-            // this variant column's root is a number column, to convert to number column
-            // is also acceptable
-            // return Status::InvalidArgument(fmt::format("Could not cast from variant to {}",
-            //                                            data_type_to->get_name()));
             col_to->assume_mutable()->insert_many_defaults(input_rows_count);
             col_to = make_nullable(col_to, true);
         } else if (WhichDataType(data_type_to).is_string()) {
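The is_root_valuable expression above packs several conditions into one predicate. Spelled out as a stand-alone function with each clause named -- a reading aid only, not the committed code, which queries the variant column and WhichDataType directly:

    // Paraphrase of the committed predicate in function_cast.h, with each
    // input lifted into a named boolean parameter.
    bool is_root_valuable(bool is_scalar_variant, bool root_is_null,
                          bool root_type_is_nothing, bool dest_is_string) {
        // A scalar variant can always be cast through its root column.
        if (is_scalar_variant) {
            return true;
        }
        // Otherwise the root is usable only if it exists, carries a real
        // (non-Nothing) type, and the destination is not a string: casting a
        // hierarchical variant to string must serialize the whole tree
        // instead of just the root.
        return !root_is_null && !root_type_is_nothing && !dest_is_string;
    }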
diff --git a/be/src/vec/json/path_in_data.cpp b/be/src/vec/json/path_in_data.cpp
index 3eab57f7812..1c02febd446 100644
--- a/be/src/vec/json/path_in_data.cpp
+++ b/be/src/vec/json/path_in_data.cpp
@@ -150,11 +150,18 @@ size_t PathInData::Hash::operator()(const PathInData& value) const {
     return hash.low ^ hash.high;
 }
 
-PathInData PathInData::pop_front() const {
+PathInData PathInData::copy_pop_front() const {
+    return copy_pop_nfront(1);
+}
+
+PathInData PathInData::copy_pop_nfront(size_t n) const {
+    if (n >= parts.size()) {
+        return {};
+    }
     PathInData new_path;
     Parts new_parts;
     if (!parts.empty()) {
-        std::copy(parts.begin() + 1, parts.end(), std::back_inserter(new_parts));
+        std::copy(parts.begin() + n, parts.end(), std::back_inserter(new_parts));
     }
     new_path.build_path(new_parts);
     new_path.build_parts(new_parts);
diff --git a/be/src/vec/json/path_in_data.h b/be/src/vec/json/path_in_data.h
index 87278c1c925..6531a8bfc6a 100644
--- a/be/src/vec/json/path_in_data.h
+++ b/be/src/vec/json/path_in_data.h
@@ -75,7 +75,8 @@ public:
     };
     std::string to_jsonpath() const;
 
-    PathInData pop_front() const;
+    PathInData copy_pop_front() const;
+    PathInData copy_pop_nfront(size_t n) const;
 
     void to_protobuf(segment_v2::ColumnPathInfo* pb, int32_t parent_col_unique_id) const;
     void from_protobuf(const segment_v2::ColumnPathInfo& pb);
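Two details of the renamed API are easy to miss: the copy_ prefix signals that the receiver is left untouched (the old name pop_front suggested in-place mutation), and the n-ary form needs the explicit n >= parts.size() guard because parts.begin() + n would run past the end on short paths; the result in that case is canonically the empty path. Reusing the same illustrative stand-in as in the earlier sketch (again, not the real class):

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    using Path = std::vector<std::string>;

    Path copy_pop_nfront(const Path& path, size_t n) {
        if (n >= path.size()) {
            return {}; // consuming every part yields the empty path
        }
        return Path(path.begin() + n, path.end());
    }

    Path copy_pop_front(const Path& path) {
        return copy_pop_nfront(path, 1);
    }

    int main() {
        // ExtractReader's "trim the root name": v.a.b -> a.b
        assert((copy_pop_front({"v", "a", "b"}) == Path {"a", "b"}));
        // Popping n parts from an n-part path is well defined and empty.
        assert(copy_pop_nfront({"v"}, 1).empty());
        return 0;
    }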
diff --git a/regression-test/data/variant_p0/compaction/test_compaction.out b/regression-test/data/variant_p0/compaction/test_compaction.out
new file mode 100644
index 00000000000..5118c2824e1
--- /dev/null
+++ b/regression-test/data/variant_p0/compaction/test_compaction.out
@@ -0,0 +1,281 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql_1 --
+1 {"x":[1]}
+1 {"x":[1]}
+2 {"a":"1"}
+2 {"a":"1"}
+3 {"x":[3]}
+3 {"x":[3]}
+4 {"y":1}
+4 {"y":1}
+5 {"z":2.0}
+5 {"z":2.0}
+6 {"x":111}
+6 {"x":111}
+7 {"m":1}
+7 {"m":1}
+8 {"l":2}
+8 {"l":2}
+9 {"g":1.11}
+9 {"g":1.11}
+10 {"z":1.1111}
+10 {"z":1.1111}
+11 {"sala":0}
+11 {"sala":0}
+12 {"dddd":0.1}
+12 {"dddd":0.1}
+13 {"a":1}
+13 {"a":1}
+14 {"a":[[[1]]]}
+14 {"a":[[[1]]]}
+15 {"a":1}
+15 {"a":1}
+16 {"a":"1223"}
+16 {"a":"1223"}
+17 {"a":[1]}
+17 {"a":[1]}
+18 {"a":["1",2,1.1]}
+18 {"a":["1",2,1.1]}
+19 {"b":{"c":1},"a":1}
+19 {"b":{"c":1},"a":1}
+20 {"b":{"c":[{"a":1}]},"a":1}
+20 {"b":{"c":[{"a":1}]},"a":1}
+21 {"b":{"c":[{"a":1}]},"a":1}
+21 {"b":{"c":[{"a":1}]},"a":1}
+22 {"b":{"c":[{"a":1}]},"a":1}
+22 {"b":{"c":[{"a":1}]},"a":1}
+1022 {"b":10,"a":1}
+1022 {"b":10,"a":1}
+1029 {"b":{"c":1},"a":1}
+1029 {"b":{"c":1},"a":1}
+1999 {"b":{"c":1},"a":1}
+1999 {"b":{"c":1},"a":1}
+19921 {"b":10,"a":1}
+19921 {"b":10,"a":1}
+
+-- !sql_2 --
+14 [null]
+14 [null]
+17 [1]
+17 [1]
+18 [1, 2, null]
+18 [1, 2, null]
+
+-- !sql_3 --
+19 1 {"c":1}
+19 1 {"c":1}
+20 1 {"c":[{"a":1}]}
+20 1 {"c":[{"a":1}]}
+21 1 {"c":[{"a":1}]}
+21 1 {"c":[{"a":1}]}
+22 1 {"c":[{"a":1}]}
+22 1 {"c":[{"a":1}]}
+1029 1 {"c":1}
+1029 1 {"c":1}
+1999 1 {"c":1}
+1999 1 {"c":1}
+
+-- !sql_5 --
+10 \N
+10 \N
+{"c":1} 1
+{"c":1} 1
+{"c":1} 1
+{"c":1} 1
+10 \N
+10 \N
+{"c":[{"a":1}]} [{"a":1}]
+{"c":[{"a":1}]} [{"a":1}]
+
+-- !sql_11 --
+1 {"x":[1]}
+1 {"x":[1]}
+2 {"a":"1"}
+2 {"a":"1"}
+3 {"x":[3]}
+3 {"x":[3]}
+4 {"y":1}
+4 {"y":1}
+5 {"z":2.0}
+5 {"z":2.0}
+6 {"x":111}
+6 {"x":111}
+7 {"m":1}
+7 {"m":1}
+8 {"l":2}
+8 {"l":2}
+9 {"g":1.11}
+9 {"g":1.11}
+10 {"z":1.1111}
+10 {"z":1.1111}
+11 {"sala":0}
+11 {"sala":0}
+12 {"dddd":0.1}
+12 {"dddd":0.1}
+13 {"a":1}
+13 {"a":1}
+14 {"a":[[[1]]]}
+14 {"a":[[[1]]]}
+15 {"a":1}
+15 {"a":1}
+16 {"a":"1223"}
+16 {"a":"1223"}
+17 {"a":[1]}
+17 {"a":[1]}
+18 {"a":["1",2,1.1]}
+18 {"a":["1",2,1.1]}
+19 {"b":{"c":1},"a":1}
+19 {"b":{"c":1},"a":1}
+20 {"b":{"c":[{"a":1}]},"a":1}
+20 {"b":{"c":[{"a":1}]},"a":1}
+21 {"b":{"c":[{"a":1}]},"a":1}
+21 {"b":{"c":[{"a":1}]},"a":1}
+22 {"b":{"c":[{"a":1}]},"a":1}
+22 {"b":{"c":[{"a":1}]},"a":1}
+1022 {"b":10,"a":1}
+1022 {"b":10,"a":1}
+1029 {"b":{"c":1},"a":1}
+1029 {"b":{"c":1},"a":1}
+1999 {"b":{"c":1},"a":1}
+1999 {"b":{"c":1},"a":1}
+19921 {"b":10,"a":1}
+19921 {"b":10,"a":1}
+
+-- !sql_22 --
+14 [null]
+14 [null]
+17 [1]
+17 [1]
+18 [1, 2, null]
+18 [1, 2, null]
+
+-- !sql_33 --
+19 1 {"c":1}
+19 1 {"c":1}
+20 1 {"c":[{"a":1}]}
+20 1 {"c":[{"a":1}]}
+21 1 {"c":[{"a":1}]}
+21 1 {"c":[{"a":1}]}
+22 1 {"c":[{"a":1}]}
+22 1 {"c":[{"a":1}]}
+1029 1 {"c":1}
+1029 1 {"c":1}
+1999 1 {"c":1}
+1999 1 {"c":1}
+
+-- !sql_55 --
+10 \N
+10 \N
+{"c":1} 1
+{"c":1} 1
+{"c":1} 1
+{"c":1} 1
+10 \N
+10 \N
+{"c":[{"a":1}]} [{"a":1}]
+{"c":[{"a":1}]} [{"a":1}]
{"c":1} +1999 1 {"c":1} + +-- !sql_5 -- +10 \N +{"c":1} 1 +{"c":1} 1 +10 \N +{"c":[{"a":1}]} [{"a":1}] +{"c":[{"a":1}]} [{"a":1}] +{"c":[{"a":1}]} [{"a":1}] +{"c":1} 1 +{} \N +{} \N + +-- !sql_11 -- +1 {"x":[1]} +2 {"a":"1"} +3 {"x":[3]} +4 {"y":1} +5 {"z":2.0} +6 {"x":111} +7 {"m":1} +8 {"l":2} +9 {"g":1.11} +10 {"z":1.1111} +11 {"sala":0} +12 {"dddd":0.1} +13 {"a":1} +14 {"a":[[[1]]]} +15 {"a":1} +16 {"a":"1223"} +17 {"a":[1]} +18 {"a":["1",2,1.1]} +19 {"b":{"c":1},"a":1} +20 {"b":{"c":[{"a":1}]},"a":1} +21 {"b":{"c":[{"a":1}]},"a":1} +22 {"b":{"c":[{"a":1}]},"a":1} +1022 {"b":10,"a":1} +1029 {"b":{"c":1},"a":1} +1999 {"b":{"c":1},"a":1} +19921 {"b":10,"a":1} + +-- !sql_22 -- +14 [null] +17 [1] +18 [1, 2, null] + +-- !sql_33 -- +19 1 {"c":1} +20 1 {"c":[{"a":1}]} +21 1 {"c":[{"a":1}]} +22 1 {"c":[{"a":1}]} +1029 1 {"c":1} +1999 1 {"c":1} + +-- !sql_55 -- +10 \N +{"c":1} 1 +{"c":1} 1 +10 \N +{"c":[{"a":1}]} [{"a":1}] +{"c":[{"a":1}]} [{"a":1}] +{"c":[{"a":1}]} [{"a":1}] +{"c":1} 1 + diff --git a/regression-test/data/variant_p0/load.out b/regression-test/data/variant_p0/load.out index 490da2618da..b25d275b56f 100644 --- a/regression-test/data/variant_p0/load.out +++ b/regression-test/data/variant_p0/load.out @@ -149,7 +149,7 @@ [123] -- !sql_25 -- -50000 55000.000000002256 6150000 +50000 55000.00000000374 6150000 -- !sql_26 -- 5000 @@ -175,7 +175,7 @@ [{"a":1},123] -- !sql_30 -- -7.111 [123,{"xx":1}] {"b":{"c":456,"e":7.111}} 456 +7.111 [123,{"xx":1}] {"c":456,"e":7.111} 456 -- !sql_30 -- {"a":1123} @@ -211,7 +211,7 @@ "2023-06-21 16:35:58.468 INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter : REQUEST DATA: {String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\ [...] -- !sql_33_1 -- -"2023-06-21 16:35:58.468 INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter : REQUEST DATA: {String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\ [...] +2023-06-21 16:35:58.468 INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter : REQUEST DATA: {String={"id":"3293880a36cd163754ea4f90270331f6","token":"3293880a36cd163754ea4f90270331f6","line_items":[{"id":43073082228985,"properties":{},"quantity":1000000,"variant_id":43073082228985,"key":"43073082228985:381f0b4b03d0c76493aa028c4ed006a9","discounted_price":"28.95","discounts":[],"gift_card":false,"grams":0,"line_price [...] 
 
 -- !sql_34 --
 "2023-06-21 16:35:58.468 INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter : REQUEST DATA: {String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\ [...]
diff --git a/regression-test/suites/variant_p0/compaction/test_compaction.groovy b/regression-test/suites/variant_p0/compaction/test_compaction.groovy
new file mode 100644
index 00000000000..199fc4aa28c
--- /dev/null
+++ b/regression-test/suites/variant_p0/compaction/test_compaction.groovy
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_compaction_variant") {
+    try {
+        String backend_id;
+        def backendId_to_backendIP = [:]
+        def backendId_to_backendHttpPort = [:]
+        getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
+
+        backend_id = backendId_to_backendIP.keySet()[0]
+        def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id))
+
+        logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err)
+        assertEquals(code, 0)
+        def configList = parseJson(out.trim())
+        assert configList instanceof List
+
+        boolean disableAutoCompaction = true
+        for (Object ele in (List) configList) {
+            assert ele instanceof List<String>
+            if (((List<String>) ele)[0] == "disable_auto_compaction") {
+                disableAutoCompaction = Boolean.parseBoolean(((List<String>) ele)[2])
+            }
+        }
+        def create_table = { tableName, buckets="auto", key_type="DUPLICATE" ->
+            sql "DROP TABLE IF EXISTS ${tableName}"
+            sql """
+                CREATE TABLE IF NOT EXISTS ${tableName} (
+                    k bigint,
+                    v variant
+                )
+                ${key_type} KEY(`k`)
+                DISTRIBUTED BY HASH(k) BUCKETS ${buckets}
+                properties("replication_num" = "1", "disable_auto_compaction" = "false");
+            """
+        }
+
+        def key_types = ["DUPLICATE", "UNIQUE"]
+        for (int i = 0; i < key_types.size(); i++) {
+            def tableName = "simple_variant_${key_types[i]}"
+            // 1. simple cases
+            create_table.call(tableName, "1", key_types[i])
+            def insert = {
+                sql """insert into ${tableName} values (1, '{"x" : [1]}'),(13, '{"a" : 1}');"""
+                sql """insert into ${tableName} values (2, '{"a" : "1"}'),(14, '{"a" : [[[1]]]}');"""
+                sql """insert into ${tableName} values (3, '{"x" : [3]}'),(15, '{"a" : 1}')"""
+                sql """insert into ${tableName} values (4, '{"y": 1}'),(16, '{"a" : "1223"}');"""
+                sql """insert into ${tableName} values (5, '{"z" : 2}'),(17, '{"a" : [1]}');"""
+                sql """insert into ${tableName} values (6, '{"x" : 111}'),(18, '{"a" : ["1", 2, 1.1]}');"""
+                sql """insert into ${tableName} values (7, '{"m" : 1}'),(19, '{"a" : 1, "b" : {"c" : 1}}');"""
+                sql """insert into ${tableName} values (8, '{"l" : 2}'),(20, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
+                sql """insert into ${tableName} values (9, '{"g" : 1.11}'),(21, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
+                sql """insert into ${tableName} values (10, '{"z" : 1.1111}'),(22, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
+                sql """insert into ${tableName} values (11, '{"sala" : 0}'),(1999, '{"a" : 1, "b" : {"c" : 1}}'),(19921, '{"a" : 1, "b" : 10}');"""
+                sql """insert into ${tableName} values (12, '{"dddd" : 0.1}'),(1022, '{"a" : 1, "b" : 10}'),(1029, '{"a" : 1, "b" : {"c" : 1}}');"""
+            }
+            insert.call();
+            insert.call();
+            qt_sql_1 "SELECT * FROM ${tableName} ORDER BY k, cast(v as string); "
+            qt_sql_2 "select k, cast(v:a as array<int>) from ${tableName} where size(cast(v:a as array<int>)) > 0 order by k"
+            qt_sql_3 "select k, v:a, cast(v:b as string) from ${tableName} where length(cast(v:b as string)) > 4 order by k"
+            // qt_sql_4 "select k, cast(v:b as string), cast(v:a as string), cast(v:c as string) from ${tableName} where order by k limit 5"
+            qt_sql_5 "select cast(v:b as string), cast(v:b.c as string) from ${tableName} where cast(v:b as string) != 'null' or cast(v:b as string) != '{}' order by k desc limit 10;"
+
+
+            //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,QueryHits,PathHash,MetaUrl,CompactionStatus
+            String[][] tablets = sql """ show tablets from ${tableName}; """
+
+            // trigger compactions for all tablets in ${tableName}
+            for (String[] tablet in tablets) {
+                String tablet_id = tablet[0]
+                backend_id = tablet[2]
+                (code, out, err) = be_run_cumulative_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactJson = parseJson(out.trim())
+                if (compactJson.status.toLowerCase() == "fail") {
+                    assertEquals(disableAutoCompaction, false)
+                    logger.info("Compaction was done automatically!")
+                }
+                if (disableAutoCompaction) {
+                    assertEquals("success", compactJson.status.toLowerCase())
+                }
+            }
+
+            // wait for all compactions done
+            for (String[] tablet in tablets) {
+                boolean running = true
+                do {
+                    Thread.sleep(1000)
+                    String tablet_id = tablet[0]
+                    backend_id = tablet[2]
+                    (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                    logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+                    assertEquals(code, 0)
+                    def compactionStatus = parseJson(out.trim())
+                    assertEquals("success", compactionStatus.status.toLowerCase())
+                    running = compactionStatus.run_status
+                } while (running)
+            }
+
+            int rowCount = 0
+            for (String[] tablet in tablets) {
+                String tablet_id = tablet[0]
+                def compactionStatusUrlIndex = 18
+                (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+                logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def tabletJson = parseJson(out.trim())
+                assert tabletJson.rowsets instanceof List
+                for (String rowset in (List<String>) tabletJson.rowsets) {
+                    rowCount += Integer.parseInt(rowset.split(" ")[1])
+                }
+            }
+            assert (rowCount < 8)
+            qt_sql_11 "SELECT * FROM ${tableName} ORDER BY k, cast(v as string); "
+            qt_sql_22 "select k, cast(v:a as array<int>) from ${tableName} where size(cast(v:a as array<int>)) > 0 order by k"
+            qt_sql_33 "select k, v:a, cast(v:b as string) from ${tableName} where length(cast(v:b as string)) > 4 order by k"
+            // qt_sql_44 "select k, cast(v:b as string), cast(v:a as string), cast(v:c as string) from ${tableName} order by k limit 5"
+            qt_sql_55 "select cast(v:b as string), cast(v:b.c as string) from ${tableName} where cast(v:b as string) != 'null' and cast(v:b as string) != '{}' order by k desc limit 10;"
+        }
+
+    } finally {
+        // try_sql("DROP TABLE IF EXISTS ${tableName}")
+    }
+}
diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy
index 57737397b49..e0eb85c9e45 100644
--- a/regression-test/suites/variant_p0/load.groovy
+++ b/regression-test/suites/variant_p0/load.groovy
@@ -93,7 +93,7 @@ suite("regression_test_variant", "variant_type"){
         sql """insert into ${table_name} values (10, '1000000'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
         sql """insert into ${table_name} values (11, '[123.0]'),(1999, '{"a" : 1, "b" : {"c" : 1}}'),(19921, '{"a" : 1, "b" : 10}');"""
         sql """insert into ${table_name} values (12, '[123.2]'),(1022, '{"a" : 1, "b" : 10}'),(1029, '{"a" : 1, "b" : {"c" : 1}}');"""
-        qt_sql "select k, cast(v:a as array<int>) from ${table_name} where size(cast(v:a as array<int>)) > 0 order by k, cast(v as string);"
+        qt_sql "select k, cast(v:a as array<int>) from ${table_name} where size(cast(v:a as array<int>)) > 0 order by k, cast(v:a as string) asc"
         // cast v:b as int should be correct
         // FIXME: unstable, todo use qt_sql
         sql "select k, v, cast(v:b as string) from ${table_name} where length(cast(v:b as string)) > 4 order by k, cast(v as string)"
@@ -292,23 +292,23 @@ suite("regression_test_variant", "variant_type"){
         set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
         load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""")
         qt_sql_32 """ select json_extract(v, "\$.json.parseFailed") from logdata where json_extract(v, "\$.json.parseFailed") != 'null' order by k limit 1;"""
-        qt_sql_32_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
+        qt_sql_32_1 """select cast(v:json.parseFailed as string) from logdata where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
         sql "truncate table ${table_name}"
 
         // 0.95 default ratio
         set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
         load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""")
         qt_sql_33 """ select json_extract(v,"\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;"""
-        qt_sql_33_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
+        qt_sql_33_1 """select cast(v:json.parseFailed as string) from logdata where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
         sql "truncate table ${table_name}"
 
         // always sparse column
-        set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.85")
+        set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
         load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""")
         qt_sql_34 """ select json_extract(v, "\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;"""
         sql "truncate table ${table_name}"
 
         qt_sql_35 """select json_extract(v,"\$.json.parseFailed") from logdata where k = 162 and json_extract(v,"\$.json.parseFailed") != 'null';"""
-        qt_sql_35_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
+        qt_sql_35_1 """select cast(v:json.parseFailed as string) from logdata where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
 
         // TODO add test case that certain columns are materialized in some files while others are not materialized (sparse)
         // unique table

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org