This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 341822ec05d [regression-test](Variant) add compaction case for variant and fix bugs (#28066)
341822ec05d is described below

commit 341822ec05dbac6f14491cf62f5eac4ddbe633cd
Author: lihangyu <15605149...@163.com>
AuthorDate: Fri Dec 8 12:18:46 2023 +0800

    [regression-test](Variant) add compaction case for variant and fix bugs (#28066)
---
 .../rowset/segment_v2/hierarchical_data_reader.cpp |   5 +-
 .../rowset/segment_v2/hierarchical_data_reader.h   |   7 +-
 be/src/olap/rowset/segment_v2/segment.cpp          |   6 +-
 be/src/vec/functions/function_cast.h               |  18 +-
 be/src/vec/json/path_in_data.cpp                   |  11 +-
 be/src/vec/json/path_in_data.h                     |   3 +-
 .../data/variant_p0/compaction/test_compaction.out | 281 +++++++++++++++++++++
 regression-test/data/variant_p0/load.out           |   6 +-
 .../variant_p0/compaction/test_compaction.groovy   | 143 +++++++++++
 regression-test/suites/variant_p0/load.groovy      |  10 +-
 10 files changed, 464 insertions(+), 26 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index 69e18cb14ca..09764321223 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -32,11 +32,12 @@ namespace doris {
 namespace segment_v2 {
 
 Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
+                                      vectorized::PathInData path,
                                       const SubcolumnColumnReaders::Node* node,
                                       const SubcolumnColumnReaders::Node* root,
                                       bool output_as_raw_json) {
     // Non-leaf nodes need to be merged with the root
-    auto* stream_iter = new HierarchicalDataReader(node->path, output_as_raw_json);
+    auto* stream_iter = new HierarchicalDataReader(path, output_as_raw_json);
     std::vector<const SubcolumnColumnReaders::Node*> leaves;
     vectorized::PathsInData leaves_paths;
     SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths);
@@ -181,7 +182,7 @@ Status ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t nrows
     // since some other column may depend on it.
     vectorized::MutableColumnPtr extracted_column;
     RETURN_IF_ERROR(root.extract_root( // trim the root name, eg. v.a.b -> a.b
-            _col.path_info().pop_front(), extracted_column));
+            _col.path_info().copy_pop_front(), extracted_column));
     if (variant.empty() || variant.is_null_root()) {
         variant.create_root(root.get_root_type(), std::move(extracted_column));
     } else {
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
index 9b8b25b26b0..58da2a43435 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
@@ -66,7 +66,7 @@ public:
     HierarchicalDataReader(const vectorized::PathInData& path, bool output_as_raw_json = false)
             : _path(path), _output_as_raw_json(output_as_raw_json) {}
 
-    static Status create(std::unique_ptr<ColumnIterator>* reader,
+    static Status create(std::unique_ptr<ColumnIterator>* reader, vectorized::PathInData path,
                          const SubcolumnColumnReaders::Node* target_node,
                          const SubcolumnColumnReaders::Node* root, bool output_as_raw_json = false);
 
@@ -143,8 +143,9 @@ private:
 
         RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
             vectorized::MutableColumnPtr column = node.data.column->get_ptr();
-            bool add = container_variant.add_sub_column(node.path.pop_front(), std::move(column),
-                                                        node.data.type);
+            bool add = container_variant.add_sub_column(
+                    node.path.copy_pop_nfront(_path.get_parts().size()), std::move(column),
+                    node.data.type);
             if (!add) {
                 return Status::InternalError("Duplicated {}, type {}", node.path.get_path(),
                                              node.data.type->get_name());
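
The hunk above re-roots each subcolumn key relative to the reader's target path
instead of always dropping a single leading part. A minimal sketch of the
intended semantics, using a plain std::vector<std::string> as a hypothetical
stand-in for PathInData::Parts (not the real API):

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for PathInData::Parts; the real class carries more state.
    using Parts = std::vector<std::string>;

    // Sketch of copy_pop_nfront: return a copy with the first n parts removed,
    // or an empty path when n consumes the whole path.
    Parts copy_pop_nfront(const Parts& parts, std::size_t n) {
        if (n >= parts.size()) {
            return {};
        }
        return Parts(parts.begin() + n, parts.end());
    }

    int main() {
        // The target (query) path is v.b; a leaf stored at v.b.c must be keyed
        // as "c" inside the container, i.e. relative to the target path.
        Parts target = {"v", "b"};
        Parts leaf = {"v", "b", "c"};
        // Old behavior, pop_front(): always dropped one part -> {"b", "c"}.
        // New behavior: drop as many parts as the target path has -> {"c"}.
        assert((copy_pop_nfront(leaf, target.size()) == Parts{"c"}));
        return 0;
    }

This is also why create() now takes the query path explicitly: node->path is
the storage path of the matched node, which need not equal the path being read.
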
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index f907bca674d..e3d1ce0c599 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -445,7 +445,8 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column,
         // An alter table operation should read the whole variant column, since it is not
         // aware of the variant's subcolumns while rewriting rowsets.
         // This is slow, since it needs to read all subcolumns and merge them into a single column
-        RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root, output_as_raw_json));
+        RETURN_IF_ERROR(HierarchicalDataReader::create(iter, tablet_column.path_info(), node, root,
+                                                       output_as_raw_json));
         return Status::OK();
     }
 
@@ -475,7 +476,8 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column,
         iter->reset(it);
     } else if (node != nullptr && !node->children.empty()) {
         // Create reader with hierarchical data
-        RETURN_IF_ERROR(HierarchicalDataReader::create(iter, node, root));
+        RETURN_IF_ERROR(
+                HierarchicalDataReader::create(iter, tablet_column.path_info(), node, root));
     } else {
         // If the file only contains columns `v.a` and `v` but the target path is `v.b`, only read and parse the root column
         if (root == nullptr) {
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index 301c5d3725b..48428333c77 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -2070,8 +2070,16 @@ private:
             if (!variant.is_finalized()) {
                 variant.assume_mutable()->finalize();
             }
-
-            if (variant.is_scalar_variant()) {
+            // It's important to convert as many elements as possible in this context. For instance,
+            // if the root of this variant column is a number column, converting it to a number column
+            // is acceptable. However, if the destination type is a string and the root is not a scalar,
+            // then we should convert the entire tree to a string.
+            bool is_root_valuable =
+                    variant.is_scalar_variant() ||
+                    (!variant.is_null_root() &&
+                     !WhichDataType(remove_nullable(variant.get_root_type())).is_nothing() &&
+                     !WhichDataType(data_type_to).is_string());
+            if (is_root_valuable) {
                 ColumnPtr nested = variant.get_root();
                 auto nested_from_type = variant.get_root_type();
                 // DCHECK(nested_from_type->is_nullable());
@@ -2104,12 +2112,6 @@ private:
                     col_to = make_nullable(col_to, true);
                 } else if (!data_type_to->is_nullable() &&
                            !WhichDataType(data_type_to).is_string()) {
-                    // Could not cast to any other types when it hierarchical like '{"a" : 1}'
-                    // TODO we should convert as many as possible here, for examle
-                    // this variant column's root is a number column, to convert to number column
-                    // is also acceptable
-                    // return Status::InvalidArgument(fmt::format("Could not cast from variant to {}",
-                    //                                            data_type_to->get_name()));
                     col_to->assume_mutable()->insert_many_defaults(input_rows_count);
                     col_to = make_nullable(col_to, true);
                 } else if (WhichDataType(data_type_to).is_string()) {
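
The new is_root_valuable predicate bundles three independent conditions. A
standalone distillation (hypothetical plain-bool inputs standing in for the
ColumnObject/WhichDataType checks, not the real API) of when the cast can use
the root column directly versus falling through to whole-tree serialization:

    #include <cassert>

    // The root column can be cast directly when the variant is a pure scalar,
    // or when it has a real typed root and the destination is not a string
    // (a string destination should serialize the whole subtree instead).
    bool is_root_valuable(bool is_scalar_variant, bool is_null_root,
                          bool root_type_is_nothing, bool dst_is_string) {
        return is_scalar_variant ||
               (!is_null_root && !root_type_is_nothing && !dst_is_string);
    }

    int main() {
        // Scalar variant: always cast from the root, whatever the destination.
        assert(is_root_valuable(true, false, false, true));
        // Hierarchical variant with a typed root cast to a number: root suffices.
        assert(is_root_valuable(false, false, false, false));
        // Hierarchical variant cast to string: serialize the entire tree.
        assert(!is_root_valuable(false, false, false, true));
        return 0;
    }
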
diff --git a/be/src/vec/json/path_in_data.cpp b/be/src/vec/json/path_in_data.cpp
index 3eab57f7812..1c02febd446 100644
--- a/be/src/vec/json/path_in_data.cpp
+++ b/be/src/vec/json/path_in_data.cpp
@@ -150,11 +150,18 @@ size_t PathInData::Hash::operator()(const PathInData& value) const {
     return hash.low ^ hash.high;
 }
 
-PathInData PathInData::pop_front() const {
+PathInData PathInData::copy_pop_front() const {
+    return copy_pop_nfront(1);
+}
+
+PathInData PathInData::copy_pop_nfront(size_t n) const {
+    if (n >= parts.size()) {
+        return {};
+    }
     PathInData new_path;
     Parts new_parts;
     if (!parts.empty()) {
-        std::copy(parts.begin() + 1, parts.end(), std::back_inserter(new_parts));
+        std::copy(parts.begin() + n, parts.end(), std::back_inserter(new_parts));
     }
     new_path.build_path(new_parts);
     new_path.build_parts(new_parts);
diff --git a/be/src/vec/json/path_in_data.h b/be/src/vec/json/path_in_data.h
index 87278c1c925..6531a8bfc6a 100644
--- a/be/src/vec/json/path_in_data.h
+++ b/be/src/vec/json/path_in_data.h
@@ -75,7 +75,8 @@ public:
     };
     std::string to_jsonpath() const;
 
-    PathInData pop_front() const;
+    PathInData copy_pop_front() const;
+    PathInData copy_pop_nfront(size_t n) const;
     void to_protobuf(segment_v2::ColumnPathInfo* pb, int32_t parent_col_unique_id) const;
     void from_protobuf(const segment_v2::ColumnPathInfo& pb);
 
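
Beyond the n-part generalization, the rename from pop_front to copy_pop_front
makes the value semantics explicit: the receiver is never mutated, a trimmed
copy is returned, and popping at least as many parts as exist yields a
well-defined empty path rather than iterating past the end. A toy check under
the same simplified stand-in as the earlier sketch:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    using Parts = std::vector<std::string>;  // hypothetical stand-in for PathInData::Parts

    Parts copy_pop_nfront(const Parts& parts, std::size_t n) {
        if (n >= parts.size()) {
            return {};  // defined result instead of walking off the end
        }
        return Parts(parts.begin() + n, parts.end());
    }

    int main() {
        const Parts path = {"v", "a", "b"};
        assert((copy_pop_nfront(path, 1) == Parts{"a", "b"}));  // copy_pop_front: v.a.b -> a.b
        assert(copy_pop_nfront(path, 3).empty());               // popping everything is safe
        assert(path.size() == 3);                               // the original is untouched
        return 0;
    }
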
diff --git a/regression-test/data/variant_p0/compaction/test_compaction.out b/regression-test/data/variant_p0/compaction/test_compaction.out
new file mode 100644
index 00000000000..5118c2824e1
--- /dev/null
+++ b/regression-test/data/variant_p0/compaction/test_compaction.out
@@ -0,0 +1,281 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql_1 --
+1      {"x":[1]}
+1      {"x":[1]}
+2      {"a":"1"}
+2      {"a":"1"}
+3      {"x":[3]}
+3      {"x":[3]}
+4      {"y":1}
+4      {"y":1}
+5      {"z":2.0}
+5      {"z":2.0}
+6      {"x":111}
+6      {"x":111}
+7      {"m":1}
+7      {"m":1}
+8      {"l":2}
+8      {"l":2}
+9      {"g":1.11}
+9      {"g":1.11}
+10     {"z":1.1111}
+10     {"z":1.1111}
+11     {"sala":0}
+11     {"sala":0}
+12     {"dddd":0.1}
+12     {"dddd":0.1}
+13     {"a":1}
+13     {"a":1}
+14     {"a":[[[1]]]}
+14     {"a":[[[1]]]}
+15     {"a":1}
+15     {"a":1}
+16     {"a":"1223"}
+16     {"a":"1223"}
+17     {"a":[1]}
+17     {"a":[1]}
+18     {"a":["1",2,1.1]}
+18     {"a":["1",2,1.1]}
+19     {"b":{"c":1},"a":1}
+19     {"b":{"c":1},"a":1}
+20     {"b":{"c":[{"a":1}]},"a":1}
+20     {"b":{"c":[{"a":1}]},"a":1}
+21     {"b":{"c":[{"a":1}]},"a":1}
+21     {"b":{"c":[{"a":1}]},"a":1}
+22     {"b":{"c":[{"a":1}]},"a":1}
+22     {"b":{"c":[{"a":1}]},"a":1}
+1022   {"b":10,"a":1}
+1022   {"b":10,"a":1}
+1029   {"b":{"c":1},"a":1}
+1029   {"b":{"c":1},"a":1}
+1999   {"b":{"c":1},"a":1}
+1999   {"b":{"c":1},"a":1}
+19921  {"b":10,"a":1}
+19921  {"b":10,"a":1}
+
+-- !sql_2 --
+14     [null]
+14     [null]
+17     [1]
+17     [1]
+18     [1, 2, null]
+18     [1, 2, null]
+
+-- !sql_3 --
+19     1       {"c":1}
+19     1       {"c":1}
+20     1       {"c":[{"a":1}]}
+20     1       {"c":[{"a":1}]}
+21     1       {"c":[{"a":1}]}
+21     1       {"c":[{"a":1}]}
+22     1       {"c":[{"a":1}]}
+22     1       {"c":[{"a":1}]}
+1029   1       {"c":1}
+1029   1       {"c":1}
+1999   1       {"c":1}
+1999   1       {"c":1}
+
+-- !sql_5 --
+10     \N
+10     \N
+{"c":1}        1
+{"c":1}        1
+{"c":1}        1
+{"c":1}        1
+10     \N
+10     \N
+{"c":[{"a":1}]}        [{"a":1}]
+{"c":[{"a":1}]}        [{"a":1}]
+
+-- !sql_11 --
+1      {"x":[1]}
+1      {"x":[1]}
+2      {"a":"1"}
+2      {"a":"1"}
+3      {"x":[3]}
+3      {"x":[3]}
+4      {"y":1}
+4      {"y":1}
+5      {"z":2.0}
+5      {"z":2.0}
+6      {"x":111}
+6      {"x":111}
+7      {"m":1}
+7      {"m":1}
+8      {"l":2}
+8      {"l":2}
+9      {"g":1.11}
+9      {"g":1.11}
+10     {"z":1.1111}
+10     {"z":1.1111}
+11     {"sala":0}
+11     {"sala":0}
+12     {"dddd":0.1}
+12     {"dddd":0.1}
+13     {"a":1}
+13     {"a":1}
+14     {"a":[[[1]]]}
+14     {"a":[[[1]]]}
+15     {"a":1}
+15     {"a":1}
+16     {"a":"1223"}
+16     {"a":"1223"}
+17     {"a":[1]}
+17     {"a":[1]}
+18     {"a":["1",2,1.1]}
+18     {"a":["1",2,1.1]}
+19     {"b":{"c":1},"a":1}
+19     {"b":{"c":1},"a":1}
+20     {"b":{"c":[{"a":1}]},"a":1}
+20     {"b":{"c":[{"a":1}]},"a":1}
+21     {"b":{"c":[{"a":1}]},"a":1}
+21     {"b":{"c":[{"a":1}]},"a":1}
+22     {"b":{"c":[{"a":1}]},"a":1}
+22     {"b":{"c":[{"a":1}]},"a":1}
+1022   {"b":10,"a":1}
+1022   {"b":10,"a":1}
+1029   {"b":{"c":1},"a":1}
+1029   {"b":{"c":1},"a":1}
+1999   {"b":{"c":1},"a":1}
+1999   {"b":{"c":1},"a":1}
+19921  {"b":10,"a":1}
+19921  {"b":10,"a":1}
+
+-- !sql_22 --
+14     [null]
+14     [null]
+17     [1]
+17     [1]
+18     [1, 2, null]
+18     [1, 2, null]
+
+-- !sql_33 --
+19     1       {"c":1}
+19     1       {"c":1}
+20     1       {"c":[{"a":1}]}
+20     1       {"c":[{"a":1}]}
+21     1       {"c":[{"a":1}]}
+21     1       {"c":[{"a":1}]}
+22     1       {"c":[{"a":1}]}
+22     1       {"c":[{"a":1}]}
+1029   1       {"c":1}
+1029   1       {"c":1}
+1999   1       {"c":1}
+1999   1       {"c":1}
+
+-- !sql_55 --
+10     \N
+10     \N
+{"c":1}        1
+{"c":1}        1
+{"c":1}        1
+{"c":1}        1
+10     \N
+10     \N
+{"c":[{"a":1}]}        [{"a":1}]
+{"c":[{"a":1}]}        [{"a":1}]
+
+-- !sql_1 --
+1      {"x":[1]}
+2      {"a":"1"}
+3      {"x":[3]}
+4      {"y":1}
+5      {"z":2.0}
+6      {"x":111}
+7      {"m":1}
+8      {"l":2}
+9      {"g":1.11}
+10     {"z":1.1111}
+11     {"sala":0}
+12     {"dddd":0.1}
+13     {"a":1}
+14     {"a":[[[1]]]}
+15     {"a":1}
+16     {"a":"1223"}
+17     {"a":[1]}
+18     {"a":["1",2,1.1]}
+19     {"b":{"c":1},"a":1}
+20     {"b":{"c":[{"a":1}]},"a":1}
+21     {"b":{"c":[{"a":1}]},"a":1}
+22     {"b":{"c":[{"a":1}]},"a":1}
+1022   {"b":10,"a":1}
+1029   {"b":{"c":1},"a":1}
+1999   {"b":{"c":1},"a":1}
+19921  {"b":10,"a":1}
+
+-- !sql_2 --
+14     [null]
+17     [1]
+18     [1, 2, null]
+
+-- !sql_3 --
+19     1       {"c":1}
+20     1       {"c":[{"a":1}]}
+21     1       {"c":[{"a":1}]}
+22     1       {"c":[{"a":1}]}
+1029   1       {"c":1}
+1999   1       {"c":1}
+
+-- !sql_5 --
+10     \N
+{"c":1}        1
+{"c":1}        1
+10     \N
+{"c":[{"a":1}]}        [{"a":1}]
+{"c":[{"a":1}]}        [{"a":1}]
+{"c":[{"a":1}]}        [{"a":1}]
+{"c":1}        1
+{}     \N
+{}     \N
+
+-- !sql_11 --
+1      {"x":[1]}
+2      {"a":"1"}
+3      {"x":[3]}
+4      {"y":1}
+5      {"z":2.0}
+6      {"x":111}
+7      {"m":1}
+8      {"l":2}
+9      {"g":1.11}
+10     {"z":1.1111}
+11     {"sala":0}
+12     {"dddd":0.1}
+13     {"a":1}
+14     {"a":[[[1]]]}
+15     {"a":1}
+16     {"a":"1223"}
+17     {"a":[1]}
+18     {"a":["1",2,1.1]}
+19     {"b":{"c":1},"a":1}
+20     {"b":{"c":[{"a":1}]},"a":1}
+21     {"b":{"c":[{"a":1}]},"a":1}
+22     {"b":{"c":[{"a":1}]},"a":1}
+1022   {"b":10,"a":1}
+1029   {"b":{"c":1},"a":1}
+1999   {"b":{"c":1},"a":1}
+19921  {"b":10,"a":1}
+
+-- !sql_22 --
+14     [null]
+17     [1]
+18     [1, 2, null]
+
+-- !sql_33 --
+19     1       {"c":1}
+20     1       {"c":[{"a":1}]}
+21     1       {"c":[{"a":1}]}
+22     1       {"c":[{"a":1}]}
+1029   1       {"c":1}
+1999   1       {"c":1}
+
+-- !sql_55 --
+10     \N
+{"c":1}        1
+{"c":1}        1
+10     \N
+{"c":[{"a":1}]}        [{"a":1}]
+{"c":[{"a":1}]}        [{"a":1}]
+{"c":[{"a":1}]}        [{"a":1}]
+{"c":1}        1
+
diff --git a/regression-test/data/variant_p0/load.out b/regression-test/data/variant_p0/load.out
index 490da2618da..b25d275b56f 100644
--- a/regression-test/data/variant_p0/load.out
+++ b/regression-test/data/variant_p0/load.out
@@ -149,7 +149,7 @@
 [123]
 
 -- !sql_25 --
-50000  55000.000000002256      6150000
+50000  55000.00000000374       6150000
 
 -- !sql_26 --
 5000
@@ -175,7 +175,7 @@
 [{"a":1},123]
 
 -- !sql_30 --
-7.111  [123,{"xx":1}]  {"b":{"c":456,"e":7.111}}       456
+7.111  [123,{"xx":1}]  {"c":456,"e":7.111}     456
 
 -- !sql_30 --
 {"a":1123}
@@ -211,7 +211,7 @@
 "2023-06-21 16:35:58.468  INFO 
[sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] 
c.s.c.a.CustomRequestBodyAdviceAdapter   : REQUEST DATA: 
{String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\
 [...]
 
 -- !sql_33_1 --
-"2023-06-21 16:35:58.468  INFO 
[sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] 
c.s.c.a.CustomRequestBodyAdviceAdapter   : REQUEST DATA: 
{String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\
 [...]
+2023-06-21 16:35:58.468  INFO [sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] c.s.c.a.CustomRequestBodyAdviceAdapter   : REQUEST DATA: {String={"id":"3293880a36cd163754ea4f90270331f6","token":"3293880a36cd163754ea4f90270331f6","line_items":[{"id":43073082228985,"properties":{},"quantity":1000000,"variant_id":43073082228985,"key":"43073082228985:381f0b4b03d0c76493aa028c4ed006a9","discounted_price":"28.95","discounts":[],"gift_card":false,"grams":0,"line_price [...]
 
 -- !sql_34 --
 "2023-06-21 16:35:58.468  INFO 
[sino-webhook,6dee61f0605a70f3,6dee61f0605a70f3,true] 1 --- [io-8001-exec-13] 
c.s.c.a.CustomRequestBodyAdviceAdapter   : REQUEST DATA: 
{String={\\"id\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"token\\":\\"3293880a36cd163754ea4f90270331f6\\",\\"line_items\\":[{\\"id\\":43073082228985,\\"properties\\":{},\\"quantity\\":1000000,\\"variant_id\\":43073082228985,\\"key\\":\\"43073082228985:381f0b4b03d0c76493aa028c4ed006a9\\",\\"discounted_price\\":\\"28.95\\",\
 [...]
diff --git a/regression-test/suites/variant_p0/compaction/test_compaction.groovy b/regression-test/suites/variant_p0/compaction/test_compaction.groovy
new file mode 100644
index 00000000000..199fc4aa28c
--- /dev/null
+++ b/regression-test/suites/variant_p0/compaction/test_compaction.groovy
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_compaction_variant") {
+    try {
+        String backend_id;
+        def backendId_to_backendIP = [:]
+        def backendId_to_backendHttpPort = [:]
+        getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
+
+        backend_id = backendId_to_backendIP.keySet()[0]
+        def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id))
+        
+        logger.info("Show config: code=" + code + ", out=" + out + ", err=" + 
err)
+        assertEquals(code, 0)
+        def configList = parseJson(out.trim())
+        assert configList instanceof List
+
+        boolean disableAutoCompaction = true
+        for (Object ele in (List) configList) {
+            assert ele instanceof List<String>
+            if (((List<String>) ele)[0] == "disable_auto_compaction") {
+                disableAutoCompaction = Boolean.parseBoolean(((List<String>) ele)[2])
+            }
+        }
+        def create_table = { tableName, buckets="auto", key_type="DUPLICATE" ->
+            sql "DROP TABLE IF EXISTS ${tableName}"
+            sql """
+                CREATE TABLE IF NOT EXISTS ${tableName} (
+                    k bigint,
+                    v variant
+                )
+                ${key_type} KEY(`k`)
+                DISTRIBUTED BY HASH(k) BUCKETS ${buckets}
+                properties("replication_num" = "1", "disable_auto_compaction" 
= "false");
+            """
+        }
+
+        def key_types = ["DUPLICATE", "UNIQUE"]
+        for (int i = 0; i < key_types.size(); i++) {
+            def tableName = "simple_variant_${key_types[i]}"
+            // 1. simple cases
+            create_table.call(tableName, "1", key_types[i])
+            def insert = {
+                sql """insert into ${tableName} values (1,  '{"x" : 
[1]}'),(13,  '{"a" : 1}');"""
+                sql """insert into ${tableName} values (2,  '{"a" : 
"1"}'),(14,  '{"a" : [[[1]]]}');"""
+                sql """insert into ${tableName} values (3,  '{"x" : 
[3]}'),(15,  '{"a" : 1}')"""
+                sql """insert into ${tableName} values (4,  '{"y": 1}'),(16,  
'{"a" : "1223"}');"""
+                sql """insert into ${tableName} values (5,  '{"z" : 2}'),(17,  
'{"a" : [1]}');"""
+                sql """insert into ${tableName} values (6,  '{"x" : 
111}'),(18,  '{"a" : ["1", 2, 1.1]}');"""
+                sql """insert into ${tableName} values (7,  '{"m" : 1}'),(19,  
'{"a" : 1, "b" : {"c" : 1}}');"""
+                sql """insert into ${tableName} values (8,  '{"l" : 2}'),(20,  
'{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
+                sql """insert into ${tableName} values (9,  '{"g" : 
1.11}'),(21,  '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
+                sql """insert into ${tableName} values (10, '{"z" : 
1.1111}'),(22,  '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
+                sql """insert into ${tableName} values (11, '{"sala" : 
0}'),(1999,  '{"a" : 1, "b" : {"c" : 1}}'),(19921,  '{"a" : 1, "b" : 10}');"""
+                sql """insert into ${tableName} values (12, '{"dddd" : 
0.1}'),(1022,  '{"a" : 1, "b" : 10}'),(1029,  '{"a" : 1, "b" : {"c" : 1}}');"""
+            }
+            insert.call();
+            insert.call();
+            qt_sql_1 "SELECT * FROM ${tableName} ORDER BY k, cast(v as 
string); "
+            qt_sql_2 "select k, cast(v:a as array<int>) from  ${tableName} 
where  size(cast(v:a as array<int>)) > 0 order by k"
+            qt_sql_3 "select k, v:a, cast(v:b as string) from  ${tableName} 
where  length(cast(v:b as string)) > 4 order  by k"
+            // qt_sql_4 "select k, cast(v:b as string), cast(v:a as string), 
cast(v:c as string) from  ${tableName} where  order by k  limit 5"
+            qt_sql_5 "select cast(v:b as string), cast(v:b.c as string) from  
${tableName} where cast(v:b as string) != 'null' or cast(v:b as string) != '{}' 
order by k desc limit 10;" 
+
+
+            //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,QueryHits,PathHash,MetaUrl,CompactionStatus
+            String[][] tablets = sql """ show tablets from ${tableName}; """
+
+            // trigger compactions for all tablets in ${tableName}
+            for (String[] tablet in tablets) {
+                String tablet_id = tablet[0]
+                backend_id = tablet[2]
+                (code, out, err) = be_run_cumulative_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Run compaction: code=" + code + ", out=" + out + 
", err=" + err)
+                assertEquals(code, 0)
+                def compactJson = parseJson(out.trim())
+                if (compactJson.status.toLowerCase() == "fail") {
+                    assertEquals(disableAutoCompaction, false)
+                    logger.info("Compaction was done automatically!")
+                }
+                if (disableAutoCompaction) {
+                    assertEquals("success", compactJson.status.toLowerCase())
+                }
+            }
+
+            // wait for all compactions done
+            for (String[] tablet in tablets) {
+                boolean running = true
+                do {
+                    Thread.sleep(1000)
+                    String tablet_id = tablet[0]
+                    backend_id = tablet[2]
+                    (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                    logger.info("Get compaction status: code=" + code + ", 
out=" + out + ", err=" + err)
+                    assertEquals(code, 0)
+                    def compactionStatus = parseJson(out.trim())
+                    assertEquals("success", 
compactionStatus.status.toLowerCase())
+                    running = compactionStatus.run_status
+                } while (running)
+            }
+
+            int rowCount = 0
+            for (String[] tablet in tablets) {
+                String tablet_id = tablet[0]
+                def compactionStatusUrlIndex = 18
+                (code, out, err) = curl("GET", 
tablet[compactionStatusUrlIndex])
+                logger.info("Show tablets status: code=" + code + ", out=" + 
out + ", err=" + err)
+                assertEquals(code, 0)
+                def tabletJson = parseJson(out.trim())
+                assert tabletJson.rowsets instanceof List
+                for (String rowset in (List<String>) tabletJson.rowsets) {
+                    rowCount += Integer.parseInt(rowset.split(" ")[1])
+                }
+            }
+            assert (rowCount < 8)
+            qt_sql_11 "SELECT * FROM ${tableName} ORDER BY k, cast(v as 
string); "
+            qt_sql_22 "select k, cast(v:a as array<int>) from  ${tableName} 
where  size(cast(v:a as array<int>)) > 0 order by k"
+            qt_sql_33 "select k, v:a, cast(v:b as string) from  ${tableName} 
where  length(cast(v:b as string)) > 4 order  by k"
+            // qt_sql_44 "select k, cast(v:b as string), cast(v:a as string), 
cast(v:c as string) from  ${tableName} order by k  limit 5"
+            qt_sql_55 "select cast(v:b as string), cast(v:b.c as string) from  
${tableName} where cast(v:b as string) != 'null' and cast(v:b as string) != 
'{}' order by k desc limit 10;" 
+        }
+
+    } finally {
+        // try_sql("DROP TABLE IF EXISTS ${tableName}")
+    }
+}
diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy
index 57737397b49..e0eb85c9e45 100644
--- a/regression-test/suites/variant_p0/load.groovy
+++ b/regression-test/suites/variant_p0/load.groovy
@@ -93,7 +93,7 @@ suite("regression_test_variant", "variant_type"){
             sql """insert into ${table_name} values (10,  '1000000'),(1,  
'{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
             sql """insert into ${table_name} values (11,  '[123.0]'),(1999,  
'{"a" : 1, "b" : {"c" : 1}}'),(19921,  '{"a" : 1, "b" : 10}');"""
             sql """insert into ${table_name} values (12,  '[123.2]'),(1022,  
'{"a" : 1, "b" : 10}'),(1029,  '{"a" : 1, "b" : {"c" : 1}}');"""
-            qt_sql "select k, cast(v:a as array<int>) from  ${table_name} 
where  size(cast(v:a as array<int>)) > 0 order by k, cast(v as string);"
+            qt_sql "select k, cast(v:a as array<int>) from  ${table_name} 
where  size(cast(v:a as array<int>)) > 0 order by k, cast(v:a as string) asc"
             // cast v:b as int should be correct
             // FIXME: unstable, todo use qt_sql
             sql "select k, v, cast(v:b as string) from  ${table_name} where  
length(cast(v:b as string)) > 4 order  by k, cast(v as string)"
@@ -292,23 +292,23 @@ suite("regression_test_variant", "variant_type"){
         set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
         load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""")
         qt_sql_32 """ select json_extract(v, "\$.json.parseFailed") from 
logdata where  json_extract(v, "\$.json.parseFailed") != 'null' order by k 
limit 1;"""
-        qt_sql_32_1 """select v:json.parseFailed from  logdata where 
cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
+        qt_sql_32_1 """select cast(v:json.parseFailed as string) from  logdata 
where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
         sql "truncate table ${table_name}"
 
         // 0.95 default ratio    
         set_be_config.call("variant_ratio_of_defaults_as_sparse_column", 
"0.95")
         load_json_data.call(table_name, """${getS3Url() + 
'/load/logdata.json'}""")
         qt_sql_33 """ select json_extract(v,"\$.json.parseFailed") from 
logdata where  json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 
1;"""
-        qt_sql_33_1 """select v:json.parseFailed from  logdata where 
cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
+        qt_sql_33_1 """select cast(v:json.parseFailed as string) from  logdata 
where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
         sql "truncate table ${table_name}"
 
         // always sparse column
-        set_be_config.call("variant_ratio_of_defaults_as_sparse_column", 
"0.85")
+        set_be_config.call("variant_ratio_of_defaults_as_sparse_column", 
"0.95")
         load_json_data.call(table_name, """${getS3Url() + 
'/load/logdata.json'}""")
         qt_sql_34 """ select json_extract(v, "\$.json.parseFailed") from 
logdata where  json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 
1;"""
         sql "truncate table ${table_name}"
         qt_sql_35 """select json_extract(v,"\$.json.parseFailed")  from 
logdata where k = 162 and  json_extract(v,"\$.json.parseFailed") != 'null';"""
-        qt_sql_35_1 """select v:json.parseFailed from  logdata where 
cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
+        qt_sql_35_1 """select cast(v:json.parseFailed as string) from  logdata 
where cast(v:json.parseFailed as string) is not null and k = 162 limit 1;"""
 
         // TODO add a test case where certain columns are materialized in some files while others are not materialized (sparse)
         // unique table


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org
