This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 56b2fc43d4 [enhancement](array-type) shrink column suffix zero for 
type ARRAY<CHAR> (#12443)
56b2fc43d4 is described below

commit 56b2fc43d4b5a91b693a9025e3dfc7a553e41781
Author: camby <104178...@qq.com>
AuthorDate: Tue Sep 13 23:24:48 2022 +0800

    [enhancement](array-type) shrink column suffix zero for type ARRAY<CHAR> 
(#12443)
    
    At the compute level, trailing zero bytes (padding) of CHAR values are
    stripped. To keep the behavior consistent with CHAR, we also strip them
    for ARRAY<CHAR> and nested ARRAY<ARRAY<CHAR>> types.
    
    Co-authored-by: cambyzju <zhuxiaol...@baidu.com>
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  13 ++++++++++---
 be/src/olap/rowset/segment_v2/segment_iterator.h   |   2 +-
 be/src/vec/columns/column.h                        |   8 ++++++++
 be/src/vec/columns/column_array.cpp                |   4 ++++
 be/src/vec/columns/column_array.h                  |   3 +++
 be/src/vec/columns/column_nullable.cpp             |   5 +++++
 be/src/vec/columns/column_nullable.h               |   3 +++
 be/src/vec/columns/column_string.cpp               |  12 ++++++++++++
 be/src/vec/columns/column_string.h                 |  12 ++----------
 be/src/vec/core/block.cpp                          |  19 +++----------------
 be/src/vec/core/block.h                            |   1 +
 .../data/load/insert/test_array_insert.out         | Bin 1266 -> 1114 bytes
 .../data/load/insert/test_array_string_insert.out  | Bin 397 -> 373 bytes
 .../array_functions/test_array_functions.out       |  18 ++++++++++++++++++
 .../array_functions/test_array_functions.groovy    |  21 ++++++++++++---------
 15 files changed, 82 insertions(+), 39 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index a92cfa8fab..376545b067 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -872,9 +872,16 @@ void SegmentIterator::_vec_init_char_column_id() {
         auto cid = _schema.column_id(i);
         auto column_desc = _schema.column(cid);
 
-        if (column_desc->type() == OLAP_FIELD_TYPE_CHAR) {
-            _char_type_idx.emplace_back(i);
-        }
+        do {
+            if (column_desc->type() == OLAP_FIELD_TYPE_CHAR) {
+                _char_type_idx.emplace_back(i);
+                break;
+            } else if (column_desc->type() != OLAP_FIELD_TYPE_ARRAY) {
+                break;
+            }
+            // for Array<Char> or Array<Array<Char>>
+            column_desc = column_desc->get_sub_field(0);
+        } while (column_desc != nullptr);
     }
 }
 
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h 
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index e57c43b597..2454167a33 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -205,7 +205,7 @@ private:
 
     io::FileReaderSPtr _file_reader;
 
-    // char_type columns cid
+    // char_type or array<char> type columns cid
     std::vector<size_t> _char_type_idx;
 
     // number of rows read in the current batch
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 6d085fa7c7..3e4bc62cc0 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -124,6 +124,12 @@ public:
         return nullptr;
     }
 
+    // shrink the end zeros for CHAR type or ARRAY<CHAR> type
+    virtual MutablePtr get_shinked_column() {
+        LOG(FATAL) << "Cannot clone_resized() column " << get_name();
+        return nullptr;
+    }
+
     /// Returns number of values in column.
     virtual size_t size() const = 0;
 
@@ -545,6 +551,8 @@ public:
 
     virtual bool is_column_dictionary() const { return false; }
 
+    virtual bool is_column_array() const { return false; }
+
     /// If the only value column can contain is NULL.
     /// Does not imply type of object, because it can be 
ColumnNullable(ColumnNothing) or ColumnConst(ColumnNullable(ColumnNothing))
     virtual bool only_null() const { return false; }
diff --git a/be/src/vec/columns/column_array.cpp 
b/be/src/vec/columns/column_array.cpp
index a589482c22..473cde0982 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -77,6 +77,10 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : 
data(std::move(nest
     offsets = ColumnOffsets::create();
 }
 
+MutableColumnPtr ColumnArray::get_shinked_column() {
+    return ColumnArray::create(data->get_shinked_column(), 
offsets->assume_mutable());
+}
+
 std::string ColumnArray::get_name() const {
     return "Array(" + get_data().get_name() + ")";
 }
diff --git a/be/src/vec/columns/column_array.h 
b/be/src/vec/columns/column_array.h
index d3bcb924f5..2f8df6d83d 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -78,11 +78,14 @@ public:
         return Base::create(std::forward<Args>(args)...);
     }
 
+    MutableColumnPtr get_shinked_column() override;
+
     /** On the index i there is an offset to the beginning of the i + 1 -th 
element. */
     using ColumnOffsets = ColumnVector<Offset64>;
 
     std::string get_name() const override;
     const char* get_family_name() const override { return "Array"; }
+    bool is_column_array() const override { return true; }
     bool can_be_inside_nullable() const override { return true; }
     TypeIndex get_data_type() const { return TypeIndex::Array; }
     MutableColumnPtr clone_resized(size_t size) const override;
diff --git a/be/src/vec/columns/column_nullable.cpp 
b/be/src/vec/columns/column_nullable.cpp
index 29b1887421..b6bf95f449 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -45,6 +45,11 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& 
nested_column_, MutableColumnP
     }
 }
 
+MutableColumnPtr ColumnNullable::get_shinked_column() {
+    return 
ColumnNullable::create(get_nested_column_ptr()->get_shinked_column(),
+                                  get_null_map_column_ptr());
+}
+
 void ColumnNullable::update_hash_with_value(size_t n, SipHash& hash) const {
     if (is_null_at(n))
         hash.update(0);
diff --git a/be/src/vec/columns/column_nullable.h 
b/be/src/vec/columns/column_nullable.h
index 523ff337ae..cb399ab761 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -67,6 +67,8 @@ public:
         return Base::create(std::forward<Args>(args)...);
     }
 
+    MutableColumnPtr get_shinked_column() override;
+
     const char* get_family_name() const override { return "Nullable"; }
     std::string get_name() const override { return "Nullable(" + 
nested_column->get_name() + ")"; }
     MutableColumnPtr clone_resized(size_t size) const override;
@@ -199,6 +201,7 @@ public:
     bool is_bitmap() const override { return get_nested_column().is_bitmap(); }
     bool is_column_decimal() const override { return 
get_nested_column().is_column_decimal(); }
     bool is_column_string() const override { return 
get_nested_column().is_column_string(); }
+    bool is_column_array() const override { return 
get_nested_column().is_column_array(); }
     bool is_fixed_and_contiguous() const override { return false; }
     bool values_have_fixed_size() const override { return 
nested_column->values_have_fixed_size(); }
     size_t size_of_value_if_fixed() const override {
diff --git a/be/src/vec/columns/column_string.cpp 
b/be/src/vec/columns/column_string.cpp
index 20f0d3c534..ccc1074e29 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -64,6 +64,18 @@ MutableColumnPtr ColumnString::clone_resized(size_t to_size) 
const {
     return res;
 }
 
+MutableColumnPtr ColumnString::get_shinked_column() {
+    auto shrinked_column = ColumnString::create();
+    shrinked_column->get_offsets().reserve(offsets.size());
+    shrinked_column->get_chars().reserve(chars.size());
+    for (int i = 0; i < size(); i++) {
+        StringRef str = get_data_at(i);
+        reinterpret_cast<ColumnString*>(shrinked_column.get())
+                ->insert_data(str.data, strnlen(str.data, str.size));
+    }
+    return shrinked_column;
+}
+
 void ColumnString::insert_range_from(const IColumn& src, size_t start, size_t 
length) {
     if (length == 0) return;
 
diff --git a/be/src/vec/columns/column_string.h 
b/be/src/vec/columns/column_string.h
index ee8e6cfdd3..2d972ac980 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -84,6 +84,8 @@ public:
 
     MutableColumnPtr clone_resized(size_t to_size) const override;
 
+    MutableColumnPtr get_shinked_column() override;
+
     Field operator[](size_t n) const override {
         assert(n < size());
         return Field(&chars[offset_at(n)], size_at(n) - 1);
@@ -374,16 +376,6 @@ public:
 
         chars.emplace_back(0);
     }
-
-    MutableColumnPtr get_shinked_column() const {
-        auto shrinked_column = ColumnString::create();
-        for (int i = 0; i < size(); i++) {
-            StringRef str = get_data_at(i);
-            reinterpret_cast<ColumnString*>(shrinked_column.get())
-                    ->insert_data(str.data, strnlen(str.data, str.size));
-        }
-        return shrinked_column;
-    }
 };
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index cefa2b0b39..af90143955 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -1063,25 +1063,12 @@ std::unique_ptr<Block> 
Block::create_same_struct_block(size_t size) const {
 void Block::shrink_char_type_column_suffix_zero(const std::vector<size_t>& 
char_type_idx) {
     for (auto idx : char_type_idx) {
         if (idx < data.size()) {
-            if (this->get_by_position(idx).column->is_nullable()) {
-                this->get_by_position(idx).column = ColumnNullable::create(
-                        reinterpret_cast<const ColumnString*>(
-                                reinterpret_cast<const ColumnNullable*>(
-                                        
this->get_by_position(idx).column.get())
-                                        ->get_nested_column_ptr()
-                                        .get())
-                                ->get_shinked_column(),
-                        reinterpret_cast<const ColumnNullable*>(
-                                this->get_by_position(idx).column.get())
-                                ->get_null_map_column_ptr());
-            } else {
-                this->get_by_position(idx).column = reinterpret_cast<const 
ColumnString*>(
-                                                            
this->get_by_position(idx).column.get())
-                                                            
->get_shinked_column();
-            }
+            auto& col_and_name = this->get_by_position(idx);
+            col_and_name.column = 
col_and_name.column->assume_mutable()->get_shinked_column();
         }
     }
 }
+
 size_t MutableBlock::allocated_bytes() const {
     size_t res = 0;
     for (const auto& col : _columns) {
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index aa603a1800..75813df153 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -351,6 +351,7 @@ public:
     doris::Tuple* deep_copy_tuple(const TupleDescriptor&, MemPool*, int, int,
                                   bool padding_char = false);
 
+    // for String type or Array<String> type
     void shrink_char_type_column_suffix_zero(const std::vector<size_t>& 
char_type_idx);
 
     int64_t get_decompress_time() const { return _decompress_time_ns; }
diff --git a/regression-test/data/load/insert/test_array_insert.out 
b/regression-test/data/load/insert/test_array_insert.out
index 18f66e59fe..2bb7e044a1 100644
Binary files a/regression-test/data/load/insert/test_array_insert.out and 
b/regression-test/data/load/insert/test_array_insert.out differ
diff --git a/regression-test/data/load/insert/test_array_string_insert.out 
b/regression-test/data/load/insert/test_array_string_insert.out
index ff69c931ae..7a31b4210e 100644
Binary files a/regression-test/data/load/insert/test_array_string_insert.out 
and b/regression-test/data/load/insert/test_array_string_insert.out differ
diff --git 
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
 
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
index 518865780d..2a2f2c933b 100644
--- 
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
+++ 
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
@@ -116,3 +116,21 @@
 6      1_2_3_4_5_4_3_2_1       a-b-c-d-c-b-a
 7      8_9_null_10_null        f-null-g-null-h
 
+-- !select --
+1      true
+2      false
+3      false
+4      \N
+5      \N
+6      \N
+7      \N
+
+-- !select --
+1      false
+2      false
+3      false
+4      \N
+5      \N
+6      \N
+7      \N
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
 
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
index c374a43e51..a4f6c60dd4 100644
--- 
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
@@ -23,12 +23,13 @@ suite("test_array_functions") {
     sql """ set enable_vectorized_engine = true """
 
     sql """DROP TABLE IF EXISTS ${tableName}"""
-    sql """ 
+    sql """
             CREATE TABLE IF NOT EXISTS ${tableName} (
               `k1` int(11) NULL COMMENT "",
               `k2` ARRAY<int(11)> NOT NULL COMMENT "",
               `k3` ARRAY<VARCHAR(20)> NULL COMMENT "",
-              `k4` ARRAY<int(11)> NULL COMMENT ""
+              `k4` ARRAY<int(11)> NULL COMMENT "",
+              `k5` ARRAY<CHAR(5)> NULL COMMENT ""
             ) ENGINE=OLAP
             DUPLICATE KEY(`k1`)
             DISTRIBUTED BY HASH(`k1`) BUCKETS 1
@@ -37,13 +38,13 @@ suite("test_array_functions") {
             "storage_format" = "V2"
             )
         """
-    sql """ INSERT INTO ${tableName} VALUES(1, [1, 2, 3], ["a", "b", ""], [1, 
2]) """
-    sql """ INSERT INTO ${tableName} VALUES(2, [4], NULL, [5]) """
-    sql """ INSERT INTO ${tableName} VALUES(3, [], [], NULL) """
-    sql """ INSERT INTO ${tableName} VALUES(4, [1, 2, 3, 4, 5, 4, 3, 2, 1], 
[], []) """
-    sql """ INSERT INTO ${tableName} VALUES(5, [], ["a", "b", "c", "d", "c", 
"b", "a"], NULL) """
-    sql """ INSERT INTO ${tableName} VALUES(6, [1, 2, 3, 4, 5, 4, 3, 2, 1], 
["a", "b", "c", "d", "c", "b", "a"], NULL) """
-    sql """ INSERT INTO ${tableName} VALUES(7, [8, 9, NULL, 10, NULL], ["f", 
NULL, "g", NULL, "h"], NULL) """
+    sql """ INSERT INTO ${tableName} 
VALUES(1,[1,2,3],["a","b",""],[1,2],["hi"]) """
+    sql """ INSERT INTO ${tableName} VALUES(2,[4],NULL,[5],["hi2"]) """
+    sql """ INSERT INTO ${tableName} VALUES(3,[],[],NULL,["hi3"]) """
+    sql """ INSERT INTO ${tableName} VALUES(4,[1,2,3,4,5,4,3,2,1],[],[],NULL) 
"""
+    sql """ INSERT INTO ${tableName} 
VALUES(5,[],["a","b","c","d","c","b","a"],NULL,NULL) """
+    sql """ INSERT INTO ${tableName} 
VALUES(6,[1,2,3,4,5,4,3,2,1],["a","b","c","d","c","b","a"],NULL,NULL) """
+    sql """ INSERT INTO ${tableName} 
VALUES(7,[8,9,NULL,10,NULL],["f",NULL,"g",NULL,"h"],NULL,NULL) """
 
     qt_select "SELECT k1, size(k2), size(k3) FROM ${tableName} ORDER BY k1"
     qt_select "SELECT k1, cardinality(k2), cardinality(k3) FROM ${tableName} 
ORDER BY k1"
@@ -58,4 +59,6 @@ suite("test_array_functions") {
     qt_select "SELECT k1, array_slice(k2, 1, 2) FROM ${tableName} ORDER BY k1"
     qt_select "SELECT k1, reverse(k2), reverse(k3), reverse(k4) FROM 
${tableName} ORDER BY k1"
     qt_select "SELECT k1, array_join(k2, '_', 'null'), array_join(k3, '-', 
'null') FROM ${tableName} ORDER BY k1"
+    qt_select "SELECT k1, array_contains(k5, 'hi') FROM ${tableName} ORDER BY 
k1"
+    qt_select "SELECT k1, array_contains(k5, 'hi222') FROM ${tableName} ORDER 
BY k1"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to