This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit a3144915351223fbd625f724a26d5d933ae10cd8
Author: airborne12 <airborn...@gmail.com>
AuthorDate: Fri Jan 12 09:07:42 2024 +0800

    [Fix](inverted index) fix array inverted index builder error (#29869)
---
 be/src/olap/task/index_builder.cpp                 |  65 ++++++-----
 .../inverted_index_p0/test_array_index.groovy      | 120 +++++++++++++++++++++
 2 files changed, 152 insertions(+), 33 deletions(-)

diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp
index 32637db5af3..e7555c6b869 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -316,30 +316,26 @@ Status IndexBuilder::_add_nullable(const std::string& column_name,
         }
         return step;
     };
+    // TODO: need to process null data for inverted index
     if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
         DCHECK(field->get_sub_field_count() == 1);
-        BitmapIterator null_iter(null_map, num_rows);
-        bool is_null = false;
-        size_t this_run = 0;
-        while ((this_run = null_iter.Next(&is_null)) > 0) {
-            if (is_null) {
-                RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(this_run));
-            } else {
-                // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
-                auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
-                // total number length
-                size_t element_cnt = size_t((unsigned long)(*data_ptr));
-                auto offset_data = *(data_ptr + 1);
-                const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
-                if (element_cnt > 0) {
-                    auto data = *(data_ptr + 2);
-                    auto nested_null_map = *(data_ptr + 3);
-                    RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
-                            field->get_sub_field(0)->size(), reinterpret_cast<const void*>(data),
-                            reinterpret_cast<const uint8_t*>(nested_null_map), offsets_ptr,
-                            num_rows));
-                }
+        // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
+        auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
+        // total number length
+        size_t element_cnt = size_t((unsigned long)(*data_ptr));
+        auto offset_data = *(data_ptr + 1);
+        const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
+        try {
+            if (element_cnt > 0) {
+                auto data = *(data_ptr + 2);
+                auto nested_null_map = *(data_ptr + 3);
+                RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
+                        field->get_sub_field(0)->size(), reinterpret_cast<const void*>(data),
+                        reinterpret_cast<const uint8_t*>(nested_null_map), offsets_ptr, num_rows));
             }
+        } catch (const std::exception& e) {
+            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                    "CLuceneError occured: {}", e.what());
         }
         return Status::OK();
     }
@@ -350,15 +346,8 @@ Status IndexBuilder::_add_nullable(const std::string& column_name,
             if (null_map[offset]) {
                 RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(step));
             } else {
-                if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
-                    DCHECK(field->get_sub_field_count() == 1);
-                    const auto* col_cursor = reinterpret_cast<const CollectionValue*>(*ptr);
-                    RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
-                            field->get_sub_field(0)->size(), col_cursor, step));
-                } else {
-                    RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
-                            column_name, *ptr, step));
-                }
+                RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
+                        column_name, *ptr, step));
             }
             *ptr += field->size() * step;
             offset += step;
@@ -377,9 +366,19 @@ Status IndexBuilder::_add_data(const std::string& column_name,
     try {
         if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
             DCHECK(field->get_sub_field_count() == 1);
-            const auto* col_cursor = reinterpret_cast<const CollectionValue*>(*ptr);
-            RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
-                    field->get_sub_field(0)->size(), col_cursor, num_rows));
+            // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
+            auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
+            // total number length
+            size_t element_cnt = size_t((unsigned long)(*data_ptr));
+            auto offset_data = *(data_ptr + 1);
+            const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
+            if (element_cnt > 0) {
+                auto data = *(data_ptr + 2);
+                auto nested_null_map = *(data_ptr + 3);
+                RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
+                        field->get_sub_field(0)->size(), reinterpret_cast<const void*>(data),
+                        reinterpret_cast<const uint8_t*>(nested_null_map), offsets_ptr, num_rows));
+            }
         } else {
             RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
                     column_name, *ptr, num_rows));
diff --git a/regression-test/suites/inverted_index_p0/test_array_index.groovy b/regression-test/suites/inverted_index_p0/test_array_index.groovy
new file mode 100644
index 00000000000..dc4c1bc663a
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_array_index.groovy
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_array_index1"){
+    // prepare test table
+
+    def timeout = 60000
+    def delta_time = 1000
+    def alter_res = "null"
+    def useTime = 0
+
+    def indexTblName = "test_array_index"
+
+    sql "DROP TABLE IF EXISTS ${indexTblName}"
+    // create 1 replica table
+    sql """
+       CREATE TABLE `${indexTblName}` (
+      `apply_date` date NULL COMMENT '',
+      `id` varchar(60) NOT NULL COMMENT '',
+      `inventors` array<text> NULL COMMENT ''
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`apply_date`, `id`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "is_being_synced" = "false",
+    "storage_format" = "V2",
+    "light_schema_change" = "true",
+    "disable_auto_compaction" = "false",
+    "enable_single_replica_compaction" = "false"
+    );
+    """
+
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '[\"a\", \"b\", \"c\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '[\"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '48a33ec3453a28bce84b8f96fe161956', '[\"m\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '021603e7dcfe65d44af0efd0e5aee154', '[\"n\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '9fcb57ae675f0af4d613d9e6c0e8a2a2', '[\"o\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a3'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a4', NULL); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a5', '[]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '[null,null,null]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a7', [null,null,null]); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a8', []); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a648a447b8f71522f11632eba4b4adde', '[\"p\", \"q\", \"r\", \"s\", \"t\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a9fb5c985c90bf05f3bee5ca3ae95260', '[\"u\", \"v\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '[\"w\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '[\"x\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """
+
+    sql """ ALTER TABLE ${indexTblName} ADD INDEX index_inverted_inventors(inventors) USING INVERTED  COMMENT ''; """
+
+    sql """ BUILD INDEX index_inverted_inventors ON ${indexTblName}; """
+}
+
+suite("test_array_index2"){
+    // prepare test table
+
+    def timeout = 60000
+    def delta_time = 1000
+    def alter_res = "null"
+    def useTime = 0
+
+    def indexTblName = "test_array_index2"
+
+    sql "DROP TABLE IF EXISTS ${indexTblName}"
+    // create 1 replica table
+    sql """
+       CREATE TABLE `${indexTblName}` (
+      `apply_date` date NULL COMMENT '',
+      `id` varchar(60) NOT NULL COMMENT '',
+      `inventors` array<text> NULL COMMENT '',
+      INDEX index_inverted_inventors(inventors) USING INVERTED  COMMENT ''
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`apply_date`, `id`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "is_being_synced" = "false",
+    "storage_format" = "V2",
+    "light_schema_change" = "true",
+    "disable_auto_compaction" = "false",
+    "enable_single_replica_compaction" = "false"
+    );
+    """
+
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '[\"a\", \"b\", \"c\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '[\"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '48a33ec3453a28bce84b8f96fe161956', '[\"m\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '021603e7dcfe65d44af0efd0e5aee154', '[\"n\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '9fcb57ae675f0af4d613d9e6c0e8a2a2', '[\"o\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a3'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a4', NULL); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a5', '[]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '[null,null,null]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a7', [null,null,null]); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a8', []); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a648a447b8f71522f11632eba4b4adde', '[\"p\", \"q\", \"r\", \"s\", \"t\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a9fb5c985c90bf05f3bee5ca3ae95260', '[\"u\", \"v\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '[\"w\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '[\"x\"]'); """
+    sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to