This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit a3144915351223fbd625f724a26d5d933ae10cd8 Author: airborne12 <airborn...@gmail.com> AuthorDate: Fri Jan 12 09:07:42 2024 +0800 [Fix](inverted index) fix array inverted index builder error (#29869) --- be/src/olap/task/index_builder.cpp | 65 ++++++----- .../inverted_index_p0/test_array_index.groovy | 120 +++++++++++++++++++++ 2 files changed, 152 insertions(+), 33 deletions(-) diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 32637db5af3..e7555c6b869 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -316,30 +316,26 @@ Status IndexBuilder::_add_nullable(const std::string& column_name, } return step; }; + // TODO: need to process null data for inverted index if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) { DCHECK(field->get_sub_field_count() == 1); - BitmapIterator null_iter(null_map, num_rows); - bool is_null = false; - size_t this_run = 0; - while ((this_run = null_iter.Next(&is_null)) > 0) { - if (is_null) { - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(this_run)); - } else { - // [size, offset_ptr, item_data_ptr, item_nullmap_ptr] - auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr); - // total number length - size_t element_cnt = size_t((unsigned long)(*data_ptr)); - auto offset_data = *(data_ptr + 1); - const uint8_t* offsets_ptr = (const uint8_t*)offset_data; - if (element_cnt > 0) { - auto data = *(data_ptr + 2); - auto nested_null_map = *(data_ptr + 3); - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values( - field->get_sub_field(0)->size(), reinterpret_cast<const void*>(data), - reinterpret_cast<const uint8_t*>(nested_null_map), offsets_ptr, - num_rows)); - } + // [size, offset_ptr, item_data_ptr, item_nullmap_ptr] + auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr); + // total number length + size_t element_cnt = size_t((unsigned long)(*data_ptr)); + auto offset_data = *(data_ptr + 1); + const uint8_t* offsets_ptr = (const uint8_t*)offset_data; + try { + if (element_cnt > 0) { + auto data = *(data_ptr + 2); + auto nested_null_map = *(data_ptr + 3); + RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values( + field->get_sub_field(0)->size(), reinterpret_cast<const void*>(data), + reinterpret_cast<const uint8_t*>(nested_null_map), offsets_ptr, num_rows)); } + } catch (const std::exception& e) { + return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + "CLuceneError occured: {}", e.what()); } return Status::OK(); } @@ -350,15 +346,8 @@ Status IndexBuilder::_add_nullable(const std::string& column_name, if (null_map[offset]) { RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(step)); } else { - if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) { - DCHECK(field->get_sub_field_count() == 1); - const auto* col_cursor = reinterpret_cast<const CollectionValue*>(*ptr); - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values( - field->get_sub_field(0)->size(), col_cursor, step)); - } else { - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values( - column_name, *ptr, step)); - } + RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values( + column_name, *ptr, step)); } *ptr += field->size() * step; offset += step; @@ -377,9 +366,19 @@ Status IndexBuilder::_add_data(const std::string& column_name, try { if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) { DCHECK(field->get_sub_field_count() == 1); - const auto* col_cursor = reinterpret_cast<const CollectionValue*>(*ptr); - RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values( - field->get_sub_field(0)->size(), col_cursor, num_rows)); + // [size, offset_ptr, item_data_ptr, item_nullmap_ptr] + auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr); + // total number length + size_t element_cnt = size_t((unsigned long)(*data_ptr)); + auto offset_data = *(data_ptr + 1); + const uint8_t* offsets_ptr = (const uint8_t*)offset_data; + if (element_cnt > 0) { + auto data = *(data_ptr + 2); + auto nested_null_map = *(data_ptr + 3); + RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values( + field->get_sub_field(0)->size(), reinterpret_cast<const void*>(data), + reinterpret_cast<const uint8_t*>(nested_null_map), offsets_ptr, num_rows)); + } } else { RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values( column_name, *ptr, num_rows)); diff --git a/regression-test/suites/inverted_index_p0/test_array_index.groovy b/regression-test/suites/inverted_index_p0/test_array_index.groovy new file mode 100644 index 00000000000..dc4c1bc663a --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_array_index.groovy @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_array_index1"){ + // prepare test table + + def timeout = 60000 + def delta_time = 1000 + def alter_res = "null" + def useTime = 0 + + def indexTblName = "test_array_index" + + sql "DROP TABLE IF EXISTS ${indexTblName}" + // create 1 replica table + sql """ + CREATE TABLE `${indexTblName}` ( + `apply_date` date NULL COMMENT '', + `id` varchar(60) NOT NULL COMMENT '', + `inventors` array<text> NULL COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`apply_date`, `id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '[\"a\", \"b\", \"c\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '[\"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '48a33ec3453a28bce84b8f96fe161956', '[\"m\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '021603e7dcfe65d44af0efd0e5aee154', '[\"n\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '9fcb57ae675f0af4d613d9e6c0e8a2a2', '[\"o\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a3'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a4', NULL); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a5', '[]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '[null,null,null]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a7', [null,null,null]); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a8', []); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a648a447b8f71522f11632eba4b4adde', '[\"p\", \"q\", \"r\", \"s\", \"t\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a9fb5c985c90bf05f3bee5ca3ae95260', '[\"u\", \"v\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '[\"w\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '[\"x\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """ + + sql """ ALTER TABLE ${indexTblName} ADD INDEX index_inverted_inventors(inventors) USING INVERTED COMMENT ''; """ + + sql """ BUILD INDEX index_inverted_inventors ON ${indexTblName}; """ +} + +suite("test_array_index2"){ + // prepare test table + + def timeout = 60000 + def delta_time = 1000 + def alter_res = "null" + def useTime = 0 + + def indexTblName = "test_array_index2" + + sql "DROP TABLE IF EXISTS ${indexTblName}" + // create 1 replica table + sql """ + CREATE TABLE `${indexTblName}` ( + `apply_date` date NULL COMMENT '', + `id` varchar(60) NOT NULL COMMENT '', + `inventors` array<text> NULL COMMENT '', + INDEX index_inverted_inventors(inventors) USING INVERTED COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`apply_date`, `id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '[\"a\", \"b\", \"c\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '[\"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '48a33ec3453a28bce84b8f96fe161956', '[\"m\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '021603e7dcfe65d44af0efd0e5aee154', '[\"n\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '9fcb57ae675f0af4d613d9e6c0e8a2a2', '[\"o\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a3'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a4', NULL); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a5', '[]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '[null,null,null]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a7', [null,null,null]); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a8', []); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a648a447b8f71522f11632eba4b4adde', '[\"p\", \"q\", \"r\", \"s\", \"t\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a9fb5c985c90bf05f3bee5ca3ae95260', '[\"u\", \"v\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '[\"w\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '[\"x\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """ +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org