This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 621944d4874 [InvertedIndex](Variant) supoort inverted index for array type in variant (#48594) 621944d4874 is described below commit 621944d4874e8475ba76baaf4f978a9a65cbe216 Author: lihangyu <lihan...@selectdb.com> AuthorDate: Wed Mar 5 10:02:13 2025 +0800 [InvertedIndex](Variant) supoort inverted index for array type in variant (#48594) cherry-pick from #47688 --- .../rowset/segment_v2/inverted_index_writer.cpp | 8 +- be/src/vec/exprs/vexpr.cpp | 4 +- be/test/common/schema_util_test.cpp | 12 ++ .../data/variant_github_events_new_p2/load.out | Bin 152 -> 222 bytes .../test_array_contains_with_inverted_index.out | Bin 0 -> 45214 bytes .../test_array_inverted_index_profile.out | Bin 0 -> 2004 bytes .../test_array_contains_with_inverted_index.groovy | 1 - .../test_array_with_inverted_index_all_type.groovy | 1 - .../variant_github_events_new_p2/load.groovy | 61 +++++++- .../test_array_contains_with_inverted_index.groovy | 95 ++++++------- .../with_index/test_array_index_write.groovy | 93 ++++++++++++ .../test_array_inverted_index_profile.groovy | 156 +++++++++++++++++++++ 12 files changed, 376 insertions(+), 55 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index 64c373db166..949a823055d 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -77,15 +77,19 @@ bool InvertedIndexColumnWriter::check_support_inverted_index(const TabletColumn& static std::set<FieldType> invalid_types = { FieldType::OLAP_FIELD_TYPE_DOUBLE, FieldType::OLAP_FIELD_TYPE_JSONB, - FieldType::OLAP_FIELD_TYPE_ARRAY, FieldType::OLAP_FIELD_TYPE_FLOAT, }; - if (column.is_extracted_column() && (invalid_types.contains(column.type()))) { + if (invalid_types.contains(column.type())) { return false; } if (column.is_variant_type()) { return false; } + if (column.is_array_type()) { + // only support one level array + const auto& subcolumn = column.get_sub_column(0); + return !subcolumn.is_array_type() && check_support_inverted_index(subcolumn); + } return true; } diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 0035b2a292f..17985c57cf7 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -643,7 +643,7 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase context->get_inverted_index_context() ->get_storage_name_and_type_by_column_id(column_id); auto storage_type = remove_nullable(storage_name_type->second); - auto target_type = cast_expr->get_target_type(); + auto target_type = remove_nullable(cast_expr->get_target_type()); auto origin_primitive_type = storage_type->get_type_as_type_descriptor().type; auto target_primitive_type = target_type->get_type_as_type_descriptor().type; if (is_complex_type(storage_type)) { @@ -663,7 +663,7 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase } } if (origin_primitive_type != TYPE_VARIANT && - (origin_primitive_type == target_primitive_type || + (storage_type->equals(*target_type) || (is_string_type(target_primitive_type) && is_string_type(origin_primitive_type)))) { children_exprs.emplace_back(expr_without_cast(child)); diff --git a/be/test/common/schema_util_test.cpp b/be/test/common/schema_util_test.cpp index 5fd157756bf..c747dd13e2f 100644 --- a/be/test/common/schema_util_test.cpp +++ b/be/test/common/schema_util_test.cpp @@ -48,6 +48,18 @@ void construct_subcolumn(TabletSchemaSPtr schema, const FieldType& type, int32_t vectorized::PathInData col_path(path); subcol.set_path_info(col_path); subcol.set_name(col_path.get_path()); + + if (type == FieldType::OLAP_FIELD_TYPE_ARRAY) { + TabletColumn array_item_col; + // double not support inverted index + array_item_col.set_type(FieldType::OLAP_FIELD_TYPE_DOUBLE); + array_item_col.set_is_nullable(true); + array_item_col.set_unique_id(-1); + array_item_col.set_parent_unique_id(col_unique_id); + + subcol.add_sub_column(array_item_col); + } + schema->append_column(subcol); subcolumns->emplace_back(std::move(subcol)); } diff --git a/regression-test/data/variant_github_events_new_p2/load.out b/regression-test/data/variant_github_events_new_p2/load.out index 0aeaaeed024..2b5aaa3d4b9 100644 Binary files a/regression-test/data/variant_github_events_new_p2/load.out and b/regression-test/data/variant_github_events_new_p2/load.out differ diff --git a/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out new file mode 100644 index 00000000000..ff409b7405a Binary files /dev/null and b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out differ diff --git a/regression-test/data/variant_p0/with_index/test_array_inverted_index_profile.out b/regression-test/data/variant_p0/with_index/test_array_inverted_index_profile.out new file mode 100644 index 00000000000..d2e04cd5b95 Binary files /dev/null and b/regression-test/data/variant_p0/with_index/test_array_inverted_index_profile.out differ diff --git a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy index 83ff988e039..80b31b05131 100644 --- a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy +++ b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy @@ -21,7 +21,6 @@ suite("test_array_contains_with_inverted_index"){ // If we use common expr pass to inverted index , we should set enable_common_expr_pushdown = true sql """ set enable_common_expr_pushdown = true; """ -// sql """ set enable_common_expr_pushdown_for_inverted_index = true; """ sql """ set enable_pipeline_x_engine = true;""" sql """ set enable_profile = true;""" diff --git a/regression-test/suites/inverted_index_p0/test_array_with_inverted_index_all_type.groovy b/regression-test/suites/inverted_index_p0/test_array_with_inverted_index_all_type.groovy index 6b0c4b89f28..17a43ccdbfb 100644 --- a/regression-test/suites/inverted_index_p0/test_array_with_inverted_index_all_type.groovy +++ b/regression-test/suites/inverted_index_p0/test_array_with_inverted_index_all_type.groovy @@ -33,7 +33,6 @@ suite("test_array_with_inverted_index_all_type"){ sql """ set enable_profile = true;""" // If we use common expr pass to inverted index , we should set enable_common_expr_pushdown = true sql """ set enable_common_expr_pushdown = true; """ -// sql """ set enable_common_expr_pushdown_for_inverted_index = true; """ // duplicate key table with all type using standard parser for inverted index sql "DROP TABLE IF EXISTS ${indexTblNames[0]}" diff --git a/regression-test/suites/variant_github_events_new_p2/load.groovy b/regression-test/suites/variant_github_events_new_p2/load.groovy index 4291747f2d2..122e7001a3c 100644 --- a/regression-test/suites/variant_github_events_new_p2/load.groovy +++ b/regression-test/suites/variant_github_events_new_p2/load.groovy @@ -61,7 +61,19 @@ suite("regression_test_variant_github_events_p2", "nonConcurrent,p2"){ CREATE TABLE IF NOT EXISTS ${table_name} ( k bigint, v variant, - INDEX idx_var(v) USING INVERTED PROPERTIES("parser" = "english") COMMENT '' + INDEX idx_var(v) USING INVERTED COMMENT '' + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1", "disable_auto_compaction" = "false"); + """ + + sql """DROP TABLE IF EXISTS github_events_arr""" + sql """ + CREATE TABLE IF NOT EXISTS github_events_arr ( + k bigint, + v array<text>, + INDEX idx_var(v) USING INVERTED COMMENT '' ) DUPLICATE KEY(`k`) DISTRIBUTED BY HASH(k) BUCKETS 4 @@ -69,14 +81,30 @@ suite("regression_test_variant_github_events_p2", "nonConcurrent,p2"){ """ // 2015 load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""") + sql """insert into github_events_arr select k, cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>) from github_events""" load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-1.json'}""") + sql """insert into github_events_arr select k, cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>) from github_events""" load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-2.json'}""") + sql """insert into github_events_arr select k, cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>) from github_events""" load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-3.json'}""") + sql """insert into github_events_arr select k, cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>) from github_events""" // 2022 load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-16.json'}""") + sql """insert into github_events_arr select k, cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>) from github_events""" load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-10.json'}""") + sql """insert into github_events_arr select k, cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>) from github_events""" load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-22.json'}""") + sql """insert into github_events_arr select k, cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>) from github_events""" load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-23.json'}""") + + // test array index + sql """insert into github_events_arr select k, cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>) from github_events""" + sql "set enable_common_expr_pushdown = true; " + qt_sql """select count() from github_events_arr where array_contains(v, 'css');""" + sql "set enable_common_expr_pushdown = false; " + qt_sql """select count() from github_events_arr where array_contains(v, 'css');""" + sql "set enable_common_expr_pushdown = true; " + // TODO fix compaction issue, this case could be stable qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int) from github_events where cast(v["repo"]["name"] as string) = 'xpressengine/xe-core' order by 1;""" // TODO add test case that some certain columns are materialized in some file while others are not materilized(sparse) @@ -127,4 +155,33 @@ suite("regression_test_variant_github_events_p2", "nonConcurrent,p2"){ sql """DELETE FROM github_events where k >= 9223372036854775107""" qt_sql_select_count """ select count(*) from github_events_2; """ -} + + trigger_and_wait_compaction("github_events", "full") + + // query and filterd by inverted index + profile("test_profile_1") { + sql """ set enable_common_expr_pushdown = true; """ + sql """ set enable_pipeline_x_engine = true;""" + sql """ set enable_profile = true;""" + sql """ set profile_level = 2;""" + run { + qt_sql_inv """/* test_profile_1 */ + select count() from github_events where arrays_overlap(cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>), ['javascript', 'css'] ) + """ + } + + check { profileString, exception -> + log.info(profileString) + // Use a regular expression to match the numeric value inside parentheses after "RowsInvertedIndexFiltered:" + def matcher = (profileString =~ /RowsInvertedIndexFiltered:\s+[^\(]+\((\d+)\)/) + def total = 0 + while (matcher.find()) { + total += matcher.group(1).toInteger() + } + // Assert that the sum of all matched numbers equals 67677 + assertEquals(67677, total) + } + } + sql """ set enable_common_expr_pushdown = true; """ + qt_sql_inv """select count() from github_events where arrays_overlap(cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>), ['javascript', 'css'] )""" +} \ No newline at end of file diff --git a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy b/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy similarity index 56% copy from regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy copy to regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy index 83ff988e039..e31ae973e91 100644 --- a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy +++ b/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy @@ -18,20 +18,20 @@ suite("test_array_contains_with_inverted_index"){ // prepare test table def indexTblName = "tai" + sql "set disable_inverted_index_v1_for_variant = false" // If we use common expr pass to inverted index , we should set enable_common_expr_pushdown = true sql """ set enable_common_expr_pushdown = true; """ -// sql """ set enable_common_expr_pushdown_for_inverted_index = true; """ - sql """ set enable_pipeline_x_engine = true;""" sql """ set enable_profile = true;""" sql "DROP TABLE IF EXISTS ${indexTblName}" // create 1 replica table + def storageFormat = new Random().nextBoolean() ? "V1" : "V2" sql """ CREATE TABLE IF NOT EXISTS `${indexTblName}` ( `apply_date` date NULL COMMENT '', `id` varchar(60) NOT NULL COMMENT '', - `inventors` array<text> NULL COMMENT '', + `inventors` variant NULL COMMENT '', INDEX index_inverted_inventors(inventors) USING INVERTED COMMENT '' ) ENGINE=OLAP DUPLICATE KEY(`apply_date`, `id`) @@ -43,67 +43,68 @@ suite("test_array_contains_with_inverted_index"){ "storage_format" = "V2", "light_schema_change" = "true", "disable_auto_compaction" = "false", - "enable_single_replica_compaction" = "false" + "enable_single_replica_compaction" = "false", + "inverted_index_storage_format" = "$storageFormat" ); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '[\"a\", \"b\", \"c\"]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '[\"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\"]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '48a33ec3453a28bce84b8f96fe161956', '[\"m\"]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '021603e7dcfe65d44af0efd0e5aee154', '[\"n\"]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '9fcb57ae675f0af4d613d9e6c0e8a2a2', '[\"o\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '{"inventors":["a", "b", "c"]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '{"inventors":["d", "e", "f", "g", "h", "i", "j", "k", "l"]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '48a33ec3453a28bce84b8f96fe161956', '{"inventors":["m"]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '021603e7dcfe65d44af0efd0e5aee154', '{"inventors":["n"]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '9fcb57ae675f0af4d613d9e6c0e8a2a2', '{"inventors":["o"]}'); """ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a3'); """ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a4', NULL); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a5', '[]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '[null,null,null]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a7', [null,null,null]); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a8', []); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a648a447b8f71522f11632eba4b4adde', '[\"p\", \"q\", \"r\", \"s\", \"t\"]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a9fb5c985c90bf05f3bee5ca3ae95260', '[\"u\", \"v\"]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '[\"w\"]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '[\"x\"]'); """ - sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a5', '{"inventors":[]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '{"inventors":[null,null,null]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a7', '{"inventors":[null,null,null]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a8', '{"inventors":[]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a648a447b8f71522f11632eba4b4adde', '{"inventors":["p", "q", "r", "s", "t"]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a9fb5c985c90bf05f3bee5ca3ae95260', '{"inventors":["u", "v"]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '{"inventors":["w"]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '{"inventors":["x"]}'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '{"inventors":["y"]}'); """ sql """ set enable_common_expr_pushdown = true """ qt_sql """ select count() from ${indexTblName}""" - def param_contains = ["\'s\'", "\'\'", null] + def param_contains = ["'s'", "''", null] for (int i = 0 ; i < param_contains.size(); ++i) { def p = param_contains[i] log.info("param: ${p}") - order_qt_sql """ select * from tai where array_contains(inventors, ${p}) order by id; """ - order_qt_sql """ select * from tai where array_contains(inventors, ${p}) and apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select * from tai where array_contains(inventors, ${p}) and apply_date = '2019-01-01' order by id; """ - order_qt_sql """ select * from tai where array_contains(inventors, ${p}) or apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select * from tai where !array_contains(inventors, ${p}) order by id; """ - order_qt_sql """ select * from tai where !array_contains(inventors, ${p}) and apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select * from tai where !array_contains(inventors, ${p}) and apply_date = '2019-01-01' order by id; """ - order_qt_sql """ select * from tai where !array_contains(inventors, ${p}) or apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select * from tai where (array_contains(inventors, ${p}) and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select * from tai where array_contains(cast(inventors['inventors'] as array<text>), ${p}) order by id; """ + order_qt_sql """ select * from tai where array_contains(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select * from tai where array_contains(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select * from tai where array_contains(cast(inventors['inventors'] as array<text>), ${p}) or apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select * from tai where !array_contains(cast(inventors['inventors'] as array<text>), ${p}) order by id; """ + order_qt_sql """ select * from tai where !array_contains(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select * from tai where !array_contains(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select * from tai where !array_contains(cast(inventors['inventors'] as array<text>), ${p}) or apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select * from tai where (array_contains(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """ } // test arrays_overlap with inverted index // now if we use inverted index we will not eval exprs - def param = [["\'s\'", "\'t\'"], [], null, ["\'s\'", "\'\'", "\'t\'"], ["\'s\'", null, "\'t\'"], [null, "\'\'"], ["\'s\'", null, "\'t\'", "\'\'"]] // null for arrays_overlap will return null which in predicate will lead to return empty set + def param = [["'s'", "'t'"], [], null, ["'s'", "''", "'t'"], ["'s'", null, "'t'"], [null, "''"], ["'s'", null, "'t'", "''"]] // null for arrays_overlap will return null which in predicate will lead to return empty set for (int i = 0 ; i < param.size(); ++i) { def p = param[i] log.info("param: ${p}") - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where arrays_overlap(inventors, ${p}) order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where arrays_overlap(inventors, ${p}) order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where arrays_overlap(inventors, ${p}) and apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where arrays_overlap(inventors, ${p}) and apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where arrays_overlap(inventors, ${p}) and apply_date = '2019-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where arrays_overlap(inventors, ${p}) and apply_date = '2019-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where arrays_overlap(inventors, ${p}) or apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where arrays_overlap(inventors, ${p}) or apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where !arrays_overlap(inventors, ${p}) order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where !arrays_overlap(inventors, ${p}) order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where !arrays_overlap(inventors, ${p}) and apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where !arrays_overlap(inventors, ${p}) and apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where !arrays_overlap(inventors, ${p}) and apply_date = '2019-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where !arrays_overlap(inventors, ${p}) and apply_date = '2019-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where !arrays_overlap(inventors, ${p}) or apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where !arrays_overlap(inventors, ${p}) or apply_date = '2017-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where (arrays_overlap(inventors, ${p}) and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """ - order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where (arrays_overlap(inventors, ${p}) and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) or apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) or apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) or apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) or apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = true)*/ * from tai where (arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown = false)*/ * from tai where (arrays_overlap(cast(inventors['inventors'] as array<text>), ${p}) and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """ } } diff --git a/regression-test/suites/variant_p0/with_index/test_array_index_write.groovy b/regression-test/suites/variant_p0/with_index/test_array_index_write.groovy new file mode 100644 index 00000000000..ba37537a8cc --- /dev/null +++ b/regression-test/suites/variant_p0/with_index/test_array_index_write.groovy @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_array_index_write", "nonConcurrent"){ + sql "set disable_inverted_index_v1_for_variant = false" + def create_variant_index_table = { testTablex, parser -> + def stmt = "CREATE TABLE IF NOT EXISTS " + testTablex + "(\n" + + " k1 INT NULL,\n" + + " c_arr VARIANT NULL COMMENT '',\n" + + String strTmp = parser == "" ? "INDEX index_inverted_c_arr(c_arr) USING INVERTED COMMENT 'c_arr index',\n" : + "INDEX index_inverted_c_arr(c_arr) USING INVERTED PROPERTIES( \"parser\"=\" " + parser + "\") COMMENT 'c_arr index',\n" + + stmt += strTmp + stmt = stmt.substring(0, stmt.length()-2) + def storageFormat = new Random().nextBoolean() ? "V1" : "V2" + stmt += ") \nENGINE=OLAP\n" + + "DUPLICATE KEY(`k1`)\n" + + "COMMENT 'OLAP'\n" + + "DISTRIBUTED BY HASH(`k1`) BUCKETS 10\n" + + "PROPERTIES(\"replication_num\" = \"1\", \"inverted_index_storage_format\" = \"$storageFormat\");" + return stmt + } + + def indexTbName = "test_variant_index_parser_empty" + sql create_variant_index_table.call(indexTbName, "") + + def checkpoints_name = "array_inverted_index.write_index" + + try { + GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [single_array_field_count: 3]) + sql "insert into ${indexTbName} values(1, '{\"c_arr\": [\"amory\", \"is\", \"committer\"]}')" + } finally { + GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name) + } + + try { + GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [single_array_field_count: 2]) + sql "insert into ${indexTbName} values(2, '{\"c_arr\": [\"amory\", \"better\"]}')" + } finally { + GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name) + } + + try { + GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [single_array_field_count: 1]) + sql "insert into ${indexTbName} values(3, '{\"c_arr\": [\"amory\", null]}')" + } finally { + GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name) + } + + try { + GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [single_array_field_count: 0]) + sql "insert into ${indexTbName} values(4, '{\"c_arr\": [null, null]}')" + } finally { + GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name) + } + + try { + GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [single_array_field_count: 0]) + sql "insert into ${indexTbName} values(5, '{\"c_arr\": []}')" + } finally { + GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name) + } + + try { + GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [single_array_field_count: 0]) + sql "insert into ${indexTbName} values(6, null)" + } finally { + GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name) + } + + // test multiple types + sql """insert into ${indexTbName} values(6, '{"c_arr" : ["text"]}')""" + sql """insert into ${indexTbName} values(6, '{"c_arr" : [1.1]}')""" + sql """insert into ${indexTbName} values(6, '{"c_arr" : [1.0]}')""" + sql """insert into ${indexTbName} values(6, '{"c_arr" : [90]}')""" + sql """insert into ${indexTbName} values(6, '{"c_arr" : [90999999999999]}')""" + +} diff --git a/regression-test/suites/variant_p0/with_index/test_array_inverted_index_profile.groovy b/regression-test/suites/variant_p0/with_index/test_array_inverted_index_profile.groovy new file mode 100644 index 00000000000..fb0af622b2a --- /dev/null +++ b/regression-test/suites/variant_p0/with_index/test_array_inverted_index_profile.groovy @@ -0,0 +1,156 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import groovy.json.JsonSlurper +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +suite("test_variant_arrayInvertedIdx_profile", "nonConcurrent"){ + // prepare test table + def indexTblName = "var_arr_idx" + def httpGet = { url -> + def dst = 'http://' + context.config.feHttpAddress + def conn = new URL(dst + url).openConnection() + conn.setRequestMethod("GET") + def encoding = Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" + + (context.config.feHttpPassword == null ? "" : context.config.feHttpPassword)).getBytes("UTF-8")) + conn.setRequestProperty("Authorization", "Basic ${encoding}") + return conn.getInputStream().getText() + } + + def checkRowsInvertedIndexFilter = { sql, expectedRowsInvertedIndexFiltered -> + order_qt_sql sql + def profileUrl = '/rest/v1/query_profile/' + def profiles = httpGet(profileUrl) + log.debug("profiles:{}", profiles); + profiles = new JsonSlurper().parseText(profiles) + assertEquals(0, profiles.code) + + def profileId = null; + for (def profile in profiles["data"]["rows"]) { + if (profile["Sql Statement"].contains(sql)) { + profileId = profile["Profile ID"] + break; + } + } + log.info("profileId:{}", profileId); + def profileDetail = httpGet("/rest/v1/query_profile/" + profileId) + String regex = "RowsInvertedIndexFiltered:.*(\\d+)" + Pattern pattern = Pattern.compile(regex) + Matcher matcher = pattern.matcher(profileDetail) + log.info("profileDetail:{}", profileDetail); + while (matcher.find()) { + int number = Integer.parseInt(matcher.group(1)) + log.info("filter number:{}", number) + assertEquals(expectedRowsInvertedIndexFiltered, number) + } + } + + // If we use common expr pass to inverted index , we should set enable_common_expr_pushdown = true + sql """ set enable_common_expr_pushdown = true; """ + sql """ set enable_profile = true;""" + sql """ set profile_level = 2;""" + sql "set disable_inverted_index_v1_for_variant = false" + + sql "DROP TABLE IF EXISTS ${indexTblName}" + def storageFormat = new Random().nextBoolean() ? "V1" : "V2" + // create 1 replica table + sql """ + CREATE TABLE IF NOT EXISTS `${indexTblName}` ( + `apply_date` date NULL COMMENT '', + `id` varchar(60) NOT NULL COMMENT '', + `inventors` variant NULL COMMENT '', + INDEX index_inverted_inventors(inventors) USING INVERTED COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`apply_date`, `id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false", + "inverted_index_storage_format" = "$storageFormat" + ); + """ + + sql """ INSERT INTO `var_arr_idx` (`apply_date`, `id`, `inventors`) VALUES + ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '{"inventors":["a", "b", "c"]}'), + ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a3', '{"inventors":[]}'), + ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '{"inventors": ["d", "e", "f", "g", "h", "i", "j", "k", "l"]}'), + ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a4', NULL), + ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332e', '{"inventors": ["m", "n", "o", "p", "q", "r", "s", "t", "u"]}'), + ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '{"inventors": [null,null,null]}'), + ('2019-01-01', 'd93d942d985a8fb7547c72dada8d332f', '{"inventors": ["v", "w", "x", "y", "z"]}'); """ + + + qt_sql1 """ select count() from ${indexTblName}""" + def checkpoints_name = "array_func.array_contains" + try { + GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [result_bitmap: 1]) + order_qt_sql2 "select apply_date,id, inventors['inventors'] from var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>), 'w') order by id;" + } finally { + GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name) + } + + int randomInt = new Random().nextInt(10) + + if (randomInt % 2) { + profile("test_profile_time_${randomInt}") { + run { + sql "/* test_profile_time_${randomInt} */ select apply_date,id, inventors['inventors'] from var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>), 'w') order by id" + } + + check { profileString, exception -> + log.info(profileString) + assertTrue(profileString.contains("RowsInvertedIndexFiltered: 6")) + } + } + } else { + profile("test_profile_time_${randomInt}") { + run { + sql "/* test_profile_time_${randomInt} */ select apply_date,id, inventors['inventors'] from var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>), 's') and apply_date = '2017-01-01' order by id" + } + + check { profileString, exception -> + log.info(profileString) + assertTrue(profileString.contains("RowsInvertedIndexFiltered: 5")) + } + } + } + + + // checkRowsInvertedIndexFilter.call("select apply_date,id, inventors['inventors'] from var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>), 'w') order by id;", 6) + + try { + GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [result_bitmap: 1]) + order_qt_sql3 """ select apply_date,id, inventors['inventors'] from var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>), 's') and apply_date = '2017-01-01' order by id; """ + } finally { + GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name) + } + // and apply_date will be vectorized filter left is 6 rows for inverted index + order_qt_sql4 """ select apply_date,id, inventors['inventors'] from var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>), 's') and apply_date = '2019-01-01' order by id; """ + + order_qt_sql5 """ select apply_date,id, inventors['inventors'] from var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>), 's') or apply_date = '2017-01-01' order by id; """ + order_qt_sql6 """ select apply_date,id, inventors['inventors'] from var_arr_idx where !array_contains(cast(inventors['inventors'] as array<text>), 's') order by id; """ + order_qt_sql7 """ select apply_date,id, inventors['inventors'] from var_arr_idx where !array_contains(cast(inventors['inventors'] as array<text>), 's') and apply_date = '2017-01-01' order by id; """ + order_qt_sql8 """ select apply_date,id, inventors['inventors'] from var_arr_idx where !array_contains(cast(inventors['inventors'] as array<text>), 's') and apply_date = '2019-01-01' order by id; """ + order_qt_sql9 """ select apply_date,id, inventors['inventors'] from var_arr_idx where !array_contains(cast(inventors['inventors'] as array<text>), 's') or apply_date = '2017-01-01' order by id; """ + order_qt_sql10 """ select apply_date,id, inventors['inventors'] from var_arr_idx where (array_contains(cast(inventors['inventors'] as array<text>), 's') and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """ +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org