This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant-sparse in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push: new 3e500feb482 [Improve](predefine) add index type check for sub field in variant (#50241) 3e500feb482 is described below commit 3e500feb482179b8cd4efeedb5d96e4b5dc2c8fe Author: lihangyu <lihan...@selectdb.com> AuthorDate: Mon Apr 21 23:07:09 2025 +0800 [Improve](predefine) add index type check for sub field in variant (#50241) --- .../org/apache/doris/analysis/CreateTableStmt.java | 9 ++ .../java/org/apache/doris/analysis/IndexDef.java | 20 ++- .../trees/plans/commands/info/CreateTableInfo.java | 9 ++ .../trees/plans/commands/info/IndexDefinition.java | 19 ++- .../data/inverted_index_p0/test_array_index2.out | Bin 0 -> 277 bytes .../inverted_index_p0/test_array_index2.groovy | 158 +++++++++++++++++++++ .../variant_p0/predefine/test_predefine_ddl.groovy | 18 +++ 7 files changed, 227 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index 552d178763a..c85bc5e1bbd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -782,6 +782,8 @@ public class CreateTableStmt extends DdlStmt implements NotFallbackInParser { return !engineName.equals("olap"); } + // 1. if the column is variant type, check it's field pattern is valid + // 2. if the column is not variant type, check it's index def is valid private void columnToIndexesCheck() throws AnalysisException { for (Map.Entry<Column, List<IndexDef>> entry : columnToIndexes.entrySet()) { Column column = entry.getKey(); @@ -798,10 +800,12 @@ public class CreateTableStmt extends DdlStmt implements NotFallbackInParser { continue; } boolean findFieldPattern = false; + boolean fieldSupportedIndex = false; VariantType variantType = (VariantType) column.getType(); for (VariantField field : variantType.getPredefinedFields()) { if (field.getPattern().equals(fieldPattern)) { findFieldPattern = true; + fieldSupportedIndex = IndexDef.isSupportIdxType(field.getType()); break; } } @@ -809,6 +813,11 @@ public class CreateTableStmt extends DdlStmt implements NotFallbackInParser { throw new AnalysisException("can not find field pattern: " + fieldPattern + " in variant column: " + column.getName()); } + if (!fieldSupportedIndex) { + throw new AnalysisException("field pattern: " + + fieldPattern + " is not supported for inverted index" + + " of column: " + column.getName()); + } } if (variantIndex > 1) { throw new AnalysisException("variant column: " + column.getName() diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java index e486ded7104..ce7c1a4ae28 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java @@ -17,9 +17,11 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.thrift.TInvertedIndexFileStorageFormat; @@ -218,6 +220,19 @@ public class IndexDef { return (this.indexType == IndexType.INVERTED); } + // Check if the column type is supported for inverted index + public static boolean isSupportIdxType(Type colType) { + if (colType.isArrayType()) { + Type itemType = ((ArrayType) colType).getItemType(); + return isSupportIdxType(itemType); + } + PrimitiveType primitiveType = colType.getPrimitiveType(); + return primitiveType.isDateType() || primitiveType.isDecimalV2Type() || primitiveType.isDecimalV3Type() + || primitiveType.isFixedPointType() || primitiveType.isStringType() + || primitiveType == PrimitiveType.BOOLEAN + || primitiveType.isVariantType() || primitiveType.isIPType(); + } + public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKeyMergeOnWrite, TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat, boolean disableInvertedIndexV1ForVariant) throws AnalysisException { @@ -226,9 +241,8 @@ public class IndexDef { String indexColName = column.getName(); caseSensitivityColumns.add(indexColName); PrimitiveType colType = column.getDataType(); - if (!(colType.isDateType() || colType.isDecimalV2Type() || colType.isDecimalV3Type() - || colType.isFixedPointType() || colType.isStringType() || colType == PrimitiveType.BOOLEAN - || colType.isVariantType() || colType.isIPType() || colType.isArrayType())) { + Type columnType = column.getType(); + if (!isSupportIdxType(columnType)) { throw new AnalysisException(colType + " is not supported in " + indexType.toString() + " index. " + "invalid index: " + indexName); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java index 3601de41732..32ddbafa3ce 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java @@ -984,6 +984,8 @@ public class CreateTableInfo { } } + // 1. if the column is variant type, check it's field pattern is valid + // 2. if the column is not variant type, check it's index def is valid private void columnToIndexesCheck() { for (Map.Entry<ColumnDefinition, List<IndexDefinition>> entry : columnToIndexes.entrySet()) { ColumnDefinition column = entry.getKey(); @@ -1000,11 +1002,13 @@ public class CreateTableInfo { continue; } boolean findFieldPattern = false; + boolean fieldSupportedIndex = false; VariantType variantType = (VariantType) column.getType(); List<VariantField> predefinedFields = variantType.getPredefinedFields(); for (VariantField field : predefinedFields) { if (field.getPattern().equals(fieldPattern)) { findFieldPattern = true; + fieldSupportedIndex = IndexDefinition.isSupportIdxType(field.getDataType()); break; } } @@ -1012,6 +1016,11 @@ public class CreateTableInfo { throw new AnalysisException("can not find field pattern: " + fieldPattern + " in column: " + column.getName()); } + if (!fieldSupportedIndex) { + throw new AnalysisException("field pattern: " + + fieldPattern + " is not supported for inverted index" + + " of column: " + column.getName()); + } } if (variantIndex > 1) { throw new AnalysisException("column: " + column.getName() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java index 16ecf4199fa..5d4e592bd53 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java @@ -24,6 +24,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Index; import org.apache.doris.catalog.KeysType; import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.util.Utils; import org.apache.doris.thrift.TInvertedIndexFileStorageFormat; @@ -107,6 +108,20 @@ public class IndexDefinition { this.comment = null; } + /** + * Check if the column type is supported for inverted index + */ + public static boolean isSupportIdxType(DataType columnType) { + if (columnType.isArrayType()) { + DataType itemType = ((ArrayType) columnType).getItemType(); + return isSupportIdxType(itemType); + } + return columnType.isDateLikeType() || columnType.isDecimalLikeType() + || columnType.isIntegralType() || columnType.isStringLikeType() + || columnType.isBooleanType() || columnType.isVariantType() + || columnType.isIPType(); + } + /** * checkColumn */ @@ -119,9 +134,7 @@ public class IndexDefinition { String indexColName = column.getName(); caseSensitivityCols.add(indexColName); DataType colType = column.getType(); - if (!(colType.isDateLikeType() || colType.isDecimalLikeType() || colType.isArrayType() - || colType.isIntegralType() || colType.isStringLikeType() - || colType.isBooleanType() || colType.isVariantType() || colType.isIPType())) { + if (!isSupportIdxType(colType)) { // TODO add colType.isAggState() throw new AnalysisException(colType + " is not supported in " + indexType.toString() + " index. " + "invalid index: " + name); diff --git a/regression-test/data/inverted_index_p0/test_array_index2.out b/regression-test/data/inverted_index_p0/test_array_index2.out new file mode 100644 index 00000000000..03ffac07a76 Binary files /dev/null and b/regression-test/data/inverted_index_p0/test_array_index2.out differ diff --git a/regression-test/suites/inverted_index_p0/test_array_index2.groovy b/regression-test/suites/inverted_index_p0/test_array_index2.groovy new file mode 100644 index 00000000000..e10065be141 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_array_index2.groovy @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_array_index2") { + def tableName1 = "array_test_supported" + def tableName2 = "array_test_unsupported" + + def timeout = 60000 + def delta_time = 1000 + def alter_res = "null" + def useTime = 0 + + def wait_for_latest_op_on_table_finish = { table_name, OpTimeout -> + for(int t = delta_time; t <= OpTimeout; t += delta_time) { + alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${table_name}" ORDER BY CreateTime DESC LIMIT 1;""" + alter_res = alter_res.toString() + if(alter_res.contains("FINISHED")) { + sleep(10000) // wait change table state to normal + logger.info(table_name + " latest alter job finished, detail: " + alter_res) + break + } + useTime = t + sleep(delta_time) + } + assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout") + } + + sql "DROP TABLE IF EXISTS ${tableName1}" + sql "DROP TABLE IF EXISTS ${tableName2}" + + // Create table with supported array types + sql """ + CREATE TABLE ${tableName1} ( + id int, + str_arr ARRAY<STRING>, + int_arr ARRAY<INT>, + date_arr ARRAY<DATE> + ) ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + // Insert test data before creating indexes + sql """ INSERT INTO ${tableName1} VALUES + (1, ['hello', 'world'], [1, 2, 3], ['2023-01-01', '2023-01-02']), + (2, ['doris', 'apache'], [4, 5, 6], ['2023-02-01', '2023-02-02']), + (3, NULL, NULL, NULL), + (4, [], [], []), + (5, ['test', 'array'], [7, 8, 9], ['2023-03-01', '2023-03-02']), + (6, ['index', 'support'], [10, 11, 12], ['2023-04-01', '2023-04-02']); + """ + + // Create indexes on supported array types - should succeed + sql """ ALTER TABLE ${tableName1} ADD INDEX idx_str_arr (str_arr) USING INVERTED; """ + wait_for_latest_op_on_table_finish(tableName1, timeout) + + sql """ ALTER TABLE ${tableName1} ADD INDEX idx_int_arr (int_arr) USING INVERTED; """ + wait_for_latest_op_on_table_finish(tableName1, timeout) + + sql """ ALTER TABLE ${tableName1} ADD INDEX idx_date_arr (date_arr) USING INVERTED; """ + wait_for_latest_op_on_table_finish(tableName1, timeout) + + // Create table with unsupported array types + sql """ + CREATE TABLE ${tableName2} ( + id int, + nested_arr ARRAY<ARRAY<STRING>>, + map_arr ARRAY<MAP<STRING,INT>>, + float_arr ARRAY<FLOAT>, + struct_arr ARRAY<STRUCT< + name:STRING, + age:INT, + score:FLOAT + >> + ) ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + // Insert some data into unsupported array type table + sql """ INSERT INTO ${tableName2} VALUES + (1, [['a', 'b'], ['c', 'd']], [{'key1': 1, 'key2': 2}], [1.1, 2.2], array(named_struct('name', 'Alice', 'age', 20, 'score', 85.5))), + (2, [['e', 'f']], [{'key3': 3}], [3.3], array(named_struct('name', 'Bob', 'age', 25, 'score', 90.0))); + """ + + sql """ ALTER TABLE ${tableName2} ADD INDEX idx_nested_arr (nested_arr) USING INVERTED; """ + wait_for_latest_op_on_table_finish(tableName2, timeout) + + // Test creating index on array of map - should fail + test { + sql """ ALTER TABLE ${tableName2} ADD INDEX idx_map_arr (map_arr) USING INVERTED; """ + exception "is not supported in" + } + + // Test creating index on array of float - should fail + test { + sql """ ALTER TABLE ${tableName2} ADD INDEX idx_float_arr (float_arr) USING INVERTED; """ + exception "is not supported in" + } + + // Test creating index on array of struct - should fail + test { + sql """ ALTER TABLE ${tableName2} ADD INDEX idx_struct_arr (struct_arr) USING INVERTED; """ + exception "is not supported in" + } + + // Test array_contains function + qt_sql """ + SELECT id, str_arr, int_arr, date_arr + FROM ${tableName1} + WHERE array_contains(str_arr, 'world') + OR array_contains(int_arr, 8) + OR array_contains(date_arr, '2023-03-01') + ORDER BY id; + """ + + // Test array_contains with multiple conditions + qt_sql """ + SELECT id + FROM ${tableName1} + WHERE array_contains(str_arr, 'apache') + AND array_contains(int_arr, 5) + AND array_contains(date_arr, '2023-02-02') + ORDER BY id; + """ + + // Test array_contains with NULL and empty arrays + qt_sql """ + SELECT id, str_arr + FROM ${tableName1} + WHERE array_contains(str_arr, 'test') + OR str_arr IS NULL + ORDER BY id; + """ + + sql "DROP TABLE IF EXISTS ${tableName1}" + sql "DROP TABLE IF EXISTS ${tableName2}" +} diff --git a/regression-test/suites/variant_p0/predefine/test_predefine_ddl.groovy b/regression-test/suites/variant_p0/predefine/test_predefine_ddl.groovy index a40c0708d29..40f351bdd2b 100644 --- a/regression-test/suites/variant_p0/predefine/test_predefine_ddl.groovy +++ b/regression-test/suites/variant_p0/predefine/test_predefine_ddl.groovy @@ -257,4 +257,22 @@ suite("test_predefine_ddl", "p0"){ findException = true } assertTrue(findException) + + findException = false + try { + sql "DROP TABLE IF EXISTS test_ddl_table" + sql """CREATE TABLE test_ddl_table ( + `id` bigint NULL, + `var` variant< + MATCH_NAME 'ab' : double + > NULL, + INDEX idx_ab (var) USING INVERTED PROPERTIES("field_pattern"="ab", "parser"="unicode", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) + BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction" = "true")""" + } catch (Exception e) { + log.info(e.getMessage()) + assertTrue(e.getMessage().contains("is not supported for inverted index")) + findException = true + } + assertTrue(findException) } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org