This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new cf243787a16 Branch-3.0 [fix](array-index) Fix types that do not support indexing also in array nesting should also not be supported. #50162 (#50315) cf243787a16 is described below commit cf243787a16579b999ebc7f9026002ab3b655b59 Author: amory <wangqian...@selectdb.com> AuthorDate: Tue Apr 29 14:00:02 2025 +0800 Branch-3.0 [fix](array-index) Fix types that do not support indexing also in array nesting should also not be supported. #50162 (#50315) ### What problem does this PR solve? backport: https://github.com/apache/doris/pull/50162 and patch https://github.com/apache/doris/pull/50382 Issue Number: close #xxx --- .../java/org/apache/doris/analysis/IndexDef.java | 23 ++- .../trees/plans/commands/info/IndexDefinition.java | 22 ++- .../org/apache/doris/analysis/IndexDefTest.java | 71 +++++++++ .../trees/plans/commands/IndexDefinitionTest.java | 73 ++++++++++ .../data/inverted_index_p0/test_array_index2.out | Bin 0 -> 277 bytes .../inverted_index_p0/test_array_index2.groovy | 160 +++++++++++++++++++++ 6 files changed, 343 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java index 530c0c3f9ff..edf62f44d52 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java @@ -17,9 +17,11 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.thrift.TInvertedIndexFileStorageFormat; @@ -214,6 +216,22 @@ public class IndexDef { return (this.indexType == IndexType.INVERTED); } + // Check if the 
column type is supported for inverted index + public boolean isSupportIdxType(Type colType) { + if (colType.isArrayType()) { + Type itemType = ((ArrayType) colType).getItemType(); + if (itemType.isArrayType()) { + return false; + } + return isSupportIdxType(itemType); + } + PrimitiveType primitiveType = colType.getPrimitiveType(); + return primitiveType.isDateType() || primitiveType.isDecimalV2Type() || primitiveType.isDecimalV3Type() + || primitiveType.isFixedPointType() || primitiveType.isStringType() + || primitiveType == PrimitiveType.BOOLEAN + || primitiveType.isVariantType() || primitiveType.isIPType(); + } + public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKeyMergeOnWrite, TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat) throws AnalysisException { if (indexType == IndexType.BITMAP || indexType == IndexType.INVERTED || indexType == IndexType.BLOOMFILTER @@ -221,9 +239,8 @@ public class IndexDef { String indexColName = column.getName(); caseSensitivityColumns.add(indexColName); PrimitiveType colType = column.getDataType(); - if (!(colType.isDateType() || colType.isDecimalV2Type() || colType.isDecimalV3Type() - || colType.isFixedPointType() || colType.isStringType() || colType == PrimitiveType.BOOLEAN - || colType.isVariantType() || colType.isIPType() || colType.isArrayType())) { + Type columnType = column.getType(); + if (!isSupportIdxType(columnType)) { throw new AnalysisException(colType + " is not supported in " + indexType.toString() + " index. 
" + "invalid index: " + indexName); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java index 77a90755250..b0f97e2fc27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.Index; import org.apache.doris.catalog.KeysType; import org.apache.doris.common.Config; import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.util.Utils; import org.apache.doris.thrift.TInvertedIndexFileStorageFormat; @@ -91,6 +92,23 @@ public class IndexDefinition { this.comment = comment; } + /** + * Check if the column type is supported for inverted index + */ + public boolean isSupportIdxType(DataType columnType) { + if (columnType.isArrayType()) { + DataType itemType = ((ArrayType) columnType).getItemType(); + if (itemType.isArrayType()) { + return false; + } + return isSupportIdxType(itemType); + } + return columnType.isDateLikeType() || columnType.isDecimalLikeType() + || columnType.isIntegralType() || columnType.isStringLikeType() + || columnType.isBooleanType() || columnType.isVariantType() + || columnType.isIPType(); + } + /** * checkColumn */ @@ -102,9 +120,7 @@ public class IndexDefinition { String indexColName = column.getName(); caseSensitivityCols.add(indexColName); DataType colType = column.getType(); - if (!(colType.isDateLikeType() || colType.isDecimalLikeType() || colType.isArrayType() - || colType.isIntegralType() || colType.isStringLikeType() - || colType.isBooleanType() || colType.isVariantType() || colType.isIPType())) { + if (!isSupportIdxType(colType)) { // TODO add 
colType.isAggState() throw new AnalysisException(colType + " is not supported in " + indexType.toString() + " index. " + "invalid index: " + name); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java index 3d528ce1b68..96bcd033e61 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java @@ -17,9 +17,14 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.ScalarType; +import org.apache.doris.catalog.StructField; +import org.apache.doris.catalog.StructType; import org.apache.doris.common.AnalysisException; import org.apache.doris.thrift.TInvertedIndexFileStorageFormat; @@ -28,6 +33,8 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import java.util.ArrayList; + public class IndexDefTest { private IndexDef def; @@ -80,4 +87,68 @@ public class IndexDefTest { Assert.assertEquals("INDEX `index1` ON table1 (`col1`) USING INVERTED COMMENT 'balabala'", def.toSql("table1")); } + + @Test + public void testArrayTypeSupport() throws AnalysisException { + def = new IndexDef("array_index", false, Lists.newArrayList("col1"), + IndexDef.IndexType.INVERTED, null, "array test"); + + // Test array of supported types + Column arrayOfString = new Column("col1", + ArrayType.create(ScalarType.createVarchar(10), false)); + def.checkColumn(arrayOfString, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1); + + Column arrayOfInt = new Column("col1", + ArrayType.create(ScalarType.createType(PrimitiveType.INT), false)); + def.checkColumn(arrayOfInt, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1); + + Column 
arrayOfDate = new Column("col1", + ArrayType.create(ScalarType.createType(PrimitiveType.DATE), false)); + def.checkColumn(arrayOfDate, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1); + + // Array<Array<String>> + try { + Column nestedArray = new Column("col1", + ArrayType.create(ArrayType.create(ScalarType.createVarchar(10), false), false)); + def.checkColumn(nestedArray, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1); + Assert.fail("No exception throws for unsupported array element type."); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains("is not supported in")); + } + + // Test array of unsupported types + try { + Column arrayOfFloat = new Column("col1", + ArrayType.create(ScalarType.createType(PrimitiveType.FLOAT), false)); + def.checkColumn(arrayOfFloat, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1); + Assert.fail("No exception throws for unsupported array element type."); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains("is not supported in")); + } + + try { + // Array<Map<String, Int>> + Column arrayOfMap = new Column("col1", + ArrayType.create(new MapType( + ScalarType.createVarchar(10), + ScalarType.createType(PrimitiveType.INT)), false)); + def.checkColumn(arrayOfMap, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1); + Assert.fail("No exception throws for array of map type."); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains("is not supported in")); + } + + try { + // Array<Struct<name:String, age:Int>> + ArrayList<StructField> fields = new ArrayList<>(); + fields.add(new StructField("name", ScalarType.createVarchar(10), null)); + fields.add(new StructField("age", ScalarType.createType(PrimitiveType.INT), null)); + Column arrayOfStruct = new Column("col1", + ArrayType.create(new StructType(fields), false)); + def.checkColumn(arrayOfStruct, KeysType.DUP_KEYS, true, TInvertedIndexFileStorageFormat.V1); + 
Assert.fail("No exception throws for array of struct type."); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains("is not supported in")); + } + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java index 803ae6d508b..93caa559cc2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java @@ -22,8 +22,13 @@ import org.apache.doris.catalog.KeysType; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.plans.commands.info.ColumnDefinition; import org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition; +import org.apache.doris.nereids.types.ArrayType; +import org.apache.doris.nereids.types.FloatType; import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.nereids.types.MapType; import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.StructField; +import org.apache.doris.nereids.types.StructType; import org.apache.doris.nereids.types.VariantType; import org.apache.doris.thrift.TInvertedIndexFileStorageFormat; @@ -31,6 +36,7 @@ import com.google.common.collect.Lists; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.util.ArrayList; import java.util.HashMap; import java.util.Map; @@ -50,6 +56,73 @@ public class IndexDefinitionTest { } } + void testArrayTypeSupport() throws AnalysisException { + IndexDefinition def = new IndexDefinition("array_index", Lists.newArrayList("col1"), + "INVERTED", null, "array test"); + + // Test array of supported types + def.checkColumn(new ColumnDefinition("col1", + ArrayType.of(StringType.INSTANCE), false, AggregateType.NONE, true, null, "comment"), + 
KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1); + + def.checkColumn(new ColumnDefinition("col1", + ArrayType.of(IntegerType.INSTANCE), false, AggregateType.NONE, true, null, "comment"), + KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1); + + def.checkColumn(new ColumnDefinition("col1", + ArrayType.of(ArrayType.of(StringType.INSTANCE)), false, + AggregateType.NONE, true, null, "comment"), + KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1); + + // Test array of unsupported types + try { + // Array<Float> + def.checkColumn(new ColumnDefinition("col1", + ArrayType.of(FloatType.INSTANCE), false, + AggregateType.NONE, true, null, "comment"), + KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1); + Assertions.fail("No exception throws for unsupported array element type (Float)."); + } catch (AnalysisException e) { + Assertions.assertTrue(e.getMessage().contains("is not supported in")); + } + + try { + // Array<Array<String>> + def.checkColumn(new ColumnDefinition("col1", + ArrayType.of(ArrayType.of(StringType.INSTANCE)), false, + AggregateType.NONE, true, null, "comment"), + KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1); + Assertions.fail("No exception throws for array of array type."); + } catch (AnalysisException e) { + Assertions.assertTrue(e.getMessage().contains("is not supported in")); + } + + try { + // Array<Map<String, Int>> + def.checkColumn(new ColumnDefinition("col1", + ArrayType.of(MapType.of(StringType.INSTANCE, IntegerType.INSTANCE)), false, + AggregateType.NONE, true, null, "comment"), + KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1); + Assertions.fail("No exception throws for array of map type."); + } catch (AnalysisException e) { + Assertions.assertTrue(e.getMessage().contains("is not supported in")); + } + + try { + // Array<Struct<name:String, age:Int>> + ArrayList<StructField> fields = new ArrayList<>(); + fields.add(new StructField("name", StringType.INSTANCE, true, 
null)); + fields.add(new StructField("age", IntegerType.INSTANCE, true, null)); + def.checkColumn(new ColumnDefinition("col1", + ArrayType.of(new StructType(fields)), false, + AggregateType.NONE, true, null, "comment"), + KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1); + Assertions.fail("No exception throws for array of struct type."); + } catch (AnalysisException e) { + Assertions.assertTrue(e.getMessage().contains("is not supported in")); + } + } + @Test void testNgramBFIndex() throws AnalysisException { Map<String, String> properties = new HashMap<>(); diff --git a/regression-test/data/inverted_index_p0/test_array_index2.out b/regression-test/data/inverted_index_p0/test_array_index2.out new file mode 100644 index 00000000000..03ffac07a76 Binary files /dev/null and b/regression-test/data/inverted_index_p0/test_array_index2.out differ diff --git a/regression-test/suites/inverted_index_p0/test_array_index2.groovy b/regression-test/suites/inverted_index_p0/test_array_index2.groovy new file mode 100644 index 00000000000..9fb6747ca83 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_array_index2.groovy @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_array_index2") { + def tableName1 = "array_test_supported" + def tableName2 = "array_test_unsupported" + + def timeout = 60000 + def delta_time = 1000 + def alter_res = "null" + def useTime = 0 + + def wait_for_latest_op_on_table_finish = { table_name, OpTimeout -> + for(int t = delta_time; t <= OpTimeout; t += delta_time) { + alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${table_name}" ORDER BY CreateTime DESC LIMIT 1;""" + alter_res = alter_res.toString() + if(alter_res.contains("FINISHED")) { + sleep(10000) // wait change table state to normal + logger.info(table_name + " latest alter job finished, detail: " + alter_res) + break + } + useTime = t + sleep(delta_time) + } + assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout") + } + + sql "DROP TABLE IF EXISTS ${tableName1}" + sql "DROP TABLE IF EXISTS ${tableName2}" + + // Create table with supported array types + sql """ + CREATE TABLE ${tableName1} ( + id int, + str_arr ARRAY<STRING>, + int_arr ARRAY<INT>, + date_arr ARRAY<DATE> + ) ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + // Insert test data before creating indexes + sql """ INSERT INTO ${tableName1} VALUES + (1, ['hello', 'world'], [1, 2, 3], ['2023-01-01', '2023-01-02']), + (2, ['doris', 'apache'], [4, 5, 6], ['2023-02-01', '2023-02-02']), + (3, NULL, NULL, NULL), + (4, [], [], []), + (5, ['test', 'array'], [7, 8, 9], ['2023-03-01', '2023-03-02']), + (6, ['index', 'support'], [10, 11, 12], ['2023-04-01', '2023-04-02']); + """ + + // Create indexes on supported array types - should succeed + sql """ ALTER TABLE ${tableName1} ADD INDEX idx_str_arr (str_arr) USING INVERTED; """ + wait_for_latest_op_on_table_finish(tableName1, timeout) + + sql """ ALTER TABLE ${tableName1} ADD INDEX idx_int_arr (int_arr) USING INVERTED; """ + wait_for_latest_op_on_table_finish(tableName1, timeout) + + sql """ 
ALTER TABLE ${tableName1} ADD INDEX idx_date_arr (date_arr) USING INVERTED; """ + wait_for_latest_op_on_table_finish(tableName1, timeout) + + // Create table with unsupported array types + sql """ + CREATE TABLE ${tableName2} ( + id int, + nested_arr ARRAY<ARRAY<STRING>>, + map_arr ARRAY<MAP<STRING,INT>>, + float_arr ARRAY<FLOAT>, + struct_arr ARRAY<STRUCT< + name:STRING, + age:INT, + score:FLOAT + >> + ) ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + // Insert some data into unsupported array type table + sql """ INSERT INTO ${tableName2} VALUES + (1, [['a', 'b'], ['c', 'd']], [{'key1': 1, 'key2': 2}], [1.1, 2.2], array(named_struct('name', 'Alice', 'age', 20, 'score', 85.5))), + (2, [['e', 'f']], [{'key3': 3}], [3.3], array(named_struct('name', 'Bob', 'age', 25, 'score', 90.0))); + """ + + test { + sql """ ALTER TABLE ${tableName2} ADD INDEX idx_nested_arr (nested_arr) USING INVERTED; """ + exception "is not supported in" + } + + // Test creating index on array of map - should fail + test { + sql """ ALTER TABLE ${tableName2} ADD INDEX idx_map_arr (map_arr) USING INVERTED; """ + exception "is not supported in" + } + + // Test creating index on array of float - should fail + test { + sql """ ALTER TABLE ${tableName2} ADD INDEX idx_float_arr (float_arr) USING INVERTED; """ + exception "is not supported in" + } + + // Test creating index on array of struct - should fail + test { + sql """ ALTER TABLE ${tableName2} ADD INDEX idx_struct_arr (struct_arr) USING INVERTED; """ + exception "is not supported in" + } + + // Test array_contains function + qt_sql """ + SELECT id, str_arr, int_arr, date_arr + FROM ${tableName1} + WHERE array_contains(str_arr, 'world') + OR array_contains(int_arr, 8) + OR array_contains(date_arr, '2023-03-01') + ORDER BY id; + """ + + // Test array_contains with multiple conditions + qt_sql """ + SELECT id + FROM ${tableName1} + WHERE 
array_contains(str_arr, 'apache') + AND array_contains(int_arr, 5) + AND array_contains(date_arr, '2023-02-02') + ORDER BY id; + """ + + // Test array_contains with NULL and empty arrays + qt_sql """ + SELECT id, str_arr + FROM ${tableName1} + WHERE array_contains(str_arr, 'test') + OR str_arr IS NULL + ORDER BY id; + """ + + sql "DROP TABLE IF EXISTS ${tableName1}" + sql "DROP TABLE IF EXISTS ${tableName2}" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org