This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new cf243787a16 Branch-3.0 [fix](array-index) Fix types that do not 
support indexing also in array nesting should also not be supported. #50162 
(#50315)
cf243787a16 is described below

commit cf243787a16579b999ebc7f9026002ab3b655b59
Author: amory <wangqian...@selectdb.com>
AuthorDate: Tue Apr 29 14:00:02 2025 +0800

    Branch-3.0 [fix](array-index) Fix types that do not support indexing also 
in array nesting should also not be supported. #50162 (#50315)
    
    ### What problem does this PR solve?
    backport: https://github.com/apache/doris/pull/50162
    and patch https://github.com/apache/doris/pull/50382
    Issue Number: close #xxx
---
 .../java/org/apache/doris/analysis/IndexDef.java   |  23 ++-
 .../trees/plans/commands/info/IndexDefinition.java |  22 ++-
 .../org/apache/doris/analysis/IndexDefTest.java    |  71 +++++++++
 .../trees/plans/commands/IndexDefinitionTest.java  |  73 ++++++++++
 .../data/inverted_index_p0/test_array_index2.out   | Bin 0 -> 277 bytes
 .../inverted_index_p0/test_array_index2.groovy     | 160 +++++++++++++++++++++
 6 files changed, 343 insertions(+), 6 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index 530c0c3f9ff..edf62f44d52 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -17,9 +17,11 @@
 
 package org.apache.doris.analysis;
 
+import org.apache.doris.catalog.ArrayType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
 import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
@@ -214,6 +216,22 @@ public class IndexDef {
         return (this.indexType == IndexType.INVERTED);
     }
 
+    // An indexable type is a supported scalar, or a single-level array whose element is a supported scalar
+    public boolean isSupportIdxType(Type colType) {
+        Type scalarType = colType;
+        if (colType.isArrayType()) {
+            // unwrap exactly one array level; ARRAY<ARRAY<...>> is rejected outright
+            scalarType = ((ArrayType) colType).getItemType();
+            if (scalarType.isArrayType()) {
+                return false;
+            }
+        }
+        PrimitiveType elemType = scalarType.getPrimitiveType();
+        return elemType == PrimitiveType.BOOLEAN || elemType.isFixedPointType() || elemType.isStringType()
+                || elemType.isDateType() || elemType.isDecimalV2Type() || elemType.isDecimalV3Type()
+                || elemType.isVariantType() || elemType.isIPType();
+    }
+
     public void checkColumn(Column column, KeysType keysType, boolean 
enableUniqueKeyMergeOnWrite,
             TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat) 
throws AnalysisException {
         if (indexType == IndexType.BITMAP || indexType == IndexType.INVERTED 
|| indexType == IndexType.BLOOMFILTER
@@ -221,9 +239,8 @@ public class IndexDef {
             String indexColName = column.getName();
             caseSensitivityColumns.add(indexColName);
             PrimitiveType colType = column.getDataType();
-            if (!(colType.isDateType() || colType.isDecimalV2Type() || 
colType.isDecimalV3Type()
-                    || colType.isFixedPointType() || colType.isStringType() || 
colType == PrimitiveType.BOOLEAN
-                    || colType.isVariantType() || colType.isIPType() || 
colType.isArrayType())) {
+            Type columnType = column.getType();
+            if (!isSupportIdxType(columnType)) {
                 throw new AnalysisException(colType + " is not supported in " 
+ indexType.toString() + " index. "
                         + "invalid index: " + indexName);
             }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
index 77a90755250..b0f97e2fc27 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
@@ -25,6 +25,7 @@ import org.apache.doris.catalog.Index;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.common.Config;
 import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.types.ArrayType;
 import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.util.Utils;
 import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
@@ -91,6 +92,23 @@ public class IndexDefinition {
         this.comment = comment;
     }
 
+    /**
+     * Tells whether an index may be built on columnType: a supported scalar or a one-level array of one.
+     */
+    public boolean isSupportIdxType(DataType columnType) {
+        DataType scalarType = columnType;
+        if (columnType.isArrayType()) {
+            // unwrap exactly one array level; ARRAY<ARRAY<...>> is rejected outright
+            scalarType = ((ArrayType) columnType).getItemType();
+            if (scalarType.isArrayType()) {
+                return false;
+            }
+        }
+        return scalarType.isBooleanType() || scalarType.isIntegralType() || scalarType.isStringLikeType()
+                || scalarType.isDateLikeType() || scalarType.isDecimalLikeType()
+                || scalarType.isVariantType() || scalarType.isIPType();
+    }
+
     /**
      * checkColumn
      */
@@ -102,9 +120,7 @@ public class IndexDefinition {
             String indexColName = column.getName();
             caseSensitivityCols.add(indexColName);
             DataType colType = column.getType();
-            if (!(colType.isDateLikeType() || colType.isDecimalLikeType() || 
colType.isArrayType()
-                    || colType.isIntegralType() || colType.isStringLikeType()
-                    || colType.isBooleanType() || colType.isVariantType() || 
colType.isIPType())) {
+            if (!isSupportIdxType(colType)) {
                 // TODO add colType.isAggState()
                 throw new AnalysisException(colType + " is not supported in " 
+ indexType.toString()
                         + " index. " + "invalid index: " + name);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java
index 3d528ce1b68..96bcd033e61 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/IndexDefTest.java
@@ -17,9 +17,14 @@
 
 package org.apache.doris.analysis;
 
+import org.apache.doris.catalog.ArrayType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.KeysType;
+import org.apache.doris.catalog.MapType;
 import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.ScalarType;
+import org.apache.doris.catalog.StructField;
+import org.apache.doris.catalog.StructType;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
 
@@ -28,6 +33,8 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.util.ArrayList;
+
 public class IndexDefTest {
     private IndexDef def;
 
@@ -80,4 +87,68 @@ public class IndexDefTest {
         Assert.assertEquals("INDEX `index1` ON table1 (`col1`) USING INVERTED 
COMMENT 'balabala'",
                 def.toSql("table1"));
     }
+
+    @Test
+    public void testArrayTypeSupport() throws AnalysisException {
+        def = new IndexDef("array_index", false, Lists.newArrayList("col1"),
+                IndexDef.IndexType.INVERTED, null, "array test");
+
+        // Arrays of VARCHAR, INT and DATE: checkColumn must return without throwing
+        Column arrayOfString = new Column("col1",
+                ArrayType.create(ScalarType.createVarchar(10), false));
+        def.checkColumn(arrayOfString, KeysType.DUP_KEYS, true, 
TInvertedIndexFileStorageFormat.V1);
+
+        Column arrayOfInt = new Column("col1",
+                ArrayType.create(ScalarType.createType(PrimitiveType.INT), 
false));
+        def.checkColumn(arrayOfInt, KeysType.DUP_KEYS, true, 
TInvertedIndexFileStorageFormat.V1);
+
+        Column arrayOfDate = new Column("col1",
+                ArrayType.create(ScalarType.createType(PrimitiveType.DATE), 
false));
+        def.checkColumn(arrayOfDate, KeysType.DUP_KEYS, true, 
TInvertedIndexFileStorageFormat.V1);
+
+        // Array<Array<String>>: a nested array must fail with "is not supported in"
+        try {
+            Column nestedArray = new Column("col1",
+                    
ArrayType.create(ArrayType.create(ScalarType.createVarchar(10), false), false));
+            def.checkColumn(nestedArray, KeysType.DUP_KEYS, true, 
TInvertedIndexFileStorageFormat.V1);
+            Assert.fail("No exception throws for unsupported array element 
type.");
+        } catch (AnalysisException e) {
+            Assert.assertTrue(e.getMessage().contains("is not supported in"));
+        }
+
+        // Array<Float>: an array of a non-indexable scalar must fail the same way
+        try {
+            Column arrayOfFloat = new Column("col1",
+                    
ArrayType.create(ScalarType.createType(PrimitiveType.FLOAT), false));
+            def.checkColumn(arrayOfFloat, KeysType.DUP_KEYS, true, 
TInvertedIndexFileStorageFormat.V1);
+            Assert.fail("No exception throws for unsupported array element 
type.");
+        } catch (AnalysisException e) {
+            Assert.assertTrue(e.getMessage().contains("is not supported in"));
+        }
+
+        try {
+            // Array<Map<String, Int>>: a MAP element must be rejected
+            Column arrayOfMap = new Column("col1",
+                    ArrayType.create(new MapType(
+                            ScalarType.createVarchar(10),
+                            ScalarType.createType(PrimitiveType.INT)), false));
+            def.checkColumn(arrayOfMap, KeysType.DUP_KEYS, true, 
TInvertedIndexFileStorageFormat.V1);
+            Assert.fail("No exception throws for array of map type.");
+        } catch (AnalysisException e) {
+            Assert.assertTrue(e.getMessage().contains("is not supported in"));
+        }
+
+        try {
+            // Array<Struct<name:String, age:Int>>: a STRUCT element must be rejected
+            ArrayList<StructField> fields = new ArrayList<>();
+            fields.add(new StructField("name", ScalarType.createVarchar(10), 
null));
+            fields.add(new StructField("age", 
ScalarType.createType(PrimitiveType.INT), null));
+            Column arrayOfStruct = new Column("col1",
+                    ArrayType.create(new StructType(fields), false));
+            def.checkColumn(arrayOfStruct, KeysType.DUP_KEYS, true, 
TInvertedIndexFileStorageFormat.V1);
+            Assert.fail("No exception throws for array of struct type.");
+        } catch (AnalysisException e) {
+            Assert.assertTrue(e.getMessage().contains("is not supported in"));
+        }
+    }
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
index 803ae6d508b..93caa559cc2 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/IndexDefinitionTest.java
@@ -22,8 +22,13 @@ import org.apache.doris.catalog.KeysType;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.trees.plans.commands.info.ColumnDefinition;
 import org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.FloatType;
 import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.MapType;
 import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.StructField;
+import org.apache.doris.nereids.types.StructType;
 import org.apache.doris.nereids.types.VariantType;
 import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;
 
@@ -31,6 +36,7 @@ import com.google.common.collect.Lists;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -50,6 +56,73 @@ public class IndexDefinitionTest {
         }
     }
 
+    /**
+     * Inverted index on ARRAY columns: arrays of STRING and INT pass checkColumn, while arrays of
+     * FLOAT, ARRAY, MAP or STRUCT must be rejected with a message containing "is not supported in".
+     */
+    @Test
+    void testArrayTypeSupport() throws AnalysisException {
+        IndexDefinition def = new IndexDefinition("array_index", Lists.newArrayList("col1"),
+                "INVERTED", null, "array test");
+
+        // Arrays of supported element types: checkColumn must return without throwing
+        def.checkColumn(new ColumnDefinition("col1",
+                ArrayType.of(StringType.INSTANCE), false, AggregateType.NONE, true, null, "comment"),
+                KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1);
+
+        def.checkColumn(new ColumnDefinition("col1",
+                ArrayType.of(IntegerType.INSTANCE), false, AggregateType.NONE, true, null, "comment"),
+                KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1);
+
+        // Arrays of unsupported element types: every case below must throw
+        try {
+            // Array<Float>
+            def.checkColumn(new ColumnDefinition("col1",
+                    ArrayType.of(FloatType.INSTANCE), false,
+                    AggregateType.NONE, true, null, "comment"),
+                    KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1);
+            Assertions.fail("No exception throws for unsupported array element type (Float).");
+        } catch (AnalysisException e) {
+            Assertions.assertTrue(e.getMessage().contains("is not supported in"));
+        }
+
+        try {
+            // Array<Array<String>>
+            def.checkColumn(new ColumnDefinition("col1",
+                    ArrayType.of(ArrayType.of(StringType.INSTANCE)), false,
+                    AggregateType.NONE, true, null, "comment"),
+                    KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1);
+            Assertions.fail("No exception throws for array of array type.");
+        } catch (AnalysisException e) {
+            Assertions.assertTrue(e.getMessage().contains("is not supported in"));
+        }
+
+        try {
+            // Array<Map<String, Int>>
+            def.checkColumn(new ColumnDefinition("col1",
+                    ArrayType.of(MapType.of(StringType.INSTANCE, IntegerType.INSTANCE)), false,
+                    AggregateType.NONE, true, null, "comment"),
+                    KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1);
+            Assertions.fail("No exception throws for array of map type.");
+        } catch (AnalysisException e) {
+            Assertions.assertTrue(e.getMessage().contains("is not supported in"));
+        }
+
+        try {
+            // Array<Struct<name:String, age:Int>>
+            ArrayList<StructField> fields = new ArrayList<>();
+            fields.add(new StructField("name", StringType.INSTANCE, true, null));
+            fields.add(new StructField("age", IntegerType.INSTANCE, true, null));
+            def.checkColumn(new ColumnDefinition("col1",
+                    ArrayType.of(new StructType(fields)), false,
+                    AggregateType.NONE, true, null, "comment"),
+                    KeysType.DUP_KEYS, false, TInvertedIndexFileStorageFormat.V1);
+            Assertions.fail("No exception throws for array of struct type.");
+        } catch (AnalysisException e) {
+            Assertions.assertTrue(e.getMessage().contains("is not supported in"));
+        }
+    }
+
     @Test
     void testNgramBFIndex() throws AnalysisException {
         Map<String, String> properties = new HashMap<>();
diff --git a/regression-test/data/inverted_index_p0/test_array_index2.out 
b/regression-test/data/inverted_index_p0/test_array_index2.out
new file mode 100644
index 00000000000..03ffac07a76
Binary files /dev/null and 
b/regression-test/data/inverted_index_p0/test_array_index2.out differ
diff --git a/regression-test/suites/inverted_index_p0/test_array_index2.groovy 
b/regression-test/suites/inverted_index_p0/test_array_index2.groovy
new file mode 100644
index 00000000000..9fb6747ca83
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_array_index2.groovy
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_index2") {
+    def tableName1 = "array_test_supported"
+    def tableName2 = "array_test_unsupported"
+
+    def timeout = 60000
+    def delta_time = 1000
+    def alter_res = "null"
+    def useTime = 0
+
+    def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
+        // Poll the latest ALTER job of table_name every delta_time ms until it reports FINISHED or OpTimeout ms are used up
+        for(int t = delta_time; t <= OpTimeout; t += delta_time) {
+            alter_res = sql("""SHOW ALTER TABLE COLUMN WHERE TableName = "${table_name}" ORDER BY CreateTime DESC LIMIT 1;""").toString()
+            if(alter_res.contains("FINISHED")) {
+                sleep(10000) // wait change table state to normal
+                logger.info(table_name + " latest alter job finished, detail: " + alter_res)
+                break
+            }
+            useTime = t // elapsed wait in ms; it never exceeds OpTimeout, so it cannot signal a timeout
+            sleep(delta_time)
+        }
+        assertTrue(alter_res.contains("FINISHED"), "wait_for_latest_op_on_table_finish timeout")
+    }
+
+    sql "DROP TABLE IF EXISTS ${tableName1}"
+    sql "DROP TABLE IF EXISTS ${tableName2}"
+
+    // Create table with supported array types
+    sql """
+        CREATE TABLE ${tableName1} (
+            id int,
+            str_arr ARRAY<STRING>,
+            int_arr ARRAY<INT>,
+            date_arr ARRAY<DATE>
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    // Insert test data before creating indexes
+    sql """ INSERT INTO ${tableName1} VALUES
+        (1, ['hello', 'world'], [1, 2, 3], ['2023-01-01', '2023-01-02']),
+        (2, ['doris', 'apache'], [4, 5, 6], ['2023-02-01', '2023-02-02']),
+        (3, NULL, NULL, NULL),
+        (4, [], [], []),
+        (5, ['test', 'array'], [7, 8, 9], ['2023-03-01', '2023-03-02']),
+        (6, ['index', 'support'], [10, 11, 12], ['2023-04-01', '2023-04-02']);
+    """
+
+    // Create indexes on supported array types - should succeed
+    sql """ ALTER TABLE ${tableName1} ADD INDEX idx_str_arr (str_arr) USING 
INVERTED; """
+    wait_for_latest_op_on_table_finish(tableName1, timeout)
+
+    sql """ ALTER TABLE ${tableName1} ADD INDEX idx_int_arr (int_arr) USING 
INVERTED; """
+    wait_for_latest_op_on_table_finish(tableName1, timeout)
+
+    sql """ ALTER TABLE ${tableName1} ADD INDEX idx_date_arr (date_arr) USING 
INVERTED; """
+    wait_for_latest_op_on_table_finish(tableName1, timeout)
+
+    // Create table with unsupported array types
+    sql """
+        CREATE TABLE ${tableName2} (
+            id int,
+            nested_arr ARRAY<ARRAY<STRING>>,
+            map_arr ARRAY<MAP<STRING,INT>>,
+            float_arr ARRAY<FLOAT>,
+            struct_arr ARRAY<STRUCT<
+                name:STRING,
+                age:INT,
+                score:FLOAT
+            >>
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    // Insert some data into unsupported array type table
+    sql """ INSERT INTO ${tableName2} VALUES
+        (1, [['a', 'b'], ['c', 'd']], [{'key1': 1, 'key2': 2}], [1.1, 2.2], 
array(named_struct('name', 'Alice', 'age', 20, 'score', 85.5))),
+        (2, [['e', 'f']], [{'key3': 3}], [3.3], array(named_struct('name', 
'Bob', 'age', 25, 'score', 90.0)));
+    """
+
+    test {
+         sql """ ALTER TABLE ${tableName2} ADD INDEX idx_nested_arr 
(nested_arr) USING INVERTED; """
+         exception "is not supported in"
+    } 
+
+    // Test creating index on array of map - should fail
+    test {
+        sql """ ALTER TABLE ${tableName2} ADD INDEX idx_map_arr (map_arr) 
USING INVERTED; """
+        exception "is not supported in"
+    }
+
+    // Test creating index on array of float - should fail
+    test {
+        sql """ ALTER TABLE ${tableName2} ADD INDEX idx_float_arr (float_arr) 
USING INVERTED; """
+        exception "is not supported in"
+    }
+
+    // Test creating index on array of struct - should fail
+    test {
+        sql """ ALTER TABLE ${tableName2} ADD INDEX idx_struct_arr 
(struct_arr) USING INVERTED; """
+        exception "is not supported in"
+    }
+
+    // Test array_contains function
+    qt_sql """ 
+        SELECT id, str_arr, int_arr, date_arr 
+        FROM ${tableName1} 
+        WHERE array_contains(str_arr, 'world') 
+            OR array_contains(int_arr, 8) 
+            OR array_contains(date_arr, '2023-03-01')
+        ORDER BY id;
+    """
+
+    // Test array_contains with multiple conditions
+    qt_sql """
+        SELECT id 
+        FROM ${tableName1}
+        WHERE array_contains(str_arr, 'apache')
+            AND array_contains(int_arr, 5)
+            AND array_contains(date_arr, '2023-02-02')
+        ORDER BY id;
+    """
+
+    // Test array_contains with NULL and empty arrays
+    qt_sql """
+        SELECT id, str_arr
+        FROM ${tableName1}
+        WHERE array_contains(str_arr, 'test')
+            OR str_arr IS NULL
+        ORDER BY id;
+    """
+
+    sql "DROP TABLE IF EXISTS ${tableName1}"
+    sql "DROP TABLE IF EXISTS ${tableName2}"
+} 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to