This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new 1d4e8bafa91 [feature](predifine) modify syntax and add patternType 
field (#49895)
1d4e8bafa91 is described below

commit 1d4e8bafa914036d8132bd3d2365092bd2f406cf
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Wed Apr 9 14:28:28 2025 +0800

    [feature](predifine) modify syntax and add patternType field (#49895)
---
 .../org/apache/doris/catalog/VariantField.java     |  25 +++++++--
 .../antlr4/org/apache/doris/nereids/DorisLexer.g4  |   2 +
 .../antlr4/org/apache/doris/nereids/DorisParser.g4 |   6 ++-
 .../main/java/org/apache/doris/catalog/Column.java |  15 ++++--
 .../doris/nereids/parser/LogicalPlanBuilder.java   |   8 ++-
 .../apache/doris/nereids/types/VariantField.java   |  30 ++++++-----
 gensrc/thrift/Descriptors.thrift                   |   6 +++
 regression-test/data/variant_p0/predefine/load.out | Bin 7262 -> 7454 bytes
 .../data/variant_p0/predefine/sql/q01.out          | Bin 1740 -> 2108 bytes
 .../suites/variant_p0/predefine/load.groovy        |  60 ++++++++++-----------
 10 files changed, 97 insertions(+), 55 deletions(-)

diff --git 
a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantField.java 
b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantField.java
index 4ee931ef08f..556dabb21a2 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantField.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantField.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.catalog;
 
+import org.apache.doris.thrift.TPatternType;
 import org.apache.doris.thrift.TTypeDesc;
 import org.apache.doris.thrift.TTypeNode;
 
@@ -24,19 +25,29 @@ import com.google.common.base.Strings;
 import com.google.gson.annotations.SerializedName;
 
 public class VariantField {
+
     @SerializedName(value = "fp")
     protected final String pattern;
 
-    @SerializedName(value = "type")
+    @SerializedName(value = "ft")
     protected final Type type;
 
-    @SerializedName(value = "c")
+    @SerializedName(value = "fc")
     protected final String comment;
 
-    public VariantField(String pattern, Type type, String comment) {
+    @SerializedName(value = "fpt")
+    protected final TPatternType patternType;
+
+    public VariantField(String pattern, Type type, String comment, 
TPatternType patternType) {
         this.pattern = pattern;
         this.type = type;
         this.comment = comment;
+        this.patternType = patternType;
+    }
+
+    // Convenience constructor: patternType defaults to MATCH_NAME_GLOB.
+    public VariantField(String pattern, Type type, String comment) {
+        this(pattern, type, comment, TPatternType.MATCH_NAME_GLOB);
     }
 
     public Type getType() {
@@ -51,6 +62,10 @@ public class VariantField {
         return comment;
     }
 
+    public TPatternType getPatternType() {
+        return patternType;
+    }
+
     public String toSql(int depth) {
         String typeSql;
         if (depth < Type.MAX_NESTING_DEPTH) {
@@ -58,9 +73,9 @@ public class VariantField {
         } else {
             typeSql = "...";
         }
-        StringBuilder sb = new StringBuilder(pattern);
+        StringBuilder sb = new StringBuilder(patternType.toString() + " ");
         if (type != null) {
-            sb.append(":").append(typeSql);
+            sb.append(pattern).append(":").append(typeSql);
         }
         return sb.toString();
     }
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index 68e85a39a82..31576ded912 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -343,6 +343,8 @@ MATCH_PHRASE: 'MATCH_PHRASE';
 MATCH_PHRASE_EDGE: 'MATCH_PHRASE_EDGE';
 MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX';
 MATCH_REGEXP: 'MATCH_REGEXP';
+MATCH_NAME: 'MATCH_NAME';
+MATCH_NAME_GLOB: 'MATCH_NAME_GLOB';
 MATERIALIZED: 'MATERIALIZED';
 MAX: 'MAX';
 MAXVALUE: 'MAXVALUE';
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index cd6da72dfc8..f6096008e0d 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -1731,9 +1731,11 @@ variantSubColTypeList
     : variantSubColType (COMMA variantSubColType)*
     ;
 variantSubColType
-    : qualifiedName COLON dataType commentSpec?
+    : variantSubColMatchType? STRING_LITERAL COLON dataType commentSpec?
+    ;
+variantSubColMatchType
+    : (MATCH_NAME | MATCH_NAME_GLOB)
     ;
-
 commentSpec
     : COMMENT STRING_LITERAL
     ;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index 16526b44620..5736afd0a90 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -36,6 +36,7 @@ import org.apache.doris.proto.OlapFile;
 import org.apache.doris.thrift.TAggregationType;
 import org.apache.doris.thrift.TColumn;
 import org.apache.doris.thrift.TColumnType;
+import org.apache.doris.thrift.TPatternType;
 import org.apache.doris.thrift.TPrimitiveType;
 
 import com.google.common.base.Strings;
@@ -157,9 +158,9 @@ public class Column implements GsonPostProcessable {
     @SerializedName(value = "gctt")
     private Set<String> generatedColumnsThatReferToThis = new HashSet<>();
 
-    @SerializedName(value = "idxid")
-    // variant subfield index id list(support multi index)
-    private List<Long> indexIdList = Lists.newArrayList();
+    // used for variant sub-field pattern type
+    @SerializedName(value = "fpt")
+    private TPatternType fieldPatternType;
 
     public Column() {
         this.name = "";
@@ -352,6 +353,7 @@ public class Column implements GsonPostProcessable {
                 // set column name as pattern
                 Column c = new Column(field.pattern, field.getType());
                 c.setIsAllowNull(true);
+                c.setFieldPatternType(field.getPatternType());
                 column.addChildrenColumn(c);
             }
         }
@@ -554,6 +556,10 @@ public class Column implements GsonPostProcessable {
         this.isAllowNull = isAllowNull;
     }
 
+    public void setFieldPatternType(TPatternType type) {
+        fieldPatternType = type;
+    }
+
     public String getDefaultValue() {
         return this.defaultValue;
     }
@@ -681,6 +687,9 @@ public class Column implements GsonPostProcessable {
 
         childrenTColumn.setColumnType(childrenTColumnType);
         childrenTColumn.setIsAllowNull(children.isAllowNull());
+        if (children.fieldPatternType != null) {
+            childrenTColumn.setPatternType(children.fieldPatternType);
+        }
         // TODO: If we don't set the aggregate type for children, the type 
will be
         //  considered as TAggregationType::SUM after deserializing in BE.
         //  For now, we make children inherit the aggregate type from their 
parent.
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 499cffe5df6..70c71dfde6d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -4096,7 +4096,13 @@ public class LogicalPlanBuilder extends 
DorisParserBaseVisitor<Object> {
         } else {
             comment = "";
         }
-        return new VariantField(ctx.qualifiedName().getText(), 
typedVisit(ctx.dataType()), comment);
+        String pattern = ctx.STRING_LITERAL().getText();
+        pattern = pattern.substring(1, pattern.length() - 1);
+        if (ctx.variantSubColMatchType() != null) {
+            return new VariantField(pattern, typedVisit(ctx.dataType()), 
comment,
+                    ctx.variantSubColMatchType().getText());
+        }
+        return new VariantField(pattern, typedVisit(ctx.dataType()), comment);
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantField.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantField.java
index 1a092c1160a..ca9ed52156d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantField.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantField.java
@@ -18,6 +18,7 @@
 package org.apache.doris.nereids.types;
 
 import org.apache.doris.nereids.util.Utils;
+import org.apache.doris.thrift.TPatternType;
 
 import java.util.Objects;
 
@@ -25,21 +26,33 @@ import java.util.Objects;
  * A field inside a StructType.
  */
 public class VariantField {
-
     private final String pattern;
     private final DataType dataType;
     private final String comment;
+    private final TPatternType patternType;
+
+    public VariantField(String pattern, DataType dataType, String comment) {
+        this(pattern, dataType, comment, TPatternType.MATCH_NAME_GLOB.name());
+    }
 
     /**
      * StructField Constructor
      *  @param pattern of this field
      *  @param dataType The data type of this field
      *  @param comment The comment of this field
+     *  @param patternType The patternType of this field
      */
-    public VariantField(String pattern, DataType dataType, String comment) {
+    public VariantField(String pattern, DataType dataType, String comment, 
String patternType) {
         this.pattern = Objects.requireNonNull(pattern, "pattern should not be 
null");
         this.dataType = Objects.requireNonNull(dataType, "dataType should not 
be null");
         this.comment = Objects.requireNonNull(comment, "comment should not be 
null");
+        TPatternType type;
+        if (patternType.equalsIgnoreCase("MATCH_NAME")) {
+            type = TPatternType.MATCH_NAME;
+        } else {
+            type = TPatternType.MATCH_NAME_GLOB;
+        }
+        this.patternType = Objects.requireNonNull(type, "patternType should 
not be null");
     }
 
     public String getPattern() {
@@ -54,20 +67,9 @@ public class VariantField {
         return comment;
     }
 
-    public VariantField conversion() {
-        if (this.dataType.equals(dataType.conversion())) {
-            return this;
-        }
-        return withDataType(dataType.conversion());
-    }
-
-    public VariantField withDataType(DataType dataType) {
-        return new VariantField(pattern, dataType, comment);
-    }
-
     public org.apache.doris.catalog.VariantField toCatalogDataType() {
         return new org.apache.doris.catalog.VariantField(
-                pattern, dataType.toCatalogDataType(), comment);
+                pattern, dataType.toCatalogDataType(), comment, patternType);
     }
 
     public String toSql() {
diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift
index f4070114617..550b20308bd 100644
--- a/gensrc/thrift/Descriptors.thrift
+++ b/gensrc/thrift/Descriptors.thrift
@@ -22,6 +22,11 @@ include "Types.thrift"
 include "Exprs.thrift"
 include "Partitions.thrift"
 
+enum TPatternType {
+  MATCH_NAME = 1,
+  MATCH_NAME_GLOB = 2
+}
+
 struct TColumn {
     1: required string column_name
     2: required Types.TColumnType column_type
@@ -43,6 +48,7 @@ struct TColumn {
     18: optional bool is_auto_increment = false;
     19: optional i32 cluster_key_id = -1
     20: optional i32 be_exec_version = -1
+    21: optional TPatternType pattern_type
 }
 
 struct TSlotDescriptor {
diff --git a/regression-test/data/variant_p0/predefine/load.out 
b/regression-test/data/variant_p0/predefine/load.out
index ca4161c3a01..e2347eb8508 100644
Binary files a/regression-test/data/variant_p0/predefine/load.out and 
b/regression-test/data/variant_p0/predefine/load.out differ
diff --git a/regression-test/data/variant_p0/predefine/sql/q01.out 
b/regression-test/data/variant_p0/predefine/sql/q01.out
index 54f9bace38b..aa40b7653e9 100644
Binary files a/regression-test/data/variant_p0/predefine/sql/q01.out and 
b/regression-test/data/variant_p0/predefine/sql/q01.out differ
diff --git a/regression-test/suites/variant_p0/predefine/load.groovy 
b/regression-test/suites/variant_p0/predefine/load.groovy
index 7ccb8068024..cf0ad73491f 100644
--- a/regression-test/suites/variant_p0/predefine/load.groovy
+++ b/regression-test/suites/variant_p0/predefine/load.groovy
@@ -21,7 +21,7 @@ suite("regression_test_variant_predefine_schema", "p0"){
         CREATE TABLE `test_predefine` (
             `id` bigint NOT NULL,
             `type` varchar(30) NULL,
-            `v1` 
variant<a.b.c:int,ss:string,dcm:decimal,dt:datetime,ip:ipv4,a.b.d:double> NULL,
+            `v1` 
variant<'a.b.c':int,'ss':string,'dcm':decimal,'dt':datetime,'ip':ipv4,'a.b.d':double>
 NULL,
             INDEX idx_var_sub(`v1`) USING INVERTED PROPERTIES("parser" = 
"english") )
         ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 3
         PROPERTIES ( "replication_allocation" = "tag.location.default: 1", 
"variant_max_subcolumns_count" = "0");
@@ -80,29 +80,29 @@ suite("regression_test_variant_predefine_schema", "p0"){
         CREATE TABLE `test_predefine2` (
             `id` bigint NOT NULL,
             `v1` variant<
-                array_int:array<int>,
-                array_string:array<string>,
-                array_decimal:array<decimalv3(27,9)>,
-                array_datetime:array<datetime>,
-                array_datetimev2:array<datetimev2>,
-                array_date:array<date>,
-                array_datev2:array<datev2>,
-                array_ipv4:array<ipv4>,
-                array_ipv6:array<ipv6>,
-                array_float:array<float>,
-                array_boolean:array<boolean>,
-                int_:int, 
-                string_:string, 
-                decimal_:decimalv3(27,9), 
-                datetime_:datetime,
-                datetimev2_:datetimev2(6),
-                date_:date,
-                datev2_:datev2,
-                ipv4_:ipv4,
-                ipv6_:ipv6,
-                float_:float,
-                boolean_:boolean,
-                varchar_:varchar
+                'array_int':array<int>,
+                'array_string':array<string>,
+                'array_decimal':array<decimalv3(27,9)>,
+                'array_datetime':array<datetime>,
+                'array_datetimev2':array<datetimev2>,
+                'array_date':array<date>,
+                'array_datev2':array<datev2>,
+                'array_ipv4':array<ipv4>,
+                'array_ipv6':array<ipv6>,
+                'array_float':array<float>,
+                'array_boolean':array<boolean>,
+                'int_':int, 
+                'string_':string, 
+                'decimal_':decimalv3(27,9), 
+                'datetime_':datetime,
+                'datetimev2_':datetimev2(6),
+                'date_':date,
+                'datev2_':datev2,
+                'ipv4_':ipv4,
+                'ipv6_':ipv6,
+                'float_':float,
+                'boolean_':boolean,
+                'varchar_':varchar
             > NULL
         ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 2
         PROPERTIES ( "replication_allocation" = "tag.location.default: 1", 
"variant_max_subcolumns_count" = "0");
@@ -201,9 +201,9 @@ suite("regression_test_variant_predefine_schema", "p0"){
 
     // // schema change
     // // 1. add column
-    sql "alter table test_predefine1 add column v2 
variant<dcm:decimal,dt:datetime> default null"
+    sql "alter table test_predefine1 add column v2 
variant<'dcm':decimal,'dt':datetime> default null"
     sql """insert into test_predefine1 values(101, '{"a" :1}', '{"dcm": 
1111111}')""" 
-    sql "alter table test_predefine1 add column v3 
variant<dcm:decimal,dt:datetime,ip:ipv6> default null"
+    sql "alter table test_predefine1 add column v3 
variant<'dcm':decimal,'dt':datetime,'ip':ipv6> default null"
     sql """insert into test_predefine1 values(102, '{"a" :1}', '{"dcm": 
1111111}', '{"dcm": 1111111}');"""
     // 2. todo support alter column type
     // sql "alter table test_predefine1 modify column v3 
variant<dcm:decimal,dt:datetime,ip:ipv6>"
@@ -216,7 +216,7 @@ suite("regression_test_variant_predefine_schema", "p0"){
     sql "DROP TABLE IF EXISTS test_predefine3"
     sql """CREATE TABLE `test_predefine3` (
             `id` bigint NOT NULL,
-            `v` variant<`nested.a`: string> NULL)
+            `v` variant<'nested.a':string> NULL)
         ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1
         PROPERTIES ( "replication_allocation" = "tag.location.default: 1", 
"variant_enable_flatten_nested" = "true", "variant_max_subcolumns_count" = 
"0");"""
 
@@ -251,7 +251,7 @@ suite("regression_test_variant_predefine_schema", "p0"){
     sql """
     CREATE TABLE `region_insert` (
       `k` bigint NULL,
-      `var` 
variant<c_acctbal:text,c_address:text,c_comment:text,c_custkey:text,c_mktsegment:text,c_name:text,c_nationkey:text,c_phone:text,p_brand:float,p_comment:text,p_container:text,p_mfgr:text,p_name:text,p_partkey:text,p_retailprice:text,p_size:text,p_type:text,r_comment:text,r_name:text,r_regionkey:text,ps_availqty:text,ps_comment:text,ps_none:text,ps_partkey:text,ps_suppkey:text,ps_supplycost:text,s_acctbal:text,s_address:text,s_comment:text,s_name:text,s_nationkey:text,s_phone:t
 [...]
+      `var` 
variant<'c_acctbal':text,'c_address':text,'c_comment':text,'c_custkey':text,'c_mktsegment':text,'c_name':text,'c_nationkey':text,'c_phone':text,'p_brand':float,'p_comment':text,'p_container':text,'p_mfgr':text,'p_name':text,'p_partkey':text,'p_retailprice':text,'p_size':text,'p_type':text,'r_comment':text,'r_name':text,'r_regionkey':text,'ps_availqty':text,'ps_comment':text,'ps_none':text,'ps_partkey':text,'ps_suppkey':text,'ps_supplycost':text,'key_46':text,'key_47':text,'ke
 [...]
       `OfvZr` variant NULL
     ) ENGINE=OLAP
     DUPLICATE KEY(`k`)
@@ -281,7 +281,7 @@ suite("regression_test_variant_predefine_schema", "p0"){
     sql """
         CREATE TABLE `test_bf_with_bool` (
       `k` bigint NULL,
-      `var` variant<c_bool:boolean>
+      `var` variant<'c_bool':boolean>
     ) ENGINE=OLAP
     DUPLICATE KEY(`k`)
     DISTRIBUTED BY HASH(`k`) BUCKETS 5
@@ -300,7 +300,7 @@ suite("regression_test_variant_predefine_schema", "p0"){
     sql """
         CREATE TABLE `test_array_with_nulls` (
       `k` bigint NULL,
-      `var` variant<array_decimal:array<decimalv3(27,9)>>
+      `var` variant<match_name 'array_decimal':array<decimalv3(27,9)>>
     ) ENGINE=OLAP
     DUPLICATE KEY(`k`)
     DISTRIBUTED BY HASH(`k`) BUCKETS 1


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to