This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant-sparse in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push: new 1d4e8bafa91 [feature](predifine) modify syntax and add patternType field (#49895) 1d4e8bafa91 is described below commit 1d4e8bafa914036d8132bd3d2365092bd2f406cf Author: lihangyu <lihan...@selectdb.com> AuthorDate: Wed Apr 9 14:28:28 2025 +0800 [feature](predifine) modify syntax and add patternType field (#49895) --- .../org/apache/doris/catalog/VariantField.java | 25 +++++++-- .../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 2 + .../antlr4/org/apache/doris/nereids/DorisParser.g4 | 6 ++- .../main/java/org/apache/doris/catalog/Column.java | 15 ++++-- .../doris/nereids/parser/LogicalPlanBuilder.java | 8 ++- .../apache/doris/nereids/types/VariantField.java | 30 ++++++----- gensrc/thrift/Descriptors.thrift | 6 +++ regression-test/data/variant_p0/predefine/load.out | Bin 7262 -> 7454 bytes .../data/variant_p0/predefine/sql/q01.out | Bin 1740 -> 2108 bytes .../suites/variant_p0/predefine/load.groovy | 60 ++++++++++----------- 10 files changed, 97 insertions(+), 55 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantField.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantField.java index 4ee931ef08f..556dabb21a2 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantField.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantField.java @@ -17,6 +17,7 @@ package org.apache.doris.catalog; +import org.apache.doris.thrift.TPatternType; import org.apache.doris.thrift.TTypeDesc; import org.apache.doris.thrift.TTypeNode; @@ -24,19 +25,29 @@ import com.google.common.base.Strings; import com.google.gson.annotations.SerializedName; public class VariantField { + @SerializedName(value = "fp") protected final String pattern; - @SerializedName(value = "type") + @SerializedName(value = "ft") protected final Type type; - @SerializedName(value = "c") + @SerializedName(value = "fc") protected final String comment; - public VariantField(String pattern, Type type, String comment) { + @SerializedName(value = "fpt") + protected final TPatternType patternType; + + public VariantField(String pattern, Type type, String comment, TPatternType patternType) { this.pattern = pattern; this.type = type; this.comment = comment; + this.patternType = patternType; + } + + // default MATCH_GLOB + public VariantField(String pattern, Type type, String comment) { + this(pattern, type, comment, TPatternType.MATCH_NAME_GLOB); } public Type getType() { @@ -51,6 +62,10 @@ public class VariantField { return comment; } + public TPatternType getPatternType() { + return patternType; + } + public String toSql(int depth) { String typeSql; if (depth < Type.MAX_NESTING_DEPTH) { @@ -58,9 +73,9 @@ public class VariantField { } else { typeSql = "..."; } - StringBuilder sb = new StringBuilder(pattern); + StringBuilder sb = new StringBuilder(patternType.toString() + " "); if (type != null) { - sb.append(":").append(typeSql); + sb.append(pattern).append(":").append(typeSql); } return sb.toString(); } diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 index 68e85a39a82..31576ded912 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 @@ -343,6 +343,8 @@ MATCH_PHRASE: 'MATCH_PHRASE'; MATCH_PHRASE_EDGE: 'MATCH_PHRASE_EDGE'; MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX'; MATCH_REGEXP: 'MATCH_REGEXP'; +MATCH_NAME: 'MATCH_NAME'; +MATCH_NAME_GLOB: 'MATCH_NAME_GLOB'; MATERIALIZED: 'MATERIALIZED'; MAX: 'MAX'; MAXVALUE: 'MAXVALUE'; diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index cd6da72dfc8..f6096008e0d 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -1731,9 +1731,11 @@ variantSubColTypeList : variantSubColType (COMMA variantSubColType)* ; variantSubColType - : qualifiedName COLON dataType commentSpec? + : variantSubColMatchType? STRING_LITERAL COLON dataType commentSpec? + ; +variantSubColMatchType + : (MATCH_NAME | MATCH_NAME_GLOB) ; - commentSpec : COMMENT STRING_LITERAL ; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 16526b44620..5736afd0a90 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -36,6 +36,7 @@ import org.apache.doris.proto.OlapFile; import org.apache.doris.thrift.TAggregationType; import org.apache.doris.thrift.TColumn; import org.apache.doris.thrift.TColumnType; +import org.apache.doris.thrift.TPatternType; import org.apache.doris.thrift.TPrimitiveType; import com.google.common.base.Strings; @@ -157,9 +158,9 @@ public class Column implements GsonPostProcessable { @SerializedName(value = "gctt") private Set<String> generatedColumnsThatReferToThis = new HashSet<>(); - @SerializedName(value = "idxid") - // variant subfield index id list(support multi index) - private List<Long> indexIdList = Lists.newArrayList(); + // used for variant sub-field pattern type + @SerializedName(value = "fpt") + private TPatternType fieldPatternType; public Column() { this.name = ""; @@ -352,6 +353,7 @@ public class Column implements GsonPostProcessable { // set column name as pattern Column c = new Column(field.pattern, field.getType()); c.setIsAllowNull(true); + c.setFieldPatternType(field.getPatternType()); column.addChildrenColumn(c); } } @@ -554,6 +556,10 @@ public class Column implements GsonPostProcessable { this.isAllowNull = isAllowNull; } + public void setFieldPatternType(TPatternType type) { + fieldPatternType = type; + } + public String getDefaultValue() { return this.defaultValue; } @@ -681,6 +687,9 @@ public class Column implements GsonPostProcessable { childrenTColumn.setColumnType(childrenTColumnType); childrenTColumn.setIsAllowNull(children.isAllowNull()); + if (children.fieldPatternType != null) { + childrenTColumn.setPatternType(children.fieldPatternType); + } // TODO: If we don't set the aggregate type for children, the type will be // considered as TAggregationType::SUM after deserializing in BE. // For now, we make children inherit the aggregate type from their parent. diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 499cffe5df6..70c71dfde6d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -4096,7 +4096,13 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> { } else { comment = ""; } - return new VariantField(ctx.qualifiedName().getText(), typedVisit(ctx.dataType()), comment); + String pattern = ctx.STRING_LITERAL().getText(); + pattern = pattern.substring(1, pattern.length() - 1); + if (ctx.variantSubColMatchType() != null) { + return new VariantField(pattern, typedVisit(ctx.dataType()), comment, + ctx.variantSubColMatchType().getText()); + } + return new VariantField(pattern, typedVisit(ctx.dataType()), comment); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantField.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantField.java index 1a092c1160a..ca9ed52156d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantField.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantField.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.types; import org.apache.doris.nereids.util.Utils; +import org.apache.doris.thrift.TPatternType; import java.util.Objects; @@ -25,21 +26,33 @@ import java.util.Objects; * A field inside a StructType. */ public class VariantField { - private final String pattern; private final DataType dataType; private final String comment; + private final TPatternType patternType; + + public VariantField(String pattern, DataType dataType, String comment) { + this(pattern, dataType, comment, TPatternType.MATCH_NAME_GLOB.name()); + } /** * StructField Constructor * @param pattern of this field * @param dataType The data type of this field * @param comment The comment of this field + * @param patternType The patternType of this field */ - public VariantField(String pattern, DataType dataType, String comment) { + public VariantField(String pattern, DataType dataType, String comment, String patternType) { this.pattern = Objects.requireNonNull(pattern, "pattern should not be null"); this.dataType = Objects.requireNonNull(dataType, "dataType should not be null"); this.comment = Objects.requireNonNull(comment, "comment should not be null"); + TPatternType type; + if (patternType.equalsIgnoreCase("MATCH_NAME")) { + type = TPatternType.MATCH_NAME; + } else { + type = TPatternType.MATCH_NAME_GLOB; + } + this.patternType = Objects.requireNonNull(type, "patternType should not be null"); } public String getPattern() { @@ -54,20 +67,9 @@ public class VariantField { return comment; } - public VariantField conversion() { - if (this.dataType.equals(dataType.conversion())) { - return this; - } - return withDataType(dataType.conversion()); - } - - public VariantField withDataType(DataType dataType) { - return new VariantField(pattern, dataType, comment); - } - public org.apache.doris.catalog.VariantField toCatalogDataType() { return new org.apache.doris.catalog.VariantField( - pattern, dataType.toCatalogDataType(), comment); + pattern, dataType.toCatalogDataType(), comment, patternType); } public String toSql() { diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift index f4070114617..550b20308bd 100644 --- a/gensrc/thrift/Descriptors.thrift +++ b/gensrc/thrift/Descriptors.thrift @@ -22,6 +22,11 @@ include "Types.thrift" include "Exprs.thrift" include "Partitions.thrift" +enum TPatternType { + MATCH_NAME = 1, + MATCH_NAME_GLOB = 2 +} + struct TColumn { 1: required string column_name 2: required Types.TColumnType column_type @@ -43,6 +48,7 @@ struct TColumn { 18: optional bool is_auto_increment = false; 19: optional i32 cluster_key_id = -1 20: optional i32 be_exec_version = -1 + 21: optional TPatternType pattern_type } struct TSlotDescriptor { diff --git a/regression-test/data/variant_p0/predefine/load.out b/regression-test/data/variant_p0/predefine/load.out index ca4161c3a01..e2347eb8508 100644 Binary files a/regression-test/data/variant_p0/predefine/load.out and b/regression-test/data/variant_p0/predefine/load.out differ diff --git a/regression-test/data/variant_p0/predefine/sql/q01.out b/regression-test/data/variant_p0/predefine/sql/q01.out index 54f9bace38b..aa40b7653e9 100644 Binary files a/regression-test/data/variant_p0/predefine/sql/q01.out and b/regression-test/data/variant_p0/predefine/sql/q01.out differ diff --git a/regression-test/suites/variant_p0/predefine/load.groovy b/regression-test/suites/variant_p0/predefine/load.groovy index 7ccb8068024..cf0ad73491f 100644 --- a/regression-test/suites/variant_p0/predefine/load.groovy +++ b/regression-test/suites/variant_p0/predefine/load.groovy @@ -21,7 +21,7 @@ suite("regression_test_variant_predefine_schema", "p0"){ CREATE TABLE `test_predefine` ( `id` bigint NOT NULL, `type` varchar(30) NULL, - `v1` variant<a.b.c:int,ss:string,dcm:decimal,dt:datetime,ip:ipv4,a.b.d:double> NULL, + `v1` variant<'a.b.c':int,'ss':string,'dcm':decimal,'dt':datetime,'ip':ipv4,'a.b.d':double> NULL, INDEX idx_var_sub(`v1`) USING INVERTED PROPERTIES("parser" = "english") ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 3 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "variant_max_subcolumns_count" = "0"); @@ -80,29 +80,29 @@ suite("regression_test_variant_predefine_schema", "p0"){ CREATE TABLE `test_predefine2` ( `id` bigint NOT NULL, `v1` variant< - array_int:array<int>, - array_string:array<string>, - array_decimal:array<decimalv3(27,9)>, - array_datetime:array<datetime>, - array_datetimev2:array<datetimev2>, - array_date:array<date>, - array_datev2:array<datev2>, - array_ipv4:array<ipv4>, - array_ipv6:array<ipv6>, - array_float:array<float>, - array_boolean:array<boolean>, - int_:int, - string_:string, - decimal_:decimalv3(27,9), - datetime_:datetime, - datetimev2_:datetimev2(6), - date_:date, - datev2_:datev2, - ipv4_:ipv4, - ipv6_:ipv6, - float_:float, - boolean_:boolean, - varchar_:varchar + 'array_int':array<int>, + 'array_string':array<string>, + 'array_decimal':array<decimalv3(27,9)>, + 'array_datetime':array<datetime>, + 'array_datetimev2':array<datetimev2>, + 'array_date':array<date>, + 'array_datev2':array<datev2>, + 'array_ipv4':array<ipv4>, + 'array_ipv6':array<ipv6>, + 'array_float':array<float>, + 'array_boolean':array<boolean>, + 'int_':int, + 'string_':string, + 'decimal_':decimalv3(27,9), + 'datetime_':datetime, + 'datetimev2_':datetimev2(6), + 'date_':date, + 'datev2_':datev2, + 'ipv4_':ipv4, + 'ipv6_':ipv6, + 'float_':float, + 'boolean_':boolean, + 'varchar_':varchar > NULL ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 2 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "variant_max_subcolumns_count" = "0"); @@ -201,9 +201,9 @@ suite("regression_test_variant_predefine_schema", "p0"){ // // schema change // // 1. add column - sql "alter table test_predefine1 add column v2 variant<dcm:decimal,dt:datetime> default null" + sql "alter table test_predefine1 add column v2 variant<'dcm':decimal,'dt':datetime> default null" sql """insert into test_predefine1 values(101, '{"a" :1}', '{"dcm": 1111111}')""" - sql "alter table test_predefine1 add column v3 variant<dcm:decimal,dt:datetime,ip:ipv6> default null" + sql "alter table test_predefine1 add column v3 variant<'dcm':decimal,'dt':datetime,'ip':ipv6> default null" sql """insert into test_predefine1 values(102, '{"a" :1}', '{"dcm": 1111111}', '{"dcm": 1111111}');""" // 2. todo support alter column type // sql "alter table test_predefine1 modify column v3 variant<dcm:decimal,dt:datetime,ip:ipv6>" @@ -216,7 +216,7 @@ suite("regression_test_variant_predefine_schema", "p0"){ sql "DROP TABLE IF EXISTS test_predefine3" sql """CREATE TABLE `test_predefine3` ( `id` bigint NOT NULL, - `v` variant<`nested.a`: string> NULL) + `v` variant<'nested.a':string> NULL) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "variant_enable_flatten_nested" = "true", "variant_max_subcolumns_count" = "0");""" @@ -251,7 +251,7 @@ suite("regression_test_variant_predefine_schema", "p0"){ sql """ CREATE TABLE `region_insert` ( `k` bigint NULL, - `var` variant<c_acctbal:text,c_address:text,c_comment:text,c_custkey:text,c_mktsegment:text,c_name:text,c_nationkey:text,c_phone:text,p_brand:float,p_comment:text,p_container:text,p_mfgr:text,p_name:text,p_partkey:text,p_retailprice:text,p_size:text,p_type:text,r_comment:text,r_name:text,r_regionkey:text,ps_availqty:text,ps_comment:text,ps_none:text,ps_partkey:text,ps_suppkey:text,ps_supplycost:text,s_acctbal:text,s_address:text,s_comment:text,s_name:text,s_nationkey:text,s_phone:t [...] + `var` variant<'c_acctbal':text,'c_address':text,'c_comment':text,'c_custkey':text,'c_mktsegment':text,'c_name':text,'c_nationkey':text,'c_phone':text,'p_brand':float,'p_comment':text,'p_container':text,'p_mfgr':text,'p_name':text,'p_partkey':text,'p_retailprice':text,'p_size':text,'p_type':text,'r_comment':text,'r_name':text,'r_regionkey':text,'ps_availqty':text,'ps_comment':text,'ps_none':text,'ps_partkey':text,'ps_suppkey':text,'ps_supplycost':text,'key_46':text,'key_47':text,'ke [...] `OfvZr` variant NULL ) ENGINE=OLAP DUPLICATE KEY(`k`) @@ -281,7 +281,7 @@ suite("regression_test_variant_predefine_schema", "p0"){ sql """ CREATE TABLE `test_bf_with_bool` ( `k` bigint NULL, - `var` variant<c_bool:boolean> + `var` variant<'c_bool':boolean> ) ENGINE=OLAP DUPLICATE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 5 @@ -300,7 +300,7 @@ suite("regression_test_variant_predefine_schema", "p0"){ sql """ CREATE TABLE `test_array_with_nulls` ( `k` bigint NULL, - `var` variant<array_decimal:array<decimalv3(27,9)>> + `var` variant<match_name 'array_decimal':array<decimalv3(27,9)>> ) ENGINE=OLAP DUPLICATE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org