This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-c108335-hive-sql in repository https://gitbox.apache.org/repos/asf/doris.git
commit a16d306f7ca3cc3f578396c65bd760c808e2171a Author: morningman <yun...@selectdb.com> AuthorDate: Wed Apr 9 15:42:11 2025 -0700 [tmp] set hive serde type for FE 1 --- .../org/apache/doris/analysis/ArrayLiteral.java | 2 +- .../org/apache/doris/analysis/BoolLiteral.java | 3 +- .../org/apache/doris/analysis/LiteralExpr.java | 10 +- .../java/org/apache/doris/analysis/MapLiteral.java | 2 +- .../org/apache/doris/analysis/StructLiteral.java | 2 +- .../org/apache/doris/common/FormatOptions.java | 27 +++- .../org/apache/doris/nereids/NereidsPlanner.java | 4 +- .../datatype_p0/serde/test_serde_dialect_hive.out | Bin 0 -> 3969 bytes .../serde/test_serde_dialect_hive.groovy | 138 +++++++++++++++++++++ 9 files changed, 178 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java index 2e0f43758f8..b6aa82d8ccd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java @@ -152,7 +152,7 @@ public class ArrayLiteral extends LiteralExpr { // we should use type to decide we output array is suitable for json format list.add(stringLiteral); }); - return "[" + StringUtils.join(list, ", ") + "]"; + return "[" + StringUtils.join(list, options.getCollectionDelim()) + "]"; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BoolLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BoolLiteral.java index c0d6d885285..2c58c3965ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BoolLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BoolLiteral.java @@ -113,7 +113,8 @@ public class BoolLiteral extends LiteralExpr { @Override public String getStringValueForArray(FormatOptions options) { - return options.getNestedStringWrapper() + getStringValue() + options.getNestedStringWrapper(); + String val = 
options.isBoolValueNum() ? getStringValue() : (value ? "true" : "false"); + return val; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java index 301b6277725..87841a69900 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java @@ -114,10 +114,16 @@ public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr return literalExpr; } + /** + * 1. For numeric/complex types, no need to wrap with quotes, call "getStringValueInFe". + * 2. For other types, call "getStringValueForArray()": + * 2.1. for null/boolean, getStringValueForArray() will return the value formatted per FormatOptions. + * 2.2. for others, getStringValueForArray() will return the value wrapped with quotes. + */ public static String getStringLiteralForComplexType(Expr v, FormatOptions options) { if (!(v instanceof NullLiteral) && v.getType().isScalarType() - && (Type.getNumericTypes().contains((ScalarType) v.getActualScalarType(v.getType())) - || v.getType() == Type.BOOLEAN)) { + && Type.getNumericTypes().contains((ScalarType) v.getActualScalarType(v.getType()))) { + // This is a numeric type, no need to wrap with quotes, so call getStringValueInFe return v.getStringValueInFe(options); } else if (v.getType().isComplexType()) { // these type should also call getStringValueInFe which should handle special case for itself diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java index f330dfa6edb..b7bc7ce6116 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java @@ -193,7 +193,7 @@ public class MapLiteral extends LiteralExpr { list.add(getStringLiteralForComplexType(children.get(i), options) +
options.getMapKeyDelim() + getStringLiteralForComplexType(children.get(i + 1), options)); } - return "{" + StringUtils.join(list, ", ") + "}"; + return "{" + StringUtils.join(list, options.getCollectionDelim()) + "}"; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java index 5d888168821..afc6a495af6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java @@ -125,7 +125,7 @@ public class StructLiteral extends LiteralExpr { + options.getMapKeyDelim() + getStringLiteralForComplexType(child, options)); } - return "{" + StringUtils.join(list, ", ") + "}"; + return "{" + StringUtils.join(list, options.getCollectionDelim()) + "}"; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FormatOptions.java b/fe/fe-core/src/main/java/org/apache/doris/common/FormatOptions.java index a63b83ab71d..e89bbc9f011 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/FormatOptions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/FormatOptions.java @@ -28,11 +28,20 @@ public class FormatOptions { private String nestedStringWrapper; private String mapKeyDelim; private String nullFormat; + private String collectionDelim; + // isBoolValueNum = true means a boolean column inside a collection type (array, map, ...) is printed as 0 or 1. + // false means it is printed as true/false + // This applies only to boolean columns within a collection type.
+ // For top level boolean column, it is always 0/1 + private boolean isBoolValueNum; - public FormatOptions(String nestedStringWrapper, String mapKeyDelim, String nullFormat) { + private FormatOptions(String nestedStringWrapper, String mapKeyDelim, String nullFormat, String collectionDelim, + boolean isBoolValueNum) { this.nestedStringWrapper = nestedStringWrapper; this.mapKeyDelim = mapKeyDelim; this.nullFormat = nullFormat; + this.collectionDelim = collectionDelim; + this.isBoolValueNum = isBoolValueNum; } public String getNestedStringWrapper() { @@ -47,11 +56,23 @@ public class FormatOptions { return this.nullFormat; } + public String getCollectionDelim() { + return collectionDelim; + } + + public boolean isBoolValueNum() { + return isBoolValueNum; + } + public static FormatOptions getDefault() { - return new FormatOptions("\"", ":", "null"); + return new FormatOptions("\"", ":", "null", ", ", true); } public static FormatOptions getForPresto() { - return new FormatOptions("", "=", "NULL"); + return new FormatOptions("", "=", "NULL", ", ", true); + } + + public static FormatOptions getForHive() { + return new FormatOptions("\"", "=", "null", ",", false); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 5f6b74a597b..0a96e577d74 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -809,8 +809,10 @@ public class NereidsPlanner extends Planner { case "trino": statementContext.setFormatOptions(FormatOptions.getForPresto()); break; - case "doris": case "hive": + statementContext.setFormatOptions(FormatOptions.getForHive()); + break; + case "doris": statementContext.setFormatOptions(FormatOptions.getDefault()); break; default: diff --git a/regression-test/data/datatype_p0/serde/test_serde_dialect_hive.out 
b/regression-test/data/datatype_p0/serde/test_serde_dialect_hive.out new file mode 100644 index 00000000000..720f52767e0 Binary files /dev/null and b/regression-test/data/datatype_p0/serde/test_serde_dialect_hive.out differ diff --git a/regression-test/suites/datatype_p0/serde/test_serde_dialect_hive.groovy b/regression-test/suites/datatype_p0/serde/test_serde_dialect_hive.groovy new file mode 100644 index 00000000000..a67cae451cb --- /dev/null +++ b/regression-test/suites/datatype_p0/serde/test_serde_dialect_hive.groovy @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_serde_dialect_hive", "p0") { + + sql """create database if not exists test_serde_dialect_hive;""" + sql """use test_serde_dialect_hive;""" + sql """drop table if exists test_serde_dialect_hive_tbl""" + sql """ + create table if not exists test_serde_dialect_hive_tbl ( + c1 tinyint, + c2 smallint, + c3 int, + c4 bigint, + c5 largeint, + c6 float, + c7 double, + c8 decimal(27, 9), + c9 date, + c10 datetime, + c11 datetime(6), + c12 ipv4, + c13 ipv6, + c14 string, + c15 char(6), + c16 varchar(1024), + c17 boolean, + c18 json, + c19 array<int>, + c20 array<double>, + c21 array<decimal(10, 5)>, + c22 array<string>, + c23 array<map<string, string>>, + c24 array<array<string>>, + c25 array<struct<s_id:int(11), s_name:string, s_address:string>>, + c26 array<struct<s_id:struct<k1:string, k2:decimal(10,2)>, s_name:array<ipv4>, s_address:map<string, ipv6>>>, + c27 map<string, string>, + c28 map<string, array<array<string>>>, + c29 map<int, map<string, array<array<string>>>>, + c30 map<decimal(5, 3), array<struct<s_id:struct<k1:string, k2:decimal(10,2)>, s_name:array<string>, s_address:map<string, string>>>>, + c31 struct<s_id:int(11), s_name:string, s_address:string>, + c32 struct<s_id:int(11), s_name:array<string>, s_address:string>, + c33 array<date>, + c34 array<datetime(3)>, + c35 array<boolean>, + c36 struct<s_id:int(11), s_name:string, s_gender:boolean>, + c37 map<string, boolean> + ) + distributed by random buckets 1 + properties("replication_num" = "1"); + """ + + sql """ + insert into test_serde_dialect_hive_tbl + (c1, c2,c3, c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c27,c28,c29,c31,c32,c33,c34,c35,c36,c37) + values( + 1,2,3,4,5,1.1,2.0000,123456.123456789,"2024-06-30", "2024-06-30 10:10:11", "2024-06-30 10:10:11.123456", + '59.50.185.152', + 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff', + 'this is a string with , and "', + 'abc ef', + ' 123ndedwdw', + true, + '[1, 2, 3, 4, 5]', + [1,2,3,null,5], + 
[1.1,2.1,3.1,null,5.00], + [1.1,2.1,3.00000,null,5.12345], + ['abc', 'de, f"', null, ''], + [{'k1': 'v1', 'k2': null, 'k3':'', 'k4':'a , "a'}, {'k1': 'v1', 'k2': null, 'k3 , "abc':'', 'k4':'a , "a'}], + [['abc', 'de, f"', null, ''],[],null], + {'k1': 'v1', 'k2': null, 'k3':'', 'k4':'a , "a'}, + {'k1': [['abc', 'de, f"', null, ''],[],null], 'k2': null}, + {10: {'k1': [['abc', 'de, f"', null, ''],[],null]}, 11: null}, + named_struct('s_id', 100, 's_name', 'abc , "', 's_address', null), + named_struct('s_id', null, 's_name', ['abc', 'de, f"', null, ''], 's_address', ''), + ['2024-06-01',null,'2024-06-03'], + ['2024-06-01 10:10:10',null,'2024-06-03 01:11:23.123'], + [true, true, false, false, true, false, false], + named_struct('s_id', 100, 's_name', 'abc , "', 's_gender', true), + {'k1': false, 'k2': true, 'k3':true, 'k4': false} + ); + """ + + String constant_sql=""" + select 1,2,3,4,5,1.1,2.0000,123456.123456789,"2024-06-30", "2024-06-30 10:10:11", "2024-06-30 10:10:11.123456", + '59.50.185.152', + 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff', + 'this is a string with , and "', + 'abc ef', + ' 123ndedwdw', + true, + '[1, 2, 3, 4, 5]', + [1,2,3,null,5], + [1.1,2.1,3.1,null,5.00], + [1.1,2.1,3.00000,null,5.12345], + ['abc', 'de, f"', null, ''], + [{'k1': 'v1', 'k2': null, 'k3':'', 'k4':'a , "a'}, {'k1': 'v1', 'k2': null, 'k3 , "abc':'', 'k4':'a , "a'}], + [['abc', 'de, f"', null, ''],[],null], + {'k1': 'v1', 'k2': null, 'k3':'', 'k4':'a , "a'}, + {'k1': [['abc', 'de, f"', null, ''],[],null], 'k2': null}, + {10: {'k1': [['abc', 'de, f"', null, ''],[],null]}, 11: null}, + named_struct('s_id', 100, 's_name', 'abc , "', 's_address', null), + named_struct('s_id', null, 's_name', ['abc', 'de, f"', null, ''], 's_address', ''), + ['2024-06-01',null,'2024-06-03'], + ['2024-06-01 10:10:10',null,'2024-06-03 01:11:23.123'], + [true, true, false, false, true, false, false], + named_struct('s_id', 100, 's_name', 'abc , "', 's_gender', true), + {'k1': false, 'k2': true, 'k3':true, 
'k4': false} + """ + + sql """set serde_dialect="doris";""" + qt_sql01 """select * from test_serde_dialect_hive_tbl""" + // test fold in FE + qt_sql_fe01 """${constant_sql}""" + sql """set serde_dialect="hive";""" + qt_sql02 """select * from test_serde_dialect_hive_tbl""" + // test fold in FE + qt_sql_fe01 """${constant_sql}""" + + test { + sql """set serde_dialect="invalid"""" + exception "sqlDialect value is invalid" + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org