This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 0fb52c5 [UDF] Fix bug that UDF can't handle constant null value (#2914) 0fb52c5 is described below commit 0fb52c514b5191d48079f804f3123571f0e47c5c Author: Mingyu Chen <morningman....@gmail.com> AuthorDate: Mon Feb 17 22:13:50 2020 +0800 [UDF] Fix bug that UDF can't handle constant null value (#2914) This CL modify the `evalExpr()` of ExpressionFunctions, so that it won't change the `FunctionCallExpr` to `NullLiteral` when there is null parameter in UDF. Which will fix the problem described in ISSUE: #2913 --- .../apache/doris/analysis/CreateFunctionStmt.java | 8 +- .../apache/doris/analysis/ExpressionFunctions.java | 38 +++----- .../java/org/apache/doris/catalog/Catalog.java | 4 + .../java/org/apache/doris/catalog/Function.java | 15 ++- .../java/org/apache/doris/catalog/FunctionSet.java | 38 ++++++-- .../java/org/apache/doris/qe/StmtExecutor.java | 2 +- .../org/apache/doris/alter/BatchRollupJobTest.java | 4 +- .../apache/doris/catalog/CreateFunctionTest.java | 105 +++++++++++++++++++++ .../org/apache/doris/utframe/UtFrameUtils.java | 6 +- gensrc/script/doris_builtins_functions.py | 14 +++ gensrc/script/gen_builtins_functions.py | 11 ++- 11 files changed, 202 insertions(+), 43 deletions(-) diff --git a/fe/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java b/fe/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java index 05c742d..a69412d 100644 --- a/fe/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java @@ -17,9 +17,6 @@ package org.apache.doris.analysis; -import com.google.common.base.Strings; -import com.google.common.collect.ImmutableSortedMap; -import org.apache.commons.codec.binary.Hex; import org.apache.doris.catalog.AggregateFunction; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Function; @@ -31,6 +28,11 @@ import org.apache.doris.common.UserException; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableSortedMap; + +import org.apache.commons.codec.binary.Hex; + import java.io.IOException; import java.io.InputStream; import java.net.URL; diff --git a/fe/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java b/fe/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java index e62b05a..ee9a6f5 100644 --- a/fe/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java +++ b/fe/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java @@ -17,18 +17,20 @@ package org.apache.doris.analysis; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMultimap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Function; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.rewrite.FEFunction; import org.apache.doris.rewrite.FEFunctions; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMultimap; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -46,12 +48,6 @@ public enum ExpressionFunctions { private static final Logger LOG = LogManager.getLogger(ExpressionFunctions.class); private ImmutableMultimap<String, FEFunctionInvoker> functions; - // For most build-in functions, it will return NullLiteral when params contain NullLiteral. - // But a few functions need to handle NullLiteral differently, such as "if". It need to add - // an attribute to LiteralExpr to mark null and check the attribute to decide whether to - // replace the result with NullLiteral when function finished. It leaves to be realized. - // TODO chenhao16. - private ImmutableSet<String> nonNullResultWithNullParamFunctions; private ExpressionFunctions() { registerFunctions(); @@ -71,8 +67,13 @@ public enum ExpressionFunctions { Function fn = constExpr.getFn(); Preconditions.checkNotNull(fn, "Expr's fn can't be null."); - // null - if (!nonNullResultWithNullParamFunctions.contains(fn.getFunctionName().getFunction())) { + + // return NullLiteral directly iff: + // 1. Not UDF + // 2. Not in NonNullResultWithNullParamFunctions + // 3. Has null parameter + if (!Catalog.getCurrentCatalog().isNonNullResultWithNullParamFunction(fn.getFunctionName().getFunction()) + && !fn.isUdf()) { for (Expr e : constExpr.getChildren()) { if (e instanceof NullLiteral) { return new NullLiteral(); @@ -144,15 +145,6 @@ public enum ExpressionFunctions { } } this.functions = mapBuilder.build(); - - // Functions that need to handle null. - ImmutableSet.Builder<String> setBuilder = - new ImmutableSet.Builder<String>(); - setBuilder.add("if"); - setBuilder.add("hll_hash"); - setBuilder.add("concat_ws"); - setBuilder.add("ifnull"); - this.nonNullResultWithNullParamFunctions = setBuilder.build(); } public static class FEFunctionInvoker { diff --git a/fe/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/src/main/java/org/apache/doris/catalog/Catalog.java index 52850fc..0c73b10 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/src/main/java/org/apache/doris/catalog/Catalog.java @@ -5317,6 +5317,10 @@ public class Catalog { return functionSet.getBulitinFunctions(); } + public boolean isNonNullResultWithNullParamFunction(String funcName) { + return functionSet.isNonNullResultWithNullParamFunctions(funcName); + } + /** * create cluster * diff --git a/fe/src/main/java/org/apache/doris/catalog/Function.java b/fe/src/main/java/org/apache/doris/catalog/Function.java index f5f7710..52d36a5 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Function.java +++ b/fe/src/main/java/org/apache/doris/catalog/Function.java @@ -19,15 +19,17 @@ package org.apache.doris.catalog; import static org.apache.doris.common.io.IOUtils.writeOptionString; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; import org.apache.doris.analysis.FunctionName; import org.apache.doris.analysis.HdfsURI; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.thrift.TFunction; import org.apache.doris.thrift.TFunctionBinaryType; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -208,6 +210,13 @@ public class Function implements Writable { public void setChecksum(String checksum) { this.checksum = checksum; } public String getChecksum() { return checksum; } + // TODO(cmy): Currently we judge whether it is UDF by wheter the 'location' is set. + // Maybe we should use a separate variable to identify, + // but additional variables need to modify the persistence information. + public boolean isUdf() { + return location != null; + } + // Returns a string with the signature in human readable format: // FnName(argtype1, argtyp2). e.g. Add(int, int) public String signatureString() { diff --git a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java index a5b436a..114c41c 100644 --- a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -17,9 +17,6 @@ package org.apache.doris.catalog; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import org.apache.doris.analysis.ArithmeticExpr; import org.apache.doris.analysis.BinaryPredicate; import org.apache.doris.analysis.CastExpr; @@ -27,14 +24,20 @@ import org.apache.doris.analysis.InPredicate; import org.apache.doris.analysis.IsNullPredicate; import org.apache.doris.analysis.LikePredicate; import org.apache.doris.builtins.ScalarBuiltins; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; - import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; public class FunctionSet { private static final Logger LOG = LogManager.getLogger(FunctionSet.class); @@ -46,6 +49,16 @@ public class FunctionSet { // FunctionResolutionOrder. private final HashMap<String, List<Function>> functions; + // For most build-in functions, it will return NullLiteral when params contain NullLiteral. + // But a few functions need to handle NullLiteral differently, such as "if". It need to add + // an attribute to LiteralExpr to mark null and check the attribute to decide whether to + // replace the result with NullLiteral when function finished. It leaves to be realized. + // Functions in this set is defined in `gensrc/script/doris_builtins_functions.py`, + // and will be built automatically. + + // cmy: This does not contain any user defined functions. All UDFs handle null values by themselves. + private ImmutableSet<String> nonNullResultWithNullParamFunctions; + public FunctionSet() { functions = Maps.newHashMap(); } @@ -63,6 +76,18 @@ public class FunctionSet { InPredicate.initBuiltins(this); } + public void buildNonNullResultWithNullParamFunction(Set<String> funcNames) { + ImmutableSet.Builder<String> setBuilder = new ImmutableSet.Builder<String>(); + for (String funcName : funcNames) { + setBuilder.add(funcName); + } + this.nonNullResultWithNullParamFunctions = setBuilder.build(); + } + + public boolean isNonNullResultWithNullParamFunctions(String funcName) { + return nonNullResultWithNullParamFunctions.contains(funcName); + } + private static final Map<Type, String> MIN_UPDATE_SYMBOL = ImmutableMap.<Type, String>builder() .put(Type.BOOLEAN, @@ -746,8 +771,7 @@ public class FunctionSet { return null; } - // Only used - public boolean addFunction(Function fn) { + private boolean addFunction(Function fn, boolean isBuiltin) { // TODO: add this to persistent store if (getFunction(fn, Function.CompareMode.IS_INDISTINGUISHABLE) != null) { return false; @@ -791,7 +815,7 @@ public class FunctionSet { * Adds a builtin to this database. The function must not already exist. */ public void addBuiltin(Function fn) { - addFunction(fn); + addFunction(fn, true); } // Populate all the aggregate builtins in the catalog. diff --git a/fe/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/src/main/java/org/apache/doris/qe/StmtExecutor.java index de9517f..5430c75 100644 --- a/fe/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -547,7 +547,7 @@ public class StmtExecutor { coord.exec(); - // if python's MysqlDb get error after sendfields, it can't catch the excpetion + // if python's MysqlDb get error after sendfields, it can't catch the exception // so We need to send fields after first batch arrived // send result diff --git a/fe/src/test/java/org/apache/doris/alter/BatchRollupJobTest.java b/fe/src/test/java/org/apache/doris/alter/BatchRollupJobTest.java index 1c92db4..a24e307 100644 --- a/fe/src/test/java/org/apache/doris/alter/BatchRollupJobTest.java +++ b/fe/src/test/java/org/apache/doris/alter/BatchRollupJobTest.java @@ -40,8 +40,6 @@ public class BatchRollupJobTest { private static String runningDir = "fe/mocked/BatchRollupJobTest/" + UUID.randomUUID().toString() + "/"; - private static ConnectContext ctx = UtFrameUtils.createDefaultCtx(); - @BeforeClass public static void setup() throws Exception { UtFrameUtils.createMinDorisCluster(runningDir); @@ -49,7 +47,7 @@ public class BatchRollupJobTest { @Test public void test() throws Exception { - System.out.println("xxx"); + ConnectContext ctx = UtFrameUtils.createDefaultCtx(); // create database db1 String createDbStmtStr = "create database db1;"; CreateDbStmt createDbStmt = (CreateDbStmt) UtFrameUtils.parseAndAnalyzeStmt(createDbStmtStr, ctx); diff --git a/fe/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java b/fe/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java new file mode 100644 index 0000000..74c37d7 --- /dev/null +++ b/fe/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.analysis.CreateDbStmt; +import org.apache.doris.analysis.CreateFunctionStmt; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.FunctionCallExpr; +import org.apache.doris.common.jmockit.Deencapsulation; +import org.apache.doris.planner.PlanFragment; +import org.apache.doris.planner.Planner; +import org.apache.doris.planner.UnionNode; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.QueryState; +import org.apache.doris.qe.StmtExecutor; +import org.apache.doris.utframe.UtFrameUtils; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.util.List; +import java.util.UUID; + +/* + * Author: Chenmingyu + * Date: Feb 16, 2020 + */ + +public class CreateFunctionTest { + + private static String runningDir = "fe/mocked/CreateFunctionTest/" + UUID.randomUUID().toString() + "/"; + + @BeforeClass + public static void setup() throws Exception { + UtFrameUtils.createMinDorisCluster(runningDir); + } + + @AfterClass + public static void teardown() { + File file = new File("fe/mocked/CreateFunctionTest/"); + file.delete(); + } + + @Test + public void test() throws Exception { + ConnectContext ctx = UtFrameUtils.createDefaultCtx(); + + // create database db1 + String createDbStmtStr = "create database db1;"; + CreateDbStmt createDbStmt = (CreateDbStmt) UtFrameUtils.parseAndAnalyzeStmt(createDbStmtStr, ctx); + Catalog.getCurrentCatalog().createDb(createDbStmt); + System.out.println(Catalog.getCurrentCatalog().getDbNames()); + + Database db = Catalog.getCurrentCatalog().getDb("default_cluster:db1"); + Assert.assertNotNull(db); + + String createFuncStr = "create function db1.my_add(VARCHAR(1024)) RETURNS BOOLEAN properties\n" + + "(\n" + + "\"symbol\" = \"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_9StringValE\",\n" + + "\"prepare_fn\" = \"_ZN9doris_udf13AddUdfPrepareEPNS_15FunctionContextENS0_18FunctionStateScopeE\",\n" + + "\"close_fn\" = \"_ZN9doris_udf11AddUdfCloseEPNS_15FunctionContextENS0_18FunctionStateScopeE\",\n" + + "\"object_file\" = \"http://nmg01-inf-dorishb00.nmg01.baidu.com:8456/libcmy_udf.so\"\n" + + ");"; + + CreateFunctionStmt createFunctionStmt = (CreateFunctionStmt) UtFrameUtils.parseAndAnalyzeStmt(createFuncStr, ctx); + Catalog.getCurrentCatalog().createFunction(createFunctionStmt); + + List<Function> functions = db.getFunctions(); + Assert.assertEquals(1, functions.size()); + Assert.assertTrue(functions.get(0).isUdf()); + + String queryStr = "select db1.my_add(null)"; + ctx.getState().reset(); + StmtExecutor stmtExecutor = new StmtExecutor(ctx, queryStr); + stmtExecutor.execute(); + Assert.assertNotEquals(QueryState.MysqlStateType.ERR, ctx.getState().getStateType()); + Planner planner = stmtExecutor.planner(); + Assert.assertEquals(1, planner.getFragments().size()); + PlanFragment fragment = planner.getFragments().get(0); + Assert.assertTrue(fragment.getPlanRoot() instanceof UnionNode); + UnionNode unionNode = (UnionNode)fragment.getPlanRoot(); + List<List<Expr>> constExprLists = Deencapsulation.getField(unionNode, "constExprLists_"); + Assert.assertEquals(1, constExprLists.size()); + Assert.assertEquals(1, constExprLists.get(0).size()); + Assert.assertTrue(constExprLists.get(0).get(0) instanceof FunctionCallExpr); + } +} diff --git a/fe/src/test/java/org/apache/doris/utframe/UtFrameUtils.java b/fe/src/test/java/org/apache/doris/utframe/UtFrameUtils.java index 325afc9..f35b1af 100644 --- a/fe/src/test/java/org/apache/doris/utframe/UtFrameUtils.java +++ b/fe/src/test/java/org/apache/doris/utframe/UtFrameUtils.java @@ -42,6 +42,7 @@ import com.google.common.collect.Maps; import java.io.IOException; import java.io.StringReader; +import java.nio.channels.SocketChannel; import java.util.List; import java.util.Map; import java.util.Random; @@ -49,8 +50,9 @@ import java.util.Random; public class UtFrameUtils { // Help to create a mocked ConnectContext. - public static ConnectContext createDefaultCtx() { - ConnectContext ctx = new ConnectContext(); + public static ConnectContext createDefaultCtx() throws IOException { + SocketChannel channel = SocketChannel.open(); + ConnectContext ctx = new ConnectContext(channel); ctx.setCluster(SystemInfoService.DEFAULT_CLUSTER); ctx.setCurrentUserIdentity(UserIdentity.ROOT); ctx.setQualifiedUser(PaloAuth.ROOT_USER); diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 0b938a0..919c0ff 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -708,5 +708,19 @@ visible_functions = [ [['grouping'], 'BIGINT', ['BIGINT'], '_ZN5doris21GroupingSetsFunctions8groupingEPN9doris_udf15FunctionContextERKNS1_9BigIntValE'], ] +# Except the following functions, other function will directly return +# null if there is null parameters. +# Functions in this set will handle null values, not just return null. +# +# This set is only used to replace 'functions with null parameters' with NullLiteral +# when applying FoldConstantsRule rules on the FE side. +# TODO(cmy): Are these functions only required to handle null values? +non_null_result_with_null_param_functions = [ + 'if', + 'hll_hash', + 'concat_ws', + 'ifnull' +] + invisible_functions = [ ] diff --git a/gensrc/script/gen_builtins_functions.py b/gensrc/script/gen_builtins_functions.py index 8e906b7..647a812 100755 --- a/gensrc/script/gen_builtins_functions.py +++ b/gensrc/script/gen_builtins_functions.py @@ -36,6 +36,8 @@ package org.apache.doris.builtins;\n\ \n\ import org.apache.doris.catalog.PrimitiveType;\n\ import org.apache.doris.catalog.FunctionSet;\n\ +import com.google.common.collect.Sets;\n\ +import java.util.Set;\n\ \n\ public class ScalarBuiltins { \n\ public static void initBuiltins(FunctionSet functionSet) { \ @@ -111,9 +113,16 @@ def generate_fe_registry_init(filename): for entry in meta_data_entries: for name in entry["sql_names"]: java_output = generate_fe_entry(entry, name) - java_registry_file.write(" functionSet.addScalarBuiltin(%s);\n" % java_output) + java_registry_file.write(" functionSet.addScalarBuiltin(%s);\n" % java_output) java_registry_file.write("\n") + + # add non_null_result_with_null_param_functions + java_registry_file.write(" Set<String> funcNames = Sets.newHashSet();\n") + for entry in doris_builtins_functions.non_null_result_with_null_param_functions: + java_registry_file.write(" funcNames.add(\"%s\");\n" % entry) + java_registry_file.write(" functionSet.buildNonNullResultWithNullParamFunction(funcNames);\n"); + java_registry_file.write(java_registry_epilogue) java_registry_file.close() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org