This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new c19c2462056 [bugfix](iceberg)Convert the datetime type in the predicate according to the target column (#32923) c19c2462056 is described below commit c19c24620562c6057aebc15c7e290beec6eecb14 Author: wuwenchi <wuwenchi...@hotmail.com> AuthorDate: Sat Mar 30 23:55:47 2024 +0800 [bugfix](iceberg)Convert the datetime type in the predicate according to the target column (#32923) Convert the datetime type in the predicate according to the target column. And add a testcase for #32194 related #30478 #30162 --- .../doris/datasource/iceberg/IcebergUtils.java | 214 +++++++++++++++------ .../datasource/iceberg/TestIcebergPredict.java | 135 +++++++++++++ .../hive/test_external_catalog_glue_table.out | 3 + .../hive/test_external_catalog_glue_table.groovy | 1 + 4 files changed, 294 insertions(+), 59 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java index aba7167eda0..76b5bfb5105 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java @@ -50,6 +50,8 @@ import org.apache.iceberg.Snapshot; import org.apache.iceberg.Table; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.Unbound; +import org.apache.iceberg.types.Type.TypeID; import org.apache.iceberg.types.Types; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -87,51 +89,46 @@ public class IcebergUtils { return null; } + Expression expression = null; // BoolLiteral if (expr instanceof BoolLiteral) { BoolLiteral boolLiteral = (BoolLiteral) expr; boolean value = boolLiteral.getValue(); if (value) { - return Expressions.alwaysTrue(); + expression = Expressions.alwaysTrue(); } else { - return Expressions.alwaysFalse(); + expression = Expressions.alwaysFalse(); } - } - - // CompoundPredicate - if (expr instanceof CompoundPredicate) { + } else if (expr instanceof CompoundPredicate) { CompoundPredicate compoundPredicate = (CompoundPredicate) expr; switch (compoundPredicate.getOp()) { case AND: { Expression left = convertToIcebergExpr(compoundPredicate.getChild(0), schema); Expression right = convertToIcebergExpr(compoundPredicate.getChild(1), schema); if (left != null && right != null) { - return Expressions.and(left, right); + expression = Expressions.and(left, right); } - return null; + break; } case OR: { Expression left = convertToIcebergExpr(compoundPredicate.getChild(0), schema); Expression right = convertToIcebergExpr(compoundPredicate.getChild(1), schema); if (left != null && right != null) { - return Expressions.or(left, right); + expression = Expressions.or(left, right); } - return null; + break; } case NOT: { Expression child = convertToIcebergExpr(compoundPredicate.getChild(0), schema); if (child != null) { - return Expressions.not(child); + expression = Expressions.not(child); } - return null; + break; } default: return null; } - } - - // BinaryPredicate - if (expr instanceof BinaryPredicate) { + } else if (expr instanceof BinaryPredicate) { TExprOpcode opCode = expr.getOpcode(); switch (opCode) { case EQ: @@ -156,38 +153,44 @@ public class IcebergUtils { String colName = slotRef.getColumnName(); Types.NestedField nestedField = schema.caseInsensitiveFindField(colName); colName = nestedField.name(); - Object value = extractDorisLiteral(literalExpr); + Object value = extractDorisLiteral(nestedField.type(), literalExpr); if (value == null) { if (opCode == TExprOpcode.EQ_FOR_NULL && literalExpr instanceof NullLiteral) { - return Expressions.isNull(colName); + expression = Expressions.isNull(colName); } else { return null; } + } else { + switch (opCode) { + case EQ: + case EQ_FOR_NULL: + expression = Expressions.equal(colName, value); + break; + case NE: + expression = Expressions.not(Expressions.equal(colName, value)); + break; + case GE: + expression = Expressions.greaterThanOrEqual(colName, value); + break; + case GT: + expression = Expressions.greaterThan(colName, value); + break; + case LE: + expression = Expressions.lessThanOrEqual(colName, value); + break; + case LT: + expression = Expressions.lessThan(colName, value); + break; + default: + return null; + } } - switch (opCode) { - case EQ: - case EQ_FOR_NULL: - return Expressions.equal(colName, value); - case NE: - return Expressions.not(Expressions.equal(colName, value)); - case GE: - return Expressions.greaterThanOrEqual(colName, value); - case GT: - return Expressions.greaterThan(colName, value); - case LE: - return Expressions.lessThanOrEqual(colName, value); - case LT: - return Expressions.lessThan(colName, value); - default: - return null; - } + break; default: return null; } - } - - // InPredicate, only support a in (1,2,3) - if (expr instanceof InPredicate) { + } else if (expr instanceof InPredicate) { + // InPredicate, only support a in (1,2,3) InPredicate inExpr = (InPredicate) expr; if (inExpr.contains(Subquery.class)) { return null; @@ -196,56 +199,149 @@ public class IcebergUtils { if (slotRef == null) { return null; } + String colName = slotRef.getColumnName(); + Types.NestedField nestedField = schema.caseInsensitiveFindField(colName); + colName = nestedField.name(); List<Object> valueList = new ArrayList<>(); for (int i = 1; i < inExpr.getChildren().size(); ++i) { if (!(inExpr.getChild(i) instanceof LiteralExpr)) { return null; } LiteralExpr literalExpr = (LiteralExpr) inExpr.getChild(i); - Object value = extractDorisLiteral(literalExpr); + Object value = extractDorisLiteral(nestedField.type(), literalExpr); valueList.add(value); } - String colName = slotRef.getColumnName(); - Types.NestedField nestedField = schema.caseInsensitiveFindField(colName); - colName = nestedField.name(); if (inExpr.isNotIn()) { // not in - return Expressions.notIn(colName, valueList); + expression = Expressions.notIn(colName, valueList); } else { // in - return Expressions.in(colName, valueList); + expression = Expressions.in(colName, valueList); } } + if (expression != null && expression instanceof Unbound) { + try { + ((Unbound<?, ?>) expression).bind(schema.asStruct(), true); + return expression; + } catch (Exception e) { + LOG.warn("Failed to check expression: " + e.getMessage()); + return null; + } + } return null; } - private static Object extractDorisLiteral(Expr expr) { - if (!expr.isLiteral()) { - return null; - } + public static Object extractDorisLiteral(org.apache.iceberg.types.Type icebergType, Expr expr) { + TypeID icebergTypeID = icebergType.typeId(); if (expr instanceof BoolLiteral) { BoolLiteral boolLiteral = (BoolLiteral) expr; - return boolLiteral.getValue(); + switch (icebergTypeID) { + case BOOLEAN: + return boolLiteral.getValue(); + case STRING: + return boolLiteral.getStringValue(); + default: + return null; + } } else if (expr instanceof DateLiteral) { DateLiteral dateLiteral = (DateLiteral) expr; - if (dateLiteral.isDateType() || dateLiteral.isDateTimeType()) { - return dateLiteral.getStringValue(); - } else { - return dateLiteral.unixTimestamp(TimeUtils.getTimeZone()) * MILLIS_TO_NANO_TIME; + switch (icebergTypeID) { + case STRING: + return dateLiteral.getStringValue(); + case TIMESTAMP: + return dateLiteral.unixTimestamp(TimeUtils.getTimeZone()) * MILLIS_TO_NANO_TIME; + default: + return null; } } else if (expr instanceof DecimalLiteral) { DecimalLiteral decimalLiteral = (DecimalLiteral) expr; - return decimalLiteral.getValue(); + switch (icebergTypeID) { + case DECIMAL: + return decimalLiteral.getValue(); + case STRING: + return decimalLiteral.getStringValue(); + case DOUBLE: + return decimalLiteral.getDoubleValue(); + default: + return null; + } } else if (expr instanceof FloatLiteral) { FloatLiteral floatLiteral = (FloatLiteral) expr; - return floatLiteral.getValue(); + if (floatLiteral.getType() == Type.FLOAT) { + switch (icebergTypeID) { + case FLOAT: + case DOUBLE: + case DECIMAL: + return floatLiteral.getValue(); + default: + return null; + } + } else { + switch (icebergTypeID) { + case DOUBLE: + case DECIMAL: + return floatLiteral.getValue(); + default: + return null; + } + } } else if (expr instanceof IntLiteral) { IntLiteral intLiteral = (IntLiteral) expr; - return intLiteral.getValue(); + Type type = intLiteral.getType(); + if (type.isInteger32Type()) { + switch (icebergTypeID) { + case INTEGER: + case LONG: + case FLOAT: + case DOUBLE: + case DATE: + case DECIMAL: + return (int) intLiteral.getValue(); + default: + return null; + } + } else { + // only PrimitiveType.BIGINT + switch (icebergTypeID) { + case INTEGER: + case LONG: + case FLOAT: + case DOUBLE: + case TIME: + case TIMESTAMP: + case DATE: + case DECIMAL: + return intLiteral.getValue(); + default: + return null; + } + } } else if (expr instanceof StringLiteral) { - StringLiteral stringLiteral = (StringLiteral) expr; - return stringLiteral.getStringValue(); + String value = expr.getStringValue(); + switch (icebergTypeID) { + case DATE: + case TIME: + case TIMESTAMP: + case STRING: + case UUID: + case DECIMAL: + return value; + case INTEGER: + try { + return Integer.parseInt(value); + } catch (Exception e) { + return null; + } + case LONG: + try { + return Long.parseLong(value); + } catch (Exception e) { + return null; + } + default: + return null; + } } return null; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/TestIcebergPredict.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/TestIcebergPredict.java new file mode 100644 index 00000000000..80b1c62819b --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/TestIcebergPredict.java @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.iceberg; + +import org.apache.doris.analysis.BinaryPredicate; +import org.apache.doris.analysis.BoolLiteral; +import org.apache.doris.analysis.DateLiteral; +import org.apache.doris.analysis.DecimalLiteral; +import org.apache.doris.analysis.FloatLiteral; +import org.apache.doris.analysis.IntLiteral; +import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.SlotRef; +import org.apache.doris.analysis.StringLiteral; +import org.apache.doris.analysis.TableName; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; + +import org.apache.iceberg.Schema; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.types.Types; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +public class TestIcebergPredict { + + public static Schema schema; + + @BeforeClass + public static void before() throws AnalysisException { + schema = new Schema( + Types.NestedField.required(1, "c_int", Types.IntegerType.get()), + Types.NestedField.required(2, "c_long", Types.LongType.get()), + Types.NestedField.required(3, "c_bool", Types.BooleanType.get()), + Types.NestedField.required(4, "c_float", Types.FloatType.get()), + Types.NestedField.required(5, "c_double", Types.DoubleType.get()), + Types.NestedField.required(6, "c_dec", Types.DecimalType.of(20, 10)), + Types.NestedField.required(7, "c_date", Types.DateType.get()), + Types.NestedField.required(8, "c_ts", Types.TimestampType.withoutZone()), + Types.NestedField.required(10, "c_str", Types.StringType.get()) + ); + } + + @Test + public void testBinaryPredicate() throws AnalysisException { + List<LiteralExpr> literalList = new ArrayList<LiteralExpr>() {{ + add(new BoolLiteral(true)); + add(new DateLiteral("2023-01-02", Type.DATEV2)); + add(new DateLiteral("2024-01-02 12:34:56.123456", Type.DATETIMEV2)); + add(new DecimalLiteral(new BigDecimal("1.23"))); + add(new FloatLiteral(1.23, Type.FLOAT)); + add(new FloatLiteral(3.456, Type.DOUBLE)); + add(new IntLiteral(1, Type.TINYINT)); + add(new IntLiteral(1, Type.SMALLINT)); + add(new IntLiteral(1, Type.INT)); + add(new IntLiteral(1, Type.BIGINT)); + add(new StringLiteral("abc")); + add(new StringLiteral("2023-01-02")); + add(new StringLiteral("2023-01-02 01:02:03.456789")); + }}; + + List<SlotRef> slotRefs = new ArrayList<SlotRef>() {{ + add(new SlotRef(new TableName(), "c_int")); + add(new SlotRef(new TableName(), "c_long")); + add(new SlotRef(new TableName(), "c_bool")); + add(new SlotRef(new TableName(), "c_float")); + add(new SlotRef(new TableName(), "c_double")); + add(new SlotRef(new TableName(), "c_dec")); + add(new SlotRef(new TableName(), "c_date")); + add(new SlotRef(new TableName(), "c_ts")); + add(new SlotRef(new TableName(), "c_str")); + }}; + + // true indicates support for pushdown + Boolean[][] expects = new Boolean[][] { + { // int + false, false, false, false, false, false, true, true, true, true, false, false, false + }, + { // long + false, false, false, false, false, false, true, true, true, true, false, false, false + }, + { // boolean + true, false, false, false, false, false, false, false, false, false, false, false, false + }, + { // float + false, false, false, false, true, false, true, true, true, true, false, false, false + }, + { // double + false, false, false, true, true, true, true, true, true, true, false, false, false + }, + { // decimal + false, false, false, true, true, true, true, true, true, true, false, false, false + }, + { // date + false, false, false, false, false, false, true, true, true, true, false, true, false + }, + { // timestamp + false, true, true, false, false, false, false, false, false, true, false, false, false + }, + { // string + true, true, true, true, false, false, false, false, false, false, true, true, true + } + }; + + for (int i = 0; i < slotRefs.size(); i++) { + final int loc = i; + List<Boolean> ret = literalList.stream().map(literal -> { + BinaryPredicate expr = new BinaryPredicate(BinaryPredicate.Operator.EQ, slotRefs.get(loc), literal); + Expression expression = IcebergUtils.convertToIcebergExpr(expr, schema); + return expression != null; + }).collect(Collectors.toList()); + Assert.assertArrayEquals(expects[i], ret.toArray()); + } + } +} diff --git a/regression-test/data/external_table_p2/hive/test_external_catalog_glue_table.out b/regression-test/data/external_table_p2/hive/test_external_catalog_glue_table.out index 372dac93728..206cf483b45 100644 --- a/regression-test/data/external_table_p2/hive/test_external_catalog_glue_table.out +++ b/regression-test/data/external_table_p2/hive/test_external_catalog_glue_table.out @@ -137,3 +137,6 @@ b5e6bf2b5 10410585 \N 1938534851 2023-03-07T20:35:17.731 955.1760424982325 643e7c71b83d444e9261 67.0202 6a15d14103dc4 55b15adbec34 true 10055090 \N 2147483647 2023-03-07T20:38:59.078 1387.1527042831178 47 67.7351 c4c5 960637955914682b6 true +-- !q18 -- +11801003 35210325 + diff --git a/regression-test/suites/external_table_p2/hive/test_external_catalog_glue_table.groovy b/regression-test/suites/external_table_p2/hive/test_external_catalog_glue_table.groovy index db230f7e8c9..5b3edd44c78 100644 --- a/regression-test/suites/external_table_p2/hive/test_external_catalog_glue_table.groovy +++ b/regression-test/suites/external_table_p2/hive/test_external_catalog_glue_table.groovy @@ -48,6 +48,7 @@ suite("test_external_catalog_glue_table", "p2,external,hive,external_remote,exte qt_q15 """ select count(1) from iceberg_glue_types """ qt_q16 """ select glue_timstamp from iceberg_glue_types where glue_timstamp > '2023-03-07 20:35:59' order by glue_timstamp limit 5 """ qt_q17 """ select * from iceberg_glue_types order by glue_decimal limit 5 """ + qt_q18 """ select glue_int, glue_varchar from iceberg_glue_types where glue_varchar > date '2023-03-07' """ } sql """ use `iceberg_catalog`; """ q01() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org