This is an automated email from the ASF dual-hosted git repository. zykkk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b7faf57163a [improvement](jdbc catalog) Disallow non-constant type conversion pushdown and implicit conversion pushdown (#42102) b7faf57163a is described below commit b7faf57163a6825d19782ede7b0a6a0c77315690 Author: zy-kkk <zhongy...@gmail.com> AuthorDate: Tue Oct 22 14:23:03 2024 +0800 [improvement](jdbc catalog) Disallow non-constant type conversion pushdown and implicit conversion pushdown (#42102) Add a session variable `enable_jdbc_cast_predicate_push_down`, whose default value is false. When it is false, the pushdown of non-constant predicates with type conversion and of all predicates with implicit conversion is prohibited. This change prevents wrong predicates from being pushed down to the JDBC data source and causing incorrect query results, because predicates containing a cast were previously not pushed down to the data source correctly. If you find that your data was read correctly and performance was better before this change, you can manually set this variable to true. ``` | Expression | Can Push Down | |-----------------------------------------------------|---------------| | column type equals const type | Yes | | column type equals cast const type | Yes | | cast column type equals const type | No | | cast column type equals cast const type | No | | column type not equals column type | No | | column type not equals cast const type | No | | cast column type not equals const type | No | | cast column type not equals cast const type | No | ``` --- .../docker-compose/mysql/init/03-create-table.sql | 7 ++ .../docker-compose/mysql/init/04-insert.sql | 2 + .../doris/datasource/jdbc/source/JdbcScanNode.java | 18 +++- .../java/org/apache/doris/qe/SessionVariable.java | 14 ++- .../jdbc/test_jdbc_catalog_push_cast.out | 4 + .../jdbc/test_jdbc_catalog_push_cast.groovy | 105 +++++++++++++++++++++ 6 files changed, 146 insertions(+), 4 deletions(-) diff --git a/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql 
b/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql index 9c29d2fb00c..972e35546d0 100644 --- a/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql +++ b/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql @@ -505,3 +505,10 @@ CREATE TABLE doris_test.`t_char` ( `char_col` char(255) COLLATE utf8_bin DEFAULT NULL ); +CREATE TABLE doris_test.`test_cast` ( + `id` int(11) DEFAULT NULL, + `int_c` varchar(100), + `date_c` varchar(100), + `datetime_c` varchar(100) +); + diff --git a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql index 5784edaed5b..a852012fa94 100644 --- a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql +++ b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql @@ -1205,3 +1205,5 @@ INSERT INTO doris_test.t_char (char_col) VALUES (REPEAT('a', 255)); INSERT INTO doris_test.t_char (char_col) VALUES (REPEAT('中', 255)); +INSERT INTO doris_test.`test_cast` VALUES (1, '1', '2022-01-01', '2022-01-01 00:00:01'); +INSERT INTO doris_test.`test_cast` VALUES (2, '2', '2022-01-02', '2022-01-02 00:00:01'); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java index 03e2656a0f9..911d932adde 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java @@ -20,6 +20,7 @@ package org.apache.doris.datasource.jdbc.source; import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.BinaryPredicate; import org.apache.doris.analysis.BoolLiteral; +import org.apache.doris.analysis.CastExpr; import org.apache.doris.analysis.DateLiteral; import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.ExprSubstitutionMap; @@ -308,11 +309,20 @@ public class JdbcScanNode extends 
ExternalScanNode { private static boolean shouldPushDownConjunct(TOdbcTableType tableType, Expr expr) { // Prevent pushing down expressions with NullLiteral to Oracle if (ConnectContext.get() != null - && !ConnectContext.get().getSessionVariable().jdbcOracleNullPredicatePushdown + && !ConnectContext.get().getSessionVariable().enableJdbcOracleNullPredicatePushDown && containsNullLiteral(expr) && tableType.equals(TOdbcTableType.ORACLE)) { return false; } + + // Prevent pushing down cast expressions if ConnectContext is null or cast pushdown is disabled + if (ConnectContext.get() == null || !ConnectContext.get() + .getSessionVariable().enableJdbcCastPredicatePushDown) { + if (containsCastExpr(expr)) { + return false; + } + } + if (containsFunctionCallExpr(expr)) { if (tableType.equals(TOdbcTableType.MYSQL) || tableType.equals(TOdbcTableType.CLICKHOUSE) || tableType.equals(TOdbcTableType.ORACLE)) { @@ -384,4 +394,10 @@ public class JdbcScanNode extends ExternalScanNode { expr.collect(NullLiteral.class, nullExprList); return !nullExprList.isEmpty(); } + + private static boolean containsCastExpr(Expr expr) { + List<CastExpr> castExprList = Lists.newArrayList(); + expr.collect(CastExpr.class, castExprList); + return !castExprList.isEmpty(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index e04e2fe9c45..f4b8c0aa2e9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -502,7 +502,10 @@ public class SessionVariable implements Serializable, Writable { public static final String JDBC_CLICKHOUSE_QUERY_FINAL = "jdbc_clickhouse_query_final"; - public static final String JDBC_ORACLE_NULL_PREDICATE_PUSHDOWN = "jdbc_oracle_null_predicate_pushdown"; + public static final String ENABLE_JDBC_ORACLE_NULL_PREDICATE_PUSH_DOWN + = "enable_jdbc_oracle_null_predicate_push_down"; + + 
public static final String ENABLE_JDBC_CAST_PREDICATE_PUSH_DOWN = "enable_jdbc_cast_predicate_push_down"; public static final String ENABLE_MEMTABLE_ON_SINK_NODE = "enable_memtable_on_sink_node"; @@ -695,10 +698,15 @@ public class SessionVariable implements Serializable, Writable { "Whether to add the FINAL keyword to the query SQL when querying ClickHouse JDBC external tables."}) public boolean jdbcClickhouseQueryFinal = false; - @VariableMgr.VarAttr(name = JDBC_ORACLE_NULL_PREDICATE_PUSHDOWN, needForward = true, + @VariableMgr.VarAttr(name = ENABLE_JDBC_ORACLE_NULL_PREDICATE_PUSH_DOWN, needForward = true, description = {"是否允许将 NULL 谓词下推到 Oracle JDBC 外部表。", "Whether to allow NULL predicates to be pushed down to Oracle JDBC external tables."}) - public boolean jdbcOracleNullPredicatePushdown = false; + public boolean enableJdbcOracleNullPredicatePushDown = false; + + @VariableMgr.VarAttr(name = ENABLE_JDBC_CAST_PREDICATE_PUSH_DOWN, needForward = true, + description = {"是否允许将带有 CAST 表达式的谓词下推到 JDBC 外部表。", + "Whether to allow predicates with CAST expressions to be pushed down to JDBC external tables."}) + public boolean enableJdbcCastPredicatePushDown = false; @VariableMgr.VarAttr(name = ROUND_PRECISE_DECIMALV2_VALUE) public boolean roundPreciseDecimalV2Value = false; diff --git a/regression-test/data/external_table_p0/jdbc/test_jdbc_catalog_push_cast.out b/regression-test/data/external_table_p0/jdbc/test_jdbc_catalog_push_cast.out new file mode 100644 index 00000000000..0673bc8c9a2 --- /dev/null +++ b/regression-test/data/external_table_p0/jdbc/test_jdbc_catalog_push_cast.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +1 1 2022-01-01 2022-01-01 00:00:01 + diff --git a/regression-test/suites/external_table_p0/jdbc/test_jdbc_catalog_push_cast.groovy b/regression-test/suites/external_table_p0/jdbc/test_jdbc_catalog_push_cast.groovy new file mode 100644 index 00000000000..6271a1619a5 --- /dev/null +++ b/regression-test/suites/external_table_p0/jdbc/test_jdbc_catalog_push_cast.groovy @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_jdbc_catalog_push_cast", "p0,external,mysql,external_docker,external_docker_mysql") { + String enabled = context.config.otherConfigs.get("enableJdbcTest") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String s3_endpoint = getS3Endpoint() + String bucket = getS3BucketName() + String driver_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/mysql-connector-j-8.3.0.jar" + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String mysql_port = context.config.otherConfigs.get("mysql_57_port"); + + sql """drop catalog if exists jdbc_catalog_push_cast """ + sql """create catalog if not exists jdbc_catalog_push_cast properties( + "type"="jdbc", + "user"="root", + "password"="123456", + "jdbc_url" = "jdbc:mysql://${externalEnvIp}:${mysql_port}/doris_test?useSSL=false", + "driver_url" = "${driver_url}", + "driver_class" = "com.mysql.cj.jdbc.Driver" + );""" + + sql "use jdbc_catalog_push_cast.doris_test" + + qt_sql """select * from test_cast where date(datetime_c) = '2022-01-01';""" + + explain { + sql("select * from test_cast where date(datetime_c) = '2022-01-01';") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast`") + } + + explain { + sql("select * from test_cast where datetime_c = now();") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast`") + } + + explain { + sql("select * from test_cast where datetime_c = cast(cast('2022-01-01 00:00:01' as datetime) as string);") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast` WHERE ((`datetime_c` = '2022-01-01 00:00:01'))") + } + + explain { + sql("select * from test_cast where cast(datetime_c as datetime) = cast('2022-01-01 00:00:01' as datetime);") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast`") + } + + explain { + sql("select * from test_cast where date_c = cast(cast('2022-01-01 00:00:01' 
as datetime) as date)") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast`") + } + + explain { + sql("select * from test_cast where datetime_c = date_c;") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast`") + } + + explain { + sql("select * from test_cast where datetime_c = '2022-01-01';") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast`") + } + + explain { + sql("select * from test_cast where cast(datetime_c as string) = '2022-01-01 00:00:01';") + contains("SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast`") + } + + explain { + sql("select * from test_cast where datetime_c != '2022-01-01 00:00:01';") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast` WHERE ((`datetime_c` != '2022-01-01 00:00:01'))") + } + + explain { + sql("select * from test_cast where datetime_c in (cast('2022-01-01' as datetime), cast('2022-02-01' as datetime));") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast`") + } + + explain { + sql("select * from test_cast where date_c = cast(cast('2022-01-01 00:00:01' as datetime) as date) and id = 1;") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast` WHERE ((`id` = 1))") + } + + sql "set enable_jdbc_cast_predicate_push_down = true;" + + explain { + sql("select * from test_cast where cast(datetime_c as datetime) = cast('2022-01-01 00:00:01' as datetime);") + contains("QUERY: SELECT `id`, `int_c`, `date_c`, `datetime_c` FROM `doris_test`.`test_cast` WHERE (`datetime_c` = '2022-01-01 00:00:01')") + } + + sql """drop catalog if exists jdbc_catalog_push_cast """ + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org