This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 08c9e0518d8 [fix](mtmv) Fix rewrite by materialized view fail when query hive table (#38909) 08c9e0518d8 is described below commit 08c9e0518d8f00f2d6faa5ab6d354deaab483fa8 Author: seawinde <149132972+seawi...@users.noreply.github.com> AuthorDate: Fri Aug 9 17:09:29 2024 +0800 [fix](mtmv) Fix rewrite by materialized view fail when query hive table (#38909) mv def is select l_orderkey, l_partkey, o_custkey, l_shipdate, o_orderdate from ${hive_catalog_name}.${hive_database}.${hive_table} left join ${internal_catalog}.${olap_db}.${olap_table} on l_orderkey = o_orderkey if we query the sql as following, it will rewrite fail by mv, the fail info is `mv can not offer any partition for query` select l_orderkey, l_partkey, o_custkey, l_shipdate, o_orderdate from ${hive_catalog_name}.${hive_database}.${hive_table} left join ${internal_catalog}.${olap_db}.${olap_table} on l_orderkey = o_orderkey This pr fix this problem. it would be rewritten by mv successfully. --- .../mv/AbstractMaterializedViewRule.java | 4 + .../nereids/rules/exploration/mv/StructInfo.java | 12 +- .../mv/external_table/part_partition_invalid.out | 70 +++++ .../external_table/part_partition_invalid.groovy | 284 +++++++++++++++++++++ 4 files changed, 365 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java index b34faaf1250..857fd0e51b9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java @@ -431,6 +431,10 @@ public abstract class AbstractMaterializedViewRule implements ExplorationRuleFac Map<BaseTableInfo, Set<Partition>> queryUsedBaseTablePartitions = new LinkedHashMap<>(); queryUsedBaseTablePartitions.put(relatedPartitionTable, new HashSet<>()); queryPlan.accept(new StructInfo.QueryScanPartitionsCollector(), queryUsedBaseTablePartitions); + // Bail out, not check invalid partition if not olap scan, support later + if (queryUsedBaseTablePartitions.isEmpty()) { + return Pair.of(ImmutableMap.of(), ImmutableMap.of()); + } Set<String> queryUsedBaseTablePartitionNameSet = queryUsedBaseTablePartitions.get(relatedPartitionTable) .stream() .map(Partition::getName) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java index eeb21925653..fa29d4d0e12 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java @@ -67,7 +67,6 @@ import com.google.common.collect.Sets; import java.util.ArrayList; import java.util.BitSet; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; @@ -740,14 +739,17 @@ public class StructInfo { if (!targetTablePartitionMap.containsKey(relatedPartitionTable)) { return catalogRelation; } - // todo Support other type partition table if (catalogRelation instanceof LogicalOlapScan) { + // Handle olap table LogicalOlapScan logicalOlapScan = (LogicalOlapScan) catalogRelation; + Set<Partition> tablePartitions = targetTablePartitionMap.get(relatedPartitionTable); for (Long partitionId : logicalOlapScan.getSelectedPartitionIds()) { - Set<Partition> partitions = targetTablePartitionMap.computeIfAbsent(relatedPartitionTable, - key -> new HashSet<>()); - partitions.add(logicalOlapScan.getTable().getPartition(partitionId)); + tablePartitions.add(logicalOlapScan.getTable().getPartition(partitionId)); } + } else { + // todo Support other type partition table + // Not support to partition check now when query external catalog table, support later. + targetTablePartitionMap.clear(); } return catalogRelation; } diff --git a/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out b/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out new file mode 100644 index 00000000000..b2cd5c559ca --- /dev/null +++ b/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out @@ -0,0 +1,70 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !query_sql -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 + +-- !query_mv_directly -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_data_without_refresh_catalog -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_and_without_refresh_catalog_19 -- +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_and_without_refresh_catalog_18 -- +2 2 2 2023-10-18 2023-10-18 + +-- !after_modify_data_and_refresh_catalog -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_and_refresh_catalog_19 -- +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_and_refresh_catalog_18 -- +2 2 2 2023-10-18 2023-10-18 + +-- !after_modify_data_and_refresh_catalog_and_mv -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_data_without_refresh_catalog -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_and_without_refresh_catalog_19 -- +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_and_without_refresh_catalog_20 -- + +-- !after_add_data_with_refresh_catalog -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_and_refresh_catalog_19 -- +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_and_refresh_catalog_20 -- + +-- !after_add_data_and_refresh_catalog_and_mv -- +\N \N 7 \N 2023-10-20 +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + diff --git a/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy b/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy new file mode 100644 index 00000000000..d2e6f18824d --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy @@ -0,0 +1,284 @@ +package mv.external_table +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("part_partition_invalid", "p0,external") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test. then doesn't test mv rewrite") + return; + } + // prepare catalog + def suite_name = "part_partition_invalid"; + def externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + def hms_port = context.config.otherConfigs.get("hive2HmsPort") + def hive_catalog_name = "${suite_name}_catalog" + def hive_database = "${suite_name}_db" + def hive_table = "${suite_name}_orders" + + sql """drop catalog if exists ${hive_catalog_name}""" + sql """ + create catalog if not exists ${hive_catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + + sql """switch ${hive_catalog_name};""" + sql """drop table if exists ${hive_catalog_name}.${hive_database}.${hive_table}""" + sql """ drop database if exists ${hive_database}""" + sql """ create database ${hive_database}""" + sql """use ${hive_database}""" + sql """ + CREATE TABLE IF NOT EXISTS ${hive_table} ( + o_orderkey integer, + o_custkey integer, + o_orderstatus char(1), + o_totalprice decimalv3(15,2), + o_orderpriority char(15), + o_clerk char(15), + o_shippriority integer, + o_comment varchar(79), + o_orderdate date + ) ENGINE=hive + PARTITION BY list(o_orderdate)() + PROPERTIES ( + "replication_num" = "1", + "file_format"="orc", + "compression"="zlib" + ); + """ + + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(1, 1, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-17');""" + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(2, 2, 'ok', 109.2, 'c','d',2, 'mm', '2023-10-18');""" + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(3, 3, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-19');""" + + // prepare table and data in olap + def internal_catalog = "internal" + def olap_db = context.config.getDbNameByFile(context.file) + def olap_table = "${suite_name}_lineitem" + + sql """switch ${internal_catalog};""" + sql "use ${olap_db};" + sql "SET enable_nereids_planner=true;" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject';" + + sql """ + drop table if exists ${olap_table} + """ + + sql """ + CREATE TABLE IF NOT EXISTS ${olap_table} ( + l_orderkey integer not null, + l_partkey integer not null, + l_suppkey integer not null, + l_linenumber integer not null, + l_quantity decimalv3(15,2) not null, + l_extendedprice decimalv3(15,2) not null, + l_discount decimalv3(15,2) not null, + l_tax decimalv3(15,2) not null, + l_returnflag char(1) not null, + l_linestatus char(1) not null, + l_shipdate date not null, + l_commitdate date not null, + l_receiptdate date not null, + l_shipinstruct char(25) not null, + l_shipmode char(10) not null, + l_comment varchar(44) not null + ) + DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber) + PARTITION BY RANGE(l_shipdate) + (FROM ('2023-10-01') TO ('2023-10-30') INTERVAL 1 DAY) + DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + insert into ${olap_table} values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'), + (2, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy'), + (3, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx'); + """ + + def query_sql = """ + select l_orderkey, l_partkey, o_custkey, l_shipdate, o_orderdate + from ${hive_catalog_name}.${hive_database}.${hive_table} + left join ${internal_catalog}.${olap_db}.${olap_table} on l_orderkey = o_orderkey + """ + order_qt_query_sql """${query_sql}""" + + // create partition mtmv, related partition is hive catalog + def mv_name = 'mv_join' + sql """drop materialized view if exists ${mv_name}""" + sql """ + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH AUTO ON MANUAL + partition by(`o_orderdate`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1', 'grace_period' = '0') + AS ${query_sql} + """ + + sql """REFRESH MATERIALIZED VIEW ${mv_name} complete""" + waitingMTMVTaskFinished(getJobName(olap_db, mv_name)) + order_qt_query_mv_directly """select * from ${mv_name};""" + + // test query rewrite by mv, should fail ,because materialized_view_rewrite_enable_contain_external_table + // is false default + explain { + sql(""" ${query_sql}""") + notContains("${mv_name}(${mv_name})") + } + sql "SET materialized_view_rewrite_enable_contain_external_table=true" + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + + // data change in external table doesn't influence query rewrite, + // if want to use new data in external table should be refresh manually + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(3, 3, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-19');""" + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_data_without_refresh_catalog """ ${query_sql}""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-19'; + """) + // query invalid partition data, should hit mv, because not check now. + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_and_without_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-18'; + """) + // query valid partition data, should hit mv + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_and_without_refresh_catalog_18 """ ${query_sql} where o_orderdate = '2023-10-18';""" + + // refresh catalog cache + sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_data_and_refresh_catalog """ ${query_sql}""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-19'; + """) + // query invalid partition data, should hit mv, because not check now. + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_and_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-18'; + """) + // query valid partition data, should hit mv + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_and_refresh_catalog_18 """ ${query_sql} where o_orderdate = '2023-10-18';""" + + // refresh manually + sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ + sql """REFRESH MATERIALIZED VIEW ${mv_name} auto""" + waitingMTMVTaskFinished(getJobName(olap_db, mv_name)) + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_data_and_refresh_catalog_and_mv """ ${query_sql}""" + + // test after hive add partition + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(6, 7, 'ok', 29.5, 'x', 'y', 6, 'ss', '2023-10-20');""" + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_add_data_without_refresh_catalog """ ${query_sql}""" + + explain { + sql(""" + ${query_sql} + """) + // query invalid partition data, should hit mv, because not check now. + contains("${mv_name}(${mv_name})") + } + order_qt_after_add_and_without_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-20'; + """) + // query valid partition data, should hit mv + notContains("${mv_name}(${mv_name})") + } + order_qt_after_add_and_without_refresh_catalog_20 """ ${query_sql} where o_orderdate = '2023-10-20';""" + + // refresh catalog cache + sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_add_data_with_refresh_catalog """ ${query_sql}""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-19'; + """) + // query invalid partition data, should hit mv, because not check now. + contains("${mv_name}(${mv_name})") + } + order_qt_after_add_and_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-20'; + """) + // query valid partition data, should hit mv + notContains("${mv_name}(${mv_name})") + } + order_qt_after_add_and_refresh_catalog_20 """ ${query_sql} where o_orderdate = '2023-10-20';""" + + // refresh manually + sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ + sql """REFRESH MATERIALIZED VIEW ${mv_name} auto""" + waitingMTMVTaskFinished(getJobName(olap_db, mv_name)) + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_add_data_and_refresh_catalog_and_mv """ ${query_sql}""" + + sql """drop table if exists ${hive_catalog_name}.${hive_database}.${hive_table}""" + sql """drop table if exists ${internal_catalog}.${olap_db}.${olap_table}""" + sql """drop database if exists ${hive_catalog_name}.${hive_database}""" + sql """drop materialized view if exists ${internal_catalog}.${olap_db}.${mv_name};""" + sql """drop catalog if exists ${hive_catalog_name}""" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org