This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new c223a65b1bb [fix](hive) support query hive view created by spark (#43530) c223a65b1bb is described below commit c223a65b1bb544eca2c0510d442103bcfe622e9c Author: Mingyu Chen (Rayner) <morning...@163.com> AuthorDate: Mon Nov 11 09:51:05 2024 +0800 [fix](hive) support query hive view created by spark (#43530) ### What problem does this PR solve? Problem Summary: 1. When using Spark to create hive view, the `inputFormat` saved in hive view is `SequenceFileInputFormat`. I think this is a bug of Spark. Actually, we don't need to check input format for view, so we skip the format check for hive view. 2. Fix a bug that when analyzing hive view, we should set both catalog and db in temporary connection context, otherwise, the parse of hive view sql may fail, and will also reset the default db of current connection context, causing following statement executed in this session failed because no default db is found. Co-authored-by: morningman <yun...@selectdb.com> --- .../scripts/create_view_scripts/create_view.hql | 6 + .../docker-compose/hive/scripts/hive-metastore.sh | 7 ++ .../doris/datasource/hive/HMSExternalTable.java | 4 + .../doris/nereids/rules/analysis/BindRelation.java | 19 ++- .../data/external_table_p0/hive/test_hive_view.out | 133 +++++++++++++++++++++ .../external_table_p0/hive/test_hive_view.groovy | 66 ++++++++++ 6 files changed, 229 insertions(+), 6 deletions(-) diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql new file mode 100644 index 00000000000..221b536e263 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql @@ -0,0 +1,6 @@ +use default; +create view test_view1 as select * from sale_table; +create view test_view2 as select * from default.sale_table; +create view test_view3 as select * from sale_table where bill_code="bill_code1"; +create view test_view4 as select parquet_zstd_all_types.t_int, parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = parquet_all_types.t_varchar order by t_int limit 10; +create view unsupported_view as select bill_code from sale_table union all select t_varchar from multi_catalog.parquet_all_types order by bill_code limit 10; diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh index 46d1437db29..422e3451f5c 100755 --- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh +++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh @@ -114,6 +114,13 @@ ls /mnt/scripts/create_preinstalled_scripts/*.hql | xargs -n 1 -P 10 -I {} bash echo "Script: {} executed in $EXECUTION_TIME seconds" ' +# create view +START_TIME=$(date +%s) +hive -f /mnt/scripts/create_view_scripts/create_view.hql +END_TIME=$(date +%s) +EXECUTION_TIME=$((END_TIME - START_TIME)) +echo "Script: create_view.hql executed in $EXECUTION_TIME seconds" + touch /mnt/SUCCESS # Avoid container exit diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 445aae09f0d..aacd9268ae3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -253,6 +253,10 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI if (remoteTable.getSd() == null) { throw new NotSupportedException("remote table's storage descriptor is null"); } + // If this is hive view, no need to check file format. + if (remoteTable.isSetViewExpandedText() || remoteTable.isSetViewOriginalText()) { + return true; + } String inputFileFormat = remoteTable.getSd().getInputFormat(); if (inputFileFormat == null) { throw new NotSupportedException("remote table's storage input format is null"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index c776e73c240..9699ca243cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -420,8 +420,10 @@ public class BindRelation extends OneAnalysisRuleFactory { if (Config.enable_query_hive_views && hmsTable.isView()) { isView = true; String hiveCatalog = hmsTable.getCatalog().getName(); + String hiveDb = hmsTable.getDatabase().getFullName(); String ddlSql = hmsTable.getViewText(); - Plan hiveViewPlan = parseAndAnalyzeHiveView(hmsTable, hiveCatalog, ddlSql, cascadesContext); + Plan hiveViewPlan = parseAndAnalyzeHiveView( + hmsTable, hiveCatalog, hiveDb, ddlSql, cascadesContext); return new LogicalSubQueryAlias<>(qualifiedTableName, hiveViewPlan); } if (hmsTable.getDlaType() == DLAType.HUDI) { @@ -475,15 +477,20 @@ public class BindRelation extends OneAnalysisRuleFactory { } private Plan parseAndAnalyzeHiveView( - HMSExternalTable table, String hiveCatalog, String ddlSql, CascadesContext cascadesContext) { + HMSExternalTable table, String hiveCatalog, String hiveDb, String ddlSql, CascadesContext cascadesContext) { ConnectContext ctx = cascadesContext.getConnectContext(); String previousCatalog = ctx.getCurrentCatalog().getName(); String previousDb = ctx.getDatabase(); + // change catalog and db to hive catalog and db, so that we can parse and analyze the view sql in hive context. ctx.changeDefaultCatalog(hiveCatalog); - Plan hiveViewPlan = parseAndAnalyzeView(table, ddlSql, cascadesContext); - ctx.changeDefaultCatalog(previousCatalog); - ctx.setDatabase(previousDb); - return hiveViewPlan; + ctx.setDatabase(hiveDb); + try { + return parseAndAnalyzeView(table, ddlSql, cascadesContext); + } finally { + // restore catalog and db in connect context + ctx.changeDefaultCatalog(previousCatalog); + ctx.setDatabase(previousDb); + } } private Plan parseAndAnalyzeView(TableIf view, String ddlSql, CascadesContext parentContext) { diff --git a/regression-test/data/external_table_p0/hive/test_hive_view.out b/regression-test/data/external_table_p0/hive/test_hive_view.out new file mode 100644 index 00000000000..de207860d6d --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_view.out @@ -0,0 +1,133 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !desc1 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc2 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc3 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc4 -- +t_int int Yes true \N +t_varchar varchar(65535) Yes true \N + +-- !sql1 -- +bill_code dates ord_year ord_month ord_quarter on_time +bill_code1 dates2 ord_year3 ord_month4 ord_quarter5 on_time6 + +-- !sql2 -- +dates +dates2 + +-- !sql3 -- +1 + +-- !sql4 -- +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test + +-- !sql5 -- +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 + +-- !desc5 -- +bill_code varchar(65535) Yes true \N + +-- !desc1 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc2 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc3 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc4 -- +t_int int Yes true \N +t_varchar varchar(65535) Yes true \N + +-- !sql1 -- +bill_code dates ord_year ord_month ord_quarter on_time +bill_code1 dates2 ord_year3 ord_month4 ord_quarter5 on_time6 + +-- !sql2 -- +dates +dates2 + +-- !sql3 -- +1 + +-- !sql4 -- +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test + +-- !sql5 -- +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 + +-- !desc5 -- +bill_code varchar(65535) Yes true \N + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_view.groovy b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy new file mode 100644 index 00000000000..f344888019f --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hive_view", "external_docker,hive,external_docker_hive,p0,external") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + for (String hivePrefix : ["hive2", "hive3"]) { + setHivePrefix(hivePrefix) + String catalog_name = "test_${hivePrefix}_view" + String ex_db_name = "`default`" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") + + sql """drop catalog if exists ${catalog_name} """ + + sql """CREATE CATALOG ${catalog_name} PROPERTIES ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', + 'hadoop.username' = 'hive' + );""" + + sql """use ${catalog_name}.${ex_db_name}""" + qt_desc1 """desc test_view1""" + qt_desc2 """desc test_view2""" + qt_desc3 """desc test_view3""" + qt_desc4 """desc test_view4""" + // create view test_view1 as select * from sale_table; + order_qt_sql1 """ select * from test_view1 """ + // create view test_view2 as select * from default.sale_table; + order_qt_sql2 """ select dates from test_view2 """ + // create view test_view3 as select * from sale_table where bill_code="bill_code1"; + order_qt_sql3 """ select count(*) from test_view3 """ + // create view test_view4 as select parquet_zstd_all_types.t_int, parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = parquet_all_types.t_varchar order by t_int limit 10; + order_qt_sql4 """ select * from test_view4 """ + order_qt_sql5 """ select test_view4.t_int from test_view4 join multi_catalog.parquet_all_types on test_view4.t_varchar = parquet_all_types.t_varchar order by test_view4.t_int limit 10; """ + + // check unsupported view + sql """set enable_fallback_to_original_planner=false;""" + test { + sql """select * from unsupported_view;""" + exception """Unknown column 'bill_code' in '_u2' in SORT clause""" + } + // pr #43530 + qt_desc5 """desc unsupported_view;""" + } +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org