This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new fbf81f4672e branch-3.0: [fix](hive) support query hive view created by
spark (#43552)
fbf81f4672e is described below
commit fbf81f4672e5275da50f14717d19c3ca40f7a3d0
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 12 20:08:47 2024 +0800
branch-3.0: [fix](hive) support query hive view created by spark (#43552)
Cherry-picked from #43530
Co-authored-by: Mingyu Chen (Rayner) <[email protected]>
Co-authored-by: morningman <[email protected]>
---
.../scripts/create_view_scripts/create_view.hql | 6 +
.../docker-compose/hive/scripts/hive-metastore.sh | 7 ++
.../doris/datasource/hive/HMSExternalTable.java | 4 +
.../doris/nereids/rules/analysis/BindRelation.java | 19 ++-
.../data/external_table_p0/hive/test_hive_view.out | 133 +++++++++++++++++++++
.../external_table_p0/hive/test_hive_view.groovy | 66 ++++++++++
6 files changed, 229 insertions(+), 6 deletions(-)
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql
new file mode 100644
index 00000000000..221b536e263
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql
@@ -0,0 +1,6 @@
+use default;
+create view test_view1 as select * from sale_table;
+create view test_view2 as select * from default.sale_table;
+create view test_view3 as select * from sale_table where
bill_code="bill_code1";
+create view test_view4 as select parquet_zstd_all_types.t_int,
parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join
multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar =
parquet_all_types.t_varchar order by t_int limit 10;
+create view unsupported_view as select bill_code from sale_table union all
select t_varchar from multi_catalog.parquet_all_types order by bill_code limit
10;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
index 8c0b3c3c2fd..7b92e14975b 100755
--- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
+++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
@@ -114,6 +114,13 @@ ls /mnt/scripts/create_preinstalled_scripts/*.hql | xargs
-n 1 -P 10 -I {} bash
echo "Script: {} executed in $EXECUTION_TIME seconds"
'
+# create view
+START_TIME=$(date +%s)
+hive -f /mnt/scripts/create_view_scripts/create_view.hql
+END_TIME=$(date +%s)
+EXECUTION_TIME=$((END_TIME - START_TIME))
+echo "Script: create_view.hql executed in $EXECUTION_TIME seconds"
+
touch /mnt/SUCCESS
# Avoid container exit
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 445aae09f0d..aacd9268ae3 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -253,6 +253,10 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
if (remoteTable.getSd() == null) {
throw new NotSupportedException("remote table's storage descriptor
is null");
}
+ // If this is hive view, no need to check file format.
+ if (remoteTable.isSetViewExpandedText() ||
remoteTable.isSetViewOriginalText()) {
+ return true;
+ }
String inputFileFormat = remoteTable.getSd().getInputFormat();
if (inputFileFormat == null) {
throw new NotSupportedException("remote table's storage input
format is null");
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
index e8680d9f549..c8d0a6f5dd4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
@@ -420,8 +420,10 @@ public class BindRelation extends OneAnalysisRuleFactory {
if (Config.enable_query_hive_views && hmsTable.isView()) {
isView = true;
String hiveCatalog = hmsTable.getCatalog().getName();
+ String hiveDb = hmsTable.getDatabase().getFullName();
String ddlSql = hmsTable.getViewText();
- Plan hiveViewPlan = parseAndAnalyzeHiveView(hmsTable,
hiveCatalog, ddlSql, cascadesContext);
+ Plan hiveViewPlan = parseAndAnalyzeHiveView(
+ hmsTable, hiveCatalog, hiveDb, ddlSql,
cascadesContext);
return new LogicalSubQueryAlias<>(qualifiedTableName,
hiveViewPlan);
}
if (hmsTable.getDlaType() == DLAType.HUDI) {
@@ -474,15 +476,20 @@ public class BindRelation extends OneAnalysisRuleFactory {
}
private Plan parseAndAnalyzeHiveView(
- HMSExternalTable table, String hiveCatalog, String ddlSql,
CascadesContext cascadesContext) {
+ HMSExternalTable table, String hiveCatalog, String hiveDb, String
ddlSql, CascadesContext cascadesContext) {
ConnectContext ctx = cascadesContext.getConnectContext();
String previousCatalog = ctx.getCurrentCatalog().getName();
String previousDb = ctx.getDatabase();
+ // change catalog and db to hive catalog and db, so that we can parse
and analyze the view sql in hive context.
ctx.changeDefaultCatalog(hiveCatalog);
- Plan hiveViewPlan = parseAndAnalyzeView(table, ddlSql,
cascadesContext);
- ctx.changeDefaultCatalog(previousCatalog);
- ctx.setDatabase(previousDb);
- return hiveViewPlan;
+ ctx.setDatabase(hiveDb);
+ try {
+ return parseAndAnalyzeView(table, ddlSql, cascadesContext);
+ } finally {
+ // restore catalog and db in connect context
+ ctx.changeDefaultCatalog(previousCatalog);
+ ctx.setDatabase(previousDb);
+ }
}
private Plan parseAndAnalyzeView(TableIf view, String ddlSql,
CascadesContext parentContext) {
diff --git a/regression-test/data/external_table_p0/hive/test_hive_view.out
b/regression-test/data/external_table_p0/hive/test_hive_view.out
new file mode 100644
index 00000000000..de207860d6d
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/test_hive_view.out
@@ -0,0 +1,133 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !desc1 --
+bill_code varchar(500) Yes true \N
+dates varchar(500) Yes true \N
+ord_year varchar(500) Yes true \N
+ord_month varchar(500) Yes true \N
+ord_quarter varchar(500) Yes true \N
+on_time varchar(500) Yes true \N
+
+-- !desc2 --
+bill_code varchar(500) Yes true \N
+dates varchar(500) Yes true \N
+ord_year varchar(500) Yes true \N
+ord_month varchar(500) Yes true \N
+ord_quarter varchar(500) Yes true \N
+on_time varchar(500) Yes true \N
+
+-- !desc3 --
+bill_code varchar(500) Yes true \N
+dates varchar(500) Yes true \N
+ord_year varchar(500) Yes true \N
+ord_month varchar(500) Yes true \N
+ord_quarter varchar(500) Yes true \N
+on_time varchar(500) Yes true \N
+
+-- !desc4 --
+t_int int Yes true \N
+t_varchar varchar(65535) Yes true \N
+
+-- !sql1 --
+bill_code dates ord_year ord_month ord_quarter on_time
+bill_code1 dates2 ord_year3 ord_month4 ord_quarter5 on_time6
+
+-- !sql2 --
+dates
+dates2
+
+-- !sql3 --
+1
+
+-- !sql4 --
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+
+-- !sql5 --
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+
+-- !desc5 --
+bill_code varchar(65535) Yes true \N
+
+-- !desc1 --
+bill_code varchar(500) Yes true \N
+dates varchar(500) Yes true \N
+ord_year varchar(500) Yes true \N
+ord_month varchar(500) Yes true \N
+ord_quarter varchar(500) Yes true \N
+on_time varchar(500) Yes true \N
+
+-- !desc2 --
+bill_code varchar(500) Yes true \N
+dates varchar(500) Yes true \N
+ord_year varchar(500) Yes true \N
+ord_month varchar(500) Yes true \N
+ord_quarter varchar(500) Yes true \N
+on_time varchar(500) Yes true \N
+
+-- !desc3 --
+bill_code varchar(500) Yes true \N
+dates varchar(500) Yes true \N
+ord_year varchar(500) Yes true \N
+ord_month varchar(500) Yes true \N
+ord_quarter varchar(500) Yes true \N
+on_time varchar(500) Yes true \N
+
+-- !desc4 --
+t_int int Yes true \N
+t_varchar varchar(65535) Yes true \N
+
+-- !sql1 --
+bill_code dates ord_year ord_month ord_quarter on_time
+bill_code1 dates2 ord_year3 ord_month4 ord_quarter5 on_time6
+
+-- !sql2 --
+dates
+dates2
+
+-- !sql3 --
+1
+
+-- !sql4 --
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+3 test
+
+-- !sql5 --
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+
+-- !desc5 --
+bill_code varchar(65535) Yes true \N
+
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_view.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy
new file mode 100644
index 00000000000..f344888019f
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_view",
"external_docker,hive,external_docker_hive,p0,external") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable Hive test.")
+ return;
+ }
+
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ setHivePrefix(hivePrefix)
+ String catalog_name = "test_${hivePrefix}_view"
+ String ex_db_name = "`default`"
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String hdfs_port = context.config.otherConfigs.get(hivePrefix +
"HdfsPort")
+
+ sql """drop catalog if exists ${catalog_name} """
+
+ sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+ 'type'='hms',
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+ 'hadoop.username' = 'hive'
+ );"""
+
+ sql """use ${catalog_name}.${ex_db_name}"""
+ qt_desc1 """desc test_view1"""
+ qt_desc2 """desc test_view2"""
+ qt_desc3 """desc test_view3"""
+ qt_desc4 """desc test_view4"""
+ // create view test_view1 as select * from sale_table;
+ order_qt_sql1 """ select * from test_view1 """
+ // create view test_view2 as select * from default.sale_table;
+ order_qt_sql2 """ select dates from test_view2 """
+ // create view test_view3 as select * from sale_table where
bill_code="bill_code1";
+ order_qt_sql3 """ select count(*) from test_view3 """
+ // create view test_view4 as select parquet_zstd_all_types.t_int,
parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join
multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar =
parquet_all_types.t_varchar order by t_int limit 10;
+ order_qt_sql4 """ select * from test_view4 """
+ order_qt_sql5 """ select test_view4.t_int from test_view4 join
multi_catalog.parquet_all_types on test_view4.t_varchar =
parquet_all_types.t_varchar order by test_view4.t_int limit 10; """
+
+ // check unsupported view
+ sql """set enable_fallback_to_original_planner=false;"""
+ test {
+ sql """select * from unsupported_view;"""
+ exception """Unknown column 'bill_code' in '_u2' in SORT clause"""
+ }
+ // pr #43530
+ qt_desc5 """desc unsupported_view;"""
+ }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]