This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c223a65b1bb [fix](hive) support query hive view created by spark 
(#43530)
c223a65b1bb is described below

commit c223a65b1bb544eca2c0510d442103bcfe622e9c
Author: Mingyu Chen (Rayner) <morning...@163.com>
AuthorDate: Mon Nov 11 09:51:05 2024 +0800

    [fix](hive) support query hive view created by spark (#43530)
    
    ### What problem does this PR solve?
    
    Problem Summary:
    
    1.
    When using Spark to create a hive view, the `inputFormat` saved in the
    hive view is `SequenceFileInputFormat`.
    I think this is a bug in Spark.
    Since we don't actually need to check the input format for a view, we
    skip the format check for hive views.
    
    2.
    Fix a bug in hive view analysis: we must set both the catalog and the
    db in the temporary connection context.
    Otherwise, parsing the hive view SQL may fail, and it may also reset
    the default db of the current connection context, causing subsequent
    statements executed in this session to fail because no default db is
    found.
    
    Co-authored-by: morningman <yun...@selectdb.com>
---
 .../scripts/create_view_scripts/create_view.hql    |   6 +
 .../docker-compose/hive/scripts/hive-metastore.sh  |   7 ++
 .../doris/datasource/hive/HMSExternalTable.java    |   4 +
 .../doris/nereids/rules/analysis/BindRelation.java |  19 ++-
 .../data/external_table_p0/hive/test_hive_view.out | 133 +++++++++++++++++++++
 .../external_table_p0/hive/test_hive_view.groovy   |  66 ++++++++++
 6 files changed, 229 insertions(+), 6 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql
new file mode 100644
index 00000000000..221b536e263
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql
@@ -0,0 +1,6 @@
+use default;
+create view test_view1 as select * from sale_table;
+create view test_view2 as select * from default.sale_table;
+create view test_view3 as select * from sale_table where 
bill_code="bill_code1";
+create view test_view4 as select parquet_zstd_all_types.t_int, 
parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join 
multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = 
parquet_all_types.t_varchar order by t_int limit 10;
+create view unsupported_view as select bill_code from sale_table union all 
select t_varchar from multi_catalog.parquet_all_types order by bill_code limit 
10;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh 
b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
index 46d1437db29..422e3451f5c 100755
--- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
+++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
@@ -114,6 +114,13 @@ ls /mnt/scripts/create_preinstalled_scripts/*.hql | xargs 
-n 1 -P 10 -I {} bash
     echo "Script: {} executed in $EXECUTION_TIME seconds"
 '
 
+# create view
+START_TIME=$(date +%s)
+hive -f /mnt/scripts/create_view_scripts/create_view.hql
+END_TIME=$(date +%s)
+EXECUTION_TIME=$((END_TIME - START_TIME))
+echo "Script: create_view.hql executed in $EXECUTION_TIME seconds"
+
 touch /mnt/SUCCESS
 
 # Avoid container exit
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 445aae09f0d..aacd9268ae3 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -253,6 +253,10 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         if (remoteTable.getSd() == null) {
             throw new NotSupportedException("remote table's storage descriptor 
is null");
         }
+        // If this is hive view, no need to check file format.
+        if (remoteTable.isSetViewExpandedText() || 
remoteTable.isSetViewOriginalText()) {
+            return true;
+        }
         String inputFileFormat = remoteTable.getSd().getInputFormat();
         if (inputFileFormat == null) {
             throw new NotSupportedException("remote table's storage input 
format is null");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
index c776e73c240..9699ca243cf 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
@@ -420,8 +420,10 @@ public class BindRelation extends OneAnalysisRuleFactory {
                     if (Config.enable_query_hive_views && hmsTable.isView()) {
                         isView = true;
                         String hiveCatalog = hmsTable.getCatalog().getName();
+                        String hiveDb = hmsTable.getDatabase().getFullName();
                         String ddlSql = hmsTable.getViewText();
-                        Plan hiveViewPlan = parseAndAnalyzeHiveView(hmsTable, 
hiveCatalog, ddlSql, cascadesContext);
+                        Plan hiveViewPlan = parseAndAnalyzeHiveView(
+                                hmsTable, hiveCatalog, hiveDb, ddlSql, 
cascadesContext);
                         return new LogicalSubQueryAlias<>(qualifiedTableName, 
hiveViewPlan);
                     }
                     if (hmsTable.getDlaType() == DLAType.HUDI) {
@@ -475,15 +477,20 @@ public class BindRelation extends OneAnalysisRuleFactory {
     }
 
     private Plan parseAndAnalyzeHiveView(
-            HMSExternalTable table, String hiveCatalog, String ddlSql, 
CascadesContext cascadesContext) {
+            HMSExternalTable table, String hiveCatalog, String hiveDb, String 
ddlSql, CascadesContext cascadesContext) {
         ConnectContext ctx = cascadesContext.getConnectContext();
         String previousCatalog = ctx.getCurrentCatalog().getName();
         String previousDb = ctx.getDatabase();
+        // change catalog and db to hive catalog and db, so that we can parse 
and analyze the view sql in hive context.
         ctx.changeDefaultCatalog(hiveCatalog);
-        Plan hiveViewPlan = parseAndAnalyzeView(table, ddlSql, 
cascadesContext);
-        ctx.changeDefaultCatalog(previousCatalog);
-        ctx.setDatabase(previousDb);
-        return hiveViewPlan;
+        ctx.setDatabase(hiveDb);
+        try {
+            return parseAndAnalyzeView(table, ddlSql, cascadesContext);
+        } finally {
+            // restore catalog and db in connect context
+            ctx.changeDefaultCatalog(previousCatalog);
+            ctx.setDatabase(previousDb);
+        }
     }
 
     private Plan parseAndAnalyzeView(TableIf view, String ddlSql, 
CascadesContext parentContext) {
diff --git a/regression-test/data/external_table_p0/hive/test_hive_view.out 
b/regression-test/data/external_table_p0/hive/test_hive_view.out
new file mode 100644
index 00000000000..de207860d6d
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/test_hive_view.out
@@ -0,0 +1,133 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !desc1 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc2 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc3 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc4 --
+t_int  int     Yes     true    \N      
+t_varchar      varchar(65535)  Yes     true    \N      
+
+-- !sql1 --
+bill_code      dates   ord_year        ord_month       ord_quarter     on_time
+bill_code1     dates2  ord_year3       ord_month4      ord_quarter5    on_time6
+
+-- !sql2 --
+dates
+dates2
+
+-- !sql3 --
+1
+
+-- !sql4 --
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+
+-- !sql5 --
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+
+-- !desc5 --
+bill_code      varchar(65535)  Yes     true    \N      
+
+-- !desc1 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc2 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc3 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc4 --
+t_int  int     Yes     true    \N      
+t_varchar      varchar(65535)  Yes     true    \N      
+
+-- !sql1 --
+bill_code      dates   ord_year        ord_month       ord_quarter     on_time
+bill_code1     dates2  ord_year3       ord_month4      ord_quarter5    on_time6
+
+-- !sql2 --
+dates
+dates2
+
+-- !sql3 --
+1
+
+-- !sql4 --
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+
+-- !sql5 --
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+
+-- !desc5 --
+bill_code      varchar(65535)  Yes     true    \N      
+
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_view.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy
new file mode 100644
index 00000000000..f344888019f
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_view", 
"external_docker,hive,external_docker_hive,p0,external") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("diable Hive test.")
+        return;
+    }
+
+    for (String hivePrefix : ["hive2", "hive3"]) {
+        setHivePrefix(hivePrefix)
+        String catalog_name = "test_${hivePrefix}_view"
+        String ex_db_name = "`default`"
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hms_port = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+        String hdfs_port = context.config.otherConfigs.get(hivePrefix + 
"HdfsPort")
+
+        sql """drop catalog if exists ${catalog_name} """
+
+        sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+            'type'='hms',
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+            'hadoop.username' = 'hive'
+        );"""
+
+        sql """use ${catalog_name}.${ex_db_name}"""
+        qt_desc1 """desc test_view1"""
+        qt_desc2 """desc test_view2"""
+        qt_desc3 """desc test_view3"""
+        qt_desc4 """desc test_view4"""
+        // create view test_view1 as select * from sale_table;
+        order_qt_sql1 """ select * from test_view1 """
+        // create view test_view2 as select * from default.sale_table;
+        order_qt_sql2 """ select dates from test_view2 """
+        // create view test_view3 as select * from sale_table where 
bill_code="bill_code1";
+        order_qt_sql3 """ select count(*) from test_view3 """
+        // create view test_view4 as select parquet_zstd_all_types.t_int, 
parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join 
multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = 
parquet_all_types.t_varchar order by t_int limit 10;
+        order_qt_sql4 """ select * from test_view4 """
+        order_qt_sql5 """ select test_view4.t_int from test_view4 join 
multi_catalog.parquet_all_types on test_view4.t_varchar = 
parquet_all_types.t_varchar order by test_view4.t_int limit 10; """
+
+        // check unsupported view
+        sql """set enable_fallback_to_original_planner=false;"""
+        test {
+            sql """select * from unsupported_view;"""
+            exception """Unknown column 'bill_code' in '_u2' in SORT clause"""
+        }
+        // pr #43530
+        qt_desc5 """desc unsupported_view;"""
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to