This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 1efe62c7ba7 branch-2.1: [opt](hive) add option to get schema from 
table object #50038 (#50269)
1efe62c7ba7 is described below

commit 1efe62c7ba7fa0525a3563e4ae873802b617f39b
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Apr 22 14:25:03 2025 +0800

    branch-2.1: [opt](hive) add option to get schema from table object #50038 
(#50269)
    
    Cherry-picked from #50038
    
    Co-authored-by: Mingyu Chen (Rayner) <morning...@163.com>
---
 .../doris/datasource/hive/HMSExternalCatalog.java  |   7 +++
 .../doris/datasource/hive/HMSExternalTable.java    |  23 +++++++-
 .../hive/test_hive_get_schema_from_table.out       | Bin 0 -> 6103 bytes
 .../hive/test_hive_get_schema_from_table.groovy    |  62 +++++++++++++++++++++
 4 files changed, 89 insertions(+), 3 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
index bd0d09f352e..505436903ce 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
@@ -75,6 +75,13 @@ public class HMSExternalCatalog extends ExternalCatalog {
     public static final String FILE_META_CACHE_TTL_SECOND = 
"file.meta.cache.ttl-second";
     // broker name for file split and query scan.
     public static final String BIND_BROKER_NAME = "broker.name";
+    // Defaults to false. If set to true, the table schema is read from the 
"remoteTable" object instead of from the hive metastore.
+    // This is because some forward compatibility issues of the hive metastore 
may cause
+    // a "storage schema reading not support" error to be thrown.
+    // Setting this to true avoids that error.
+    // But note that if set to true, the default values of columns will be 
ignored because we cannot get default values
+    // from the remoteTable object.
+    public static final String GET_SCHEMA_FROM_TABLE = "get_schema_from_table";
 
     // -1 means file cache no ttl set
     public static final int FILE_META_CACHE_NO_TTL = -1;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index fda8fbf35c6..6efbcb50e8b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -75,6 +75,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.logging.log4j.LogManager;
@@ -573,9 +574,18 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
     }
 
     private Optional<SchemaCacheValue> getHiveSchema() {
-        HMSCachedClient client = ((HMSExternalCatalog) catalog).getClient();
-        List<FieldSchema> schema = client.getSchema(dbName, name);
-        Map<String, String> colDefaultValues = 
client.getDefaultColumnValues(dbName, name);
+        boolean getFromTable = catalog.getCatalogProperty()
+                .getOrDefault(HMSExternalCatalog.GET_SCHEMA_FROM_TABLE, 
"false")
+                .equalsIgnoreCase("true");
+        List<FieldSchema> schema = null;
+        Map<String, String> colDefaultValues = Maps.newHashMap();
+        if (getFromTable) {
+            schema = getSchemaFromRemoteTable(remoteTable);
+        } else {
+            HMSCachedClient client = ((HMSExternalCatalog) 
catalog).getClient();
+            schema = client.getSchema(dbName, name);
+            colDefaultValues = client.getDefaultColumnValues(dbName, name);
+        }
         List<Column> columns = Lists.newArrayListWithCapacity(schema.size());
         for (FieldSchema field : schema) {
             String fieldName = field.getName().toLowerCase(Locale.ROOT);
@@ -588,6 +598,13 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         return Optional.of(new HMSSchemaCacheValue(columns, partitionColumns));
     }
 
+    private static List<FieldSchema> getSchemaFromRemoteTable(Table table) {
+        List<FieldSchema> schema = Lists.newArrayList();
+        schema.addAll(table.getSd().getCols());
+        schema.addAll(table.getPartitionKeys());
+        return schema;
+    }
+
     @Override
     public long fetchRowCount() {
         makeSureInitialized();
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
 
b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
new file mode 100644
index 00000000000..2e190d329f1
Binary files /dev/null and 
b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
 differ
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_get_schema_from_table.groovy
 
b/regression-test/suites/external_table_p0/hive/test_hive_get_schema_from_table.groovy
new file mode 100644
index 00000000000..c07a0a763b0
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/hive/test_hive_get_schema_from_table.groovy
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_get_schema_from_table", 
"external_docker,hive,external_docker_hive,p0,external") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("diable Hive test.")
+        return;
+    }
+
+    // test get schema from table
+    for (String hivePrefix : ["hive2", "hive3"]) {
+       String catalog_name = "test_${hivePrefix}_get_schema"
+       String ex_db_name = "`default`"
+       String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+       String hms_port = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+       String hdfs_port = context.config.otherConfigs.get(hivePrefix + 
"HdfsPort")
+
+       sql """drop catalog if exists ${catalog_name} """
+
+       sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+           'type'='hms',
+           'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+           'hadoop.username' = 'hive',
+           'get_schema_from_table' = 'true'
+       );"""
+
+       sql """switch ${catalog_name}"""
+
+       def res_dbs_log = sql "show databases;"
+       for (int i = 0; i < res_dbs_log.size(); i++) {
+           def tbs = sql "show tables from  `${res_dbs_log[i][0]}`"
+           log.info("database = ${res_dbs_log[i][0]} => tables = " + 
tbs.toString())
+       }
+
+       order_qt_schema_1 """select * from 
${catalog_name}.${ex_db_name}.parquet_partition_table order by l_orderkey limit 
1;"""
+       order_qt_schema_2 """select * from 
${catalog_name}.${ex_db_name}.parquet_delta_binary_packed order by int_value 
limit 1;"""
+       order_qt_schema_3 """select * from 
${catalog_name}.${ex_db_name}.parquet_alltypes_tiny_pages  order by id desc  
limit 5;"""
+       order_qt_schema_4 """select * from 
${catalog_name}.${ex_db_name}.orc_all_types_partition order by bigint_col desc 
limit 3;"""
+       order_qt_schema_5 """select * from 
${catalog_name}.${ex_db_name}.csv_partition_table order by k1 limit 1;"""
+       order_qt_schema_6 """select * from 
${catalog_name}.${ex_db_name}.csv_all_types limit 1;"""
+       order_qt_schema_7 """select * except(t_varchar_max_length) from 
${catalog_name}.${ex_db_name}.text_all_types limit 1;"""
+
+       //sql """drop catalog if exists ${catalog_name} """
+    
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to