This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 1efe62c7ba7 branch-2.1: [opt](hive) add option to get schema from table object #50038 (#50269) 1efe62c7ba7 is described below commit 1efe62c7ba7fa0525a3563e4ae873802b617f39b Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Tue Apr 22 14:25:03 2025 +0800 branch-2.1: [opt](hive) add option to get schema from table object #50038 (#50269) Cherry-picked from #50038 Co-authored-by: Mingyu Chen (Rayner) <morning...@163.com> --- .../doris/datasource/hive/HMSExternalCatalog.java | 7 +++ .../doris/datasource/hive/HMSExternalTable.java | 23 +++++++- .../hive/test_hive_get_schema_from_table.out | Bin 0 -> 6103 bytes .../hive/test_hive_get_schema_from_table.groovy | 62 +++++++++++++++++++++ 4 files changed, 89 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java index bd0d09f352e..505436903ce 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java @@ -75,6 +75,13 @@ public class HMSExternalCatalog extends ExternalCatalog { public static final String FILE_META_CACHE_TTL_SECOND = "file.meta.cache.ttl-second"; // broker name for file split and query scan. public static final String BIND_BROKER_NAME = "broker.name"; + // Default is false, if set to true, will get table schema from "remoteTable" instead of from hive metastore. + // This is because for some forward compatibility issue of hive metastore, there may be + // "storage schema reading not support" error being thrown. + // set this to true can avoid this error. + // But notice that if set to true, the default value of column will be ignored because we cannot get default value + // from remoteTable object. 
+ public static final String GET_SCHEMA_FROM_TABLE = "get_schema_from_table"; // -1 means file cache no ttl set public static final int FILE_META_CACHE_NO_TTL = -1; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index fda8fbf35c6..6efbcb50e8b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.logging.log4j.LogManager; @@ -573,9 +574,18 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI } private Optional<SchemaCacheValue> getHiveSchema() { - HMSCachedClient client = ((HMSExternalCatalog) catalog).getClient(); - List<FieldSchema> schema = client.getSchema(dbName, name); - Map<String, String> colDefaultValues = client.getDefaultColumnValues(dbName, name); + boolean getFromTable = catalog.getCatalogProperty() + .getOrDefault(HMSExternalCatalog.GET_SCHEMA_FROM_TABLE, "false") + .equalsIgnoreCase("true"); + List<FieldSchema> schema = null; + Map<String, String> colDefaultValues = Maps.newHashMap(); + if (getFromTable) { + schema = getSchemaFromRemoteTable(remoteTable); + } else { + HMSCachedClient client = ((HMSExternalCatalog) catalog).getClient(); + schema = client.getSchema(dbName, name); + colDefaultValues = client.getDefaultColumnValues(dbName, name); + } List<Column> columns = Lists.newArrayListWithCapacity(schema.size()); for (FieldSchema field : 
schema) { String fieldName = field.getName().toLowerCase(Locale.ROOT); @@ -588,6 +598,13 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI return Optional.of(new HMSSchemaCacheValue(columns, partitionColumns)); } + private static List<FieldSchema> getSchemaFromRemoteTable(Table table) { + List<FieldSchema> schema = Lists.newArrayList(); + schema.addAll(table.getSd().getCols()); + schema.addAll(table.getPartitionKeys()); + return schema; + } + @Override public long fetchRowCount() { makeSureInitialized(); diff --git a/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out new file mode 100644 index 00000000000..2e190d329f1 Binary files /dev/null and b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out differ diff --git a/regression-test/suites/external_table_p0/hive/test_hive_get_schema_from_table.groovy b/regression-test/suites/external_table_p0/hive/test_hive_get_schema_from_table.groovy new file mode 100644 index 00000000000..c07a0a763b0 --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_get_schema_from_table.groovy @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hive_get_schema_from_table", "external_docker,hive,external_docker_hive,p0,external") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + // test get schema from table + for (String hivePrefix : ["hive2", "hive3"]) { + String catalog_name = "test_${hivePrefix}_get_schema" + String ex_db_name = "`default`" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") + + sql """drop catalog if exists ${catalog_name} """ + + sql """CREATE CATALOG ${catalog_name} PROPERTIES ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', + 'hadoop.username' = 'hive', + 'get_schema_from_table' = 'true' + );""" + + sql """switch ${catalog_name}""" + + def res_dbs_log = sql "show databases;" + for (int i = 0; i < res_dbs_log.size(); i++) { + def tbs = sql "show tables from `${res_dbs_log[i][0]}`" + log.info("database = ${res_dbs_log[i][0]} => tables = " + tbs.toString()) + } + + order_qt_schema_1 """select * from ${catalog_name}.${ex_db_name}.parquet_partition_table order by l_orderkey limit 1;""" + order_qt_schema_2 """select * from ${catalog_name}.${ex_db_name}.parquet_delta_binary_packed order by int_value limit 1;""" + order_qt_schema_3 """select * from ${catalog_name}.${ex_db_name}.parquet_alltypes_tiny_pages order by id desc limit 5;""" + order_qt_schema_4 """select * from ${catalog_name}.${ex_db_name}.orc_all_types_partition order by bigint_col desc limit 3;""" + order_qt_schema_5 """select * from ${catalog_name}.${ex_db_name}.csv_partition_table order by k1 limit 1;""" + order_qt_schema_6 """select * from 
${catalog_name}.${ex_db_name}.csv_all_types limit 1;""" + order_qt_schema_7 """select * except(t_varchar_max_length) from ${catalog_name}.${ex_db_name}.text_all_types limit 1;""" + + //sql """drop catalog if exists ${catalog_name} """ + + } +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org