This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 124516c1ea [Fix](orc-reader) Fix `Wrong data type for column` error when column order in hive table is not same in orc file schema. (#21306) 124516c1ea is described below commit 124516c1ea429c6743093e41d607fb3de0a29e3c Author: Qi Chen <kaka11.c...@gmail.com> AuthorDate: Mon Jul 3 09:32:55 2023 +0800 [Fix](orc-reader) Fix `Wrong data type for column` error when column order in hive table is not same in orc file schema. (#21306) `Wrong data type for column` error when column order in hive table is not same in orc file schema. The root cause is in order to handle the following case: The table in orc format of Hive 1.x may encounter system column names such as `_col0`, `_col1`, `_col2`... in the underlying orc file schema, which need to use the column names in the hive table for mapping. ### Solution Currently fix this issue by handling the following case by specifying hive version to 1.x.x in the hive catalog configuration. ```sql CREATE CATALOG hive PROPERTIES ( 'hive.version' = '1.x.x' ); ``` --- .../hive/scripts/create_preinstalled_table.hql | 20 +++++++ .../orc/test_different_column_orders.orc | Bin 0 -> 671 bytes .../parquet/test_different_column_orders.parquet | Bin 0 -> 925 bytes docs/en/docs/lakehouse/faq.md | 11 +++- docs/zh-CN/docs/lakehouse/faq.md | 8 +++ .../doris/catalog/external/HMSExternalTable.java | 4 ++ .../doris/datasource/HMSExternalCatalog.java | 4 ++ .../doris/datasource/hive/HiveVersionUtil.java | 22 ++++++++ .../doris/planner/external/HiveScanNode.java | 5 +- .../hive/test_different_column_orders.out | 29 ++++++++++ .../hive/test_different_column_orders.groovy | 60 +++++++++++++++++++++ 11 files changed, 161 insertions(+), 2 deletions(-) diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql index 69c58d0ac8..9f34c121df 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql @@ -660,4 +660,24 @@ update orc_full_acid_par set value = 'BB' where id = 2; alter table orc_full_acid_par PARTITION(part_col=20230101) compact 'major'; alter table orc_full_acid_par PARTITION(part_col=20230102) compact 'major'; +CREATE TABLE `test_different_column_orders_orc`( + `name` string, + `id` int, + `city` string, + `age` int, + `sex` string) +STORED AS ORC +LOCATION + '/user/doris/preinstalled_data/test_different_column_orders/orc'; + +CREATE TABLE `test_different_column_orders_parquet`( + `name` string, + `id` int, + `city` string, + `age` int, + `sex` string) +STORED AS PARQUET +LOCATION + '/user/doris/preinstalled_data/test_different_column_orders/parquet'; + show tables; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/test_different_column_orders/orc/test_different_column_orders.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/test_different_column_orders/orc/test_different_column_orders.orc new file mode 100644 index 0000000000..3b8313cc4a Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/test_different_column_orders/orc/test_different_column_orders.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/test_different_column_orders/parquet/test_different_column_orders.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/test_different_column_orders/parquet/test_different_column_orders.parquet new file mode 100644 index 0000000000..7bdfeea380 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/test_different_column_orders/parquet/test_different_column_orders.parquet differ diff --git a/docs/en/docs/lakehouse/faq.md b/docs/en/docs/lakehouse/faq.md index ee40736f4d..ac9688bf08 100644 --- a/docs/en/docs/lakehouse/faq.md +++ b/docs/en/docs/lakehouse/faq.md @@ -138,4 +138,13 @@ under the License. } ] } - ``` \ No newline at end of file + ``` + +13. The table in orc format of Hive 1.x may encounter system column names such as `_col0`, `_col1`, `_col2`... in the underlying orc file schema, which need to be specified in the catalog configuration. Add `hive.version` to 1.x.x so that it will use the column names in the hive table for mapping. + + ```sql + CREATE CATALOG hive PROPERTIES ( + 'hive.version' = '1.x.x' + ); + ``` + diff --git a/docs/zh-CN/docs/lakehouse/faq.md b/docs/zh-CN/docs/lakehouse/faq.md index 4ba1910f4f..24a6ae94f2 100644 --- a/docs/zh-CN/docs/lakehouse/faq.md +++ b/docs/zh-CN/docs/lakehouse/faq.md @@ -140,3 +140,11 @@ under the License. } ``` +13. Hive 1.x 的 orc 格式的表可能会遇到底层 orc 文件 schema 中列名为 `_col0`,`_col1`,`_col2`... 这类系统列名,此时需要在 catalog 配置中添加 `hive.version` 为 1.x.x,这样就会使用 hive 表中的列名进行映射。 + + ```sql + CREATE CATALOG hive PROPERTIES ( + 'hive.version' = '1.x.x' + ); + ``` + diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java index 3e10f6636d..c9f03fd80c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java @@ -341,6 +341,10 @@ public class HMSExternalTable extends ExternalTable { return ((HMSExternalCatalog) catalog).getHiveMetastoreUris(); } + public String getHiveVersion() { + return ((HMSExternalCatalog) catalog).getHiveVersion(); + } + public Map<String, String> getCatalogProperties() { return catalog.getProperties(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java index 4d6c8b86fe..9d4fc479a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java @@ -124,6 +124,10 @@ public class HMSExternalCatalog extends ExternalCatalog { return catalogProperty.getOrDefault(HMSProperties.HIVE_METASTORE_URIS, ""); } + public String getHiveVersion() { + return catalogProperty.getOrDefault(HMSProperties.HIVE_VERSION, ""); + } + protected List<String> listDatabaseNames() { return client.getAllDatabases(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveVersionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveVersionUtil.java index 749520b62c..b93d5653ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveVersionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveVersionUtil.java @@ -72,4 +72,26 @@ public class HiveVersionUtil { return DEFAULT_HIVE_VERSION; } } + + public static boolean isHive1(String version) { + if (Strings.isNullOrEmpty(version)) { + return false; + } + String[] parts = version.split("\\."); + if (parts.length < 2) { + LOG.warn("invalid hive version: " + version); + return false; + } + try { + int major = Integer.parseInt(parts[0]); + if (major == 1) { + return true; + } else { + return false; + } + } catch (NumberFormatException e) { + LOG.warn("invalid hive version: " + version); + return false; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java index 738a2e3933..710fed10dd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java @@ -37,6 +37,7 @@ import org.apache.doris.datasource.hive.HiveMetaStoreCache; import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue; import org.apache.doris.datasource.hive.HivePartition; import org.apache.doris.datasource.hive.HiveTransaction; +import org.apache.doris.datasource.hive.HiveVersionUtil; import org.apache.doris.planner.ListPartitionPrunerV2; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.planner.external.HiveSplit.HiveSplitCreator; @@ -91,7 +92,9 @@ public class HiveScanNode extends FileQueryScanNode { @Override protected void doInitialize() throws UserException { super.doInitialize(); - genSlotToSchemaIdMap(); + if (HiveVersionUtil.isHive1(hmsTable.getHiveVersion())) { + genSlotToSchemaIdMap(); + } String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); if (inputFormat.contains("TextInputFormat")) { for (SlotDescriptor slot : desc.getSlots()) { diff --git a/regression-test/data/external_catalog_p0/hive/test_different_column_orders.out b/regression-test/data/external_catalog_p0/hive/test_different_column_orders.out new file mode 100644 index 0000000000..6e3fc17262 --- /dev/null +++ b/regression-test/data/external_catalog_p0/hive/test_different_column_orders.out @@ -0,0 +1,29 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !q01 -- +Tom 1 shanghai 48 male +Jerry 2 guangzhou 35 male +Frank 3 hangzhou 25 male +Ada 4 beijing 22 female + +-- !q02 -- +4 + +-- !q03 -- +guangzhou 1 +hangzhou 1 +shanghai 1 + +-- !q01 -- +Tom 1 shanghai 48 male +Jerry 2 guangzhou 35 male +Frank 3 hangzhou 25 male +Ada 4 beijing 22 female + +-- !q02 -- +4 + +-- !q03 -- +guangzhou 1 +hangzhou 1 +shanghai 1 + diff --git a/regression-test/suites/external_catalog_p0/hive/test_different_column_orders.groovy b/regression-test/suites/external_catalog_p0/hive/test_different_column_orders.groovy new file mode 100644 index 0000000000..3fcb5a3c3f --- /dev/null +++ b/regression-test/suites/external_catalog_p0/hive/test_different_column_orders.groovy @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_different_column_orders", "p0") { + def q_parquet = { + qt_q01 """ + select * from test_different_column_orders_parquet order by id; + """ + qt_q02 """ + select count(id) from test_different_column_orders_parquet; + """ + qt_q03 """ + select city, count(*) from test_different_column_orders_parquet where sex = 'male' group by city order by city; + """ + } + def q_orc = { + qt_q01 """ + select * from test_different_column_orders_orc order by id; + """ + qt_q02 """ + select count(id) from test_different_column_orders_orc; + """ + qt_q03 """ + select city, count(*) from test_different_column_orders_orc where sex = 'male' group by city order by city; + """ + } + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + try { + String hms_port = context.config.otherConfigs.get("hms_port") + String catalog_name = "test_different_column_orders" + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://127.0.0.1:${hms_port}' + );""" + sql """use `${catalog_name}`.`default`""" + + q_parquet() + q_orc() + + sql """drop catalog if exists ${catalog_name}""" + } finally { + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org