This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git

commit e625826e207d1cc2a06e2d7a43626d77f5587ca8
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Thu Jun 8 18:10:36 2023 +0800

    [Fix](multi-catalog) Fix be crashed when query hive table after schema changed(new column added). (#20537)

    Fix be crashed when query hive table after schema changed(new column added).
    Regression Test: test_hive_schema_evolution.groovy
---
 be/src/vec/exec/format/orc/vorc_reader.cpp         | 16 +++--
 .../hive/scripts/create_preinstalled_table.hql     | 27 ++++++++
 .../hive/test_hive_schema_evolution.out            | 37 +++++++++++
 .../hive/test_hive_schema_evolution.groovy         | 75 ++++++++++++++++++++++
 4 files changed, 150 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index fdc9e3404d..945cf09087 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -273,11 +273,17 @@ Status OrcReader::_init_read_columns() {
     for (auto& col_name : _column_names) {
         if (_is_hive) {
             auto iter = _scan_params.slot_name_to_schema_pos.find(col_name);
-            int pos = iter->second;
-            if (_is_acid) {
-                orc_cols_lower_case[ACID_ROW_OFFSET + 1 + pos] = iter->first;
-            } else {
-                orc_cols_lower_case[pos] = iter->first;
+            if (iter != _scan_params.slot_name_to_schema_pos.end()) {
+                int pos = iter->second;
+                if (_is_acid) {
+                    if (ACID_ROW_OFFSET + 1 + pos < orc_cols_lower_case.size()) {
+                        orc_cols_lower_case[ACID_ROW_OFFSET + 1 + pos] = iter->first;
+                    }
+                } else {
+                    if (pos < orc_cols_lower_case.size()) {
+                        orc_cols_lower_case[pos] = iter->first;
+                    }
+                }
             }
         }
         auto iter = std::find(orc_cols_lower_case.begin(), orc_cols_lower_case.end(), col_name);
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 86cd6649a7..4c6108e9d4 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -594,4 +594,31 @@ CREATE TABLE `unsupported_type_table`(
     k6 int
 );
 
+CREATE TABLE `schema_evo_test_text`(
+    id int,
+    name string
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED by ',';
+insert into `schema_evo_test_text` select 1, "kaka";
+alter table `schema_evo_test_text` ADD COLUMNS (`ts` timestamp);
+insert into `schema_evo_test_text` select 2, "messi", from_unixtime(to_unix_timestamp('20230101 13:01:03','yyyyMMdd HH:mm:ss'));
+
+CREATE TABLE `schema_evo_test_parquet`(
+    id int,
+    name string
+)
+stored as parquet;
+insert into `schema_evo_test_parquet` select 1, "kaka";
+alter table `schema_evo_test_parquet` ADD COLUMNS (`ts` timestamp);
+insert into `schema_evo_test_parquet` select 2, "messi", from_unixtime(to_unix_timestamp('20230101 13:01:03','yyyyMMdd HH:mm:ss'));
+
+CREATE TABLE `schema_evo_test_orc`(
+    id int,
+    name string
+)
+stored as orc;
+insert into `schema_evo_test_orc` select 1, "kaka";
+alter table `schema_evo_test_orc` ADD COLUMNS (`ts` timestamp);
+insert into `schema_evo_test_orc` select 2, "messi", from_unixtime(to_unix_timestamp('20230101 13:01:03','yyyyMMdd HH:mm:ss'));
+
 show tables;
diff --git a/regression-test/data/external_table_emr_p2/hive/test_hive_schema_evolution.out b/regression-test/data/external_table_emr_p2/hive/test_hive_schema_evolution.out
new file mode 100644
index 0000000000..1cb5cde151
--- /dev/null
+++ b/regression-test/data/external_table_emr_p2/hive/test_hive_schema_evolution.out
@@ -0,0 +1,37 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !q01 --
+1 kaka \N
+2 messi 2023-01-01T13:01:03
+
+-- !q02 --
+1 kaka \N
+2 messi 2023-01-01T13:01:03
+
+-- !q03 --
+\N
+2023-01-01T13:01:03
+
+-- !q01 --
+1 kaka \N
+2 messi 2023-01-01T21:01:03
+
+-- !q02 --
+1 kaka \N
+2 messi 2023-01-01T21:01:03
+
+-- !q03 --
+\N
+2023-01-01T21:01:03
+
+-- !q01 --
+1 kaka \N
+2 messi 2023-01-01T13:01:03
+
+-- !q02 --
+1 kaka \N
+2 messi 2023-01-01T13:01:03
+
+-- !q03 --
+\N
+2023-01-01T13:01:03
+
diff --git a/regression-test/suites/external_table_emr_p2/hive/test_hive_schema_evolution.groovy b/regression-test/suites/external_table_emr_p2/hive/test_hive_schema_evolution.groovy
new file mode 100644
index 0000000000..2cbe589e8b
--- /dev/null
+++ b/regression-test/suites/external_table_emr_p2/hive/test_hive_schema_evolution.groovy
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_schema_evolution", "p0") {
+    def q_text = {
+        qt_q01 """
+        select * from schema_evo_test_text order by id;
+        """
+        qt_q02 """
+        select id, name, ts from schema_evo_test_text order by id;
+        """
+        qt_q03 """
+        select ts from schema_evo_test_text order by id;
+        """
+    }
+
+    def q_parquet = {
+        qt_q01 """
+        select * from schema_evo_test_parquet order by id;
+        """
+        qt_q02 """
+        select id, name, ts from schema_evo_test_parquet order by id;
+        """
+        qt_q03 """
+        select ts from schema_evo_test_parquet order by id;
+        """
+    }
+
+    def q_orc = {
+        qt_q01 """
+        select * from schema_evo_test_orc order by id;
+        """
+        qt_q02 """
+        select id, name, ts from schema_evo_test_orc order by id;
+        """
+        qt_q03 """
+        select ts from schema_evo_test_orc order by id;
+        """
+    }
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        try {
+            String hms_port = context.config.otherConfigs.get("hms_port")
+            String catalog_name = "test_hive_schema_evolution"
+            sql """drop catalog if exists ${catalog_name}"""
+            sql """create catalog if not exists ${catalog_name} properties (
+                "type"="hms",
+                'hive.metastore.uris' = 'thrift://127.0.0.1:${hms_port}'
+            );"""
+            sql """use `${catalog_name}`.`default`"""
+
+            q_text()
+            q_parquet()
+            q_orc()
+
+            sql """drop catalog if exists ${catalog_name}"""
+        } finally {
+        }
+    }
+}

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org