This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new bc062a25950 [fix](orc)fix orc reader missing column. (#35735) bc062a25950 is described below commit bc062a259500ca3630029d4eb01585dc998e05f5 Author: daidai <2017501...@qq.com> AuthorDate: Fri May 31 22:51:44 2024 +0800 [fix](orc)fix orc reader missing column. (#35735) ## Proposed changes bp #35583 Issue Number: close #xxx <!--Describe your changes.--> --- be/src/vec/exec/format/orc/vorc_reader.cpp | 18 +- .../hive/scripts/create_preinstalled_table.hql | 21 ++- .../hive/test_hive_orc_add_column.out | 185 +++++++++++++++++++++ .../hive/test_hive_orc_add_column.groovy | 95 +++++++++++ 4 files changed, 310 insertions(+), 9 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 8742599c82f..16909f0023a 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -821,6 +821,15 @@ Status OrcReader::set_fill_columns( if (iter == predicate_columns.end()) { _lazy_read_ctx.missing_columns.emplace(kv.first, kv.second); } else { + // Add the single-slot conjuncts on this missing column to _filter_conjuncts, e.g. missing column == xx, missing column is null, missing column is not null. + if (_slot_id_to_filter_conjuncts->find(iter->second.second) != + _slot_id_to_filter_conjuncts->end()) { + for (auto& ctx : _slot_id_to_filter_conjuncts->find(iter->second.second)->second) { + _filter_conjuncts.emplace_back(ctx); + } + } + + // predicate_missing_columns holds VLiteral expressions, used to fill in default values for missing columns. 
_lazy_read_ctx.predicate_missing_columns.emplace(kv.first, kv.second); _lazy_read_ctx.all_predicate_col_ids.emplace_back(iter->second.first); } @@ -1732,10 +1741,6 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof for (auto& conjunct : _non_dict_filter_conjuncts) { filter_conjuncts.emplace_back(conjunct); } - //include missing_columns != missing_columns ; missing_column is null; missing_column != file_columns etc... - for (auto& [missing_col, conjunct] : _lazy_read_ctx.predicate_missing_columns) { - filter_conjuncts.emplace_back(conjunct); - } std::vector<IColumn::Filter*> filters; if (_delete_rows_filter_ptr) { filters.push_back(_delete_rows_filter_ptr.get()); @@ -1757,6 +1762,7 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof RETURN_IF_CATCH_EXCEPTION( Block::filter_block_internal(block, columns_to_filter, result_filter)); } + // _not_single_slot_filter_conjuncts checks multi-slot predicates, e.g. missing column1 == missing column2, missing column == existing column ... if (!_not_single_slot_filter_conjuncts.empty()) { RETURN_IF_ERROR(_convert_dict_cols_to_string_cols(block, &batch_vec)); RETURN_IF_CATCH_EXCEPTION( @@ -1894,10 +1900,6 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s for (auto& conjunct : _non_dict_filter_conjuncts) { filter_conjuncts.emplace_back(conjunct); } - //include missing_columns != missing_columns ; missing_column is null; missing_column != file_columns etc... 
- for (auto& [missing_col, conjunct] : _lazy_read_ctx.predicate_missing_columns) { - filter_conjuncts.emplace_back(conjunct); - } std::vector<IColumn::Filter*> filters; if (_delete_rows_filter_ptr) { filters.push_back(_delete_rows_filter_ptr.get()); diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql index ab7a9852bfc..4a63b85f563 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql @@ -603,6 +603,26 @@ CREATE TABLE `unsupported_type_table`( set hive.stats.column.autogather=false; +CREATE TABLE `test_hive_orc_add_column`( + id int, + col1 int +) +stored as orc; +insert into `test_hive_orc_add_column` values(1,2); +insert into `test_hive_orc_add_column` values(3,4),(4,6); +alter table `test_hive_orc_add_column` ADD COLUMNS (col2 int); +insert into `test_hive_orc_add_column` values(7,8,9); +insert into `test_hive_orc_add_column` values(10,11,null); +insert into `test_hive_orc_add_column` values(12,13,null); +insert into `test_hive_orc_add_column` values(14,15,16); +alter table `test_hive_orc_add_column` ADD COLUMNS (col3 int,col4 string); +insert into `test_hive_orc_add_column` values(17,18,19,20,"hello world"); +insert into `test_hive_orc_add_column` values(21,22,23,24,"cywcywcyw"); +insert into `test_hive_orc_add_column` values(25,26,null,null,null); +insert into `test_hive_orc_add_column` values(27,28,29,null,null); +insert into `test_hive_orc_add_column` values(30,31,32,33,null); + + CREATE TABLE `schema_evo_test_text`( id int, name string @@ -2500,4 +2520,3 @@ PARTITIONED BY ( `varchar_col` varchar(50)) stored as orc TBLPROPERTIES("orc.compress"="ZLIB"); - diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc_add_column.out b/regression-test/data/external_table_p0/hive/test_hive_orc_add_column.out new file 
mode 100644 index 00000000000..d2691568e5f --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_orc_add_column.out @@ -0,0 +1,185 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !orc_add_col1 -- +1 2 \N \N \N +3 4 \N \N \N +4 6 \N \N \N +7 8 9 \N \N +10 11 \N \N \N +12 13 \N \N \N +14 15 16 \N \N +17 18 19 20 hello world +21 22 23 24 cywcywcyw +25 26 \N \N \N +27 28 29 \N \N +30 31 32 33 \N + +-- !orc_add_col2 -- + +-- !orc_add_col3 -- + +-- !orc_add_col4 -- +1 2 \N \N \N +3 4 \N \N \N +4 6 \N \N \N +10 11 \N \N \N +12 13 \N \N \N +25 26 \N \N \N + +-- !orc_add_col5 -- +\N +\N +\N +\N +\N +\N + +-- !orc_add_col6 -- +1 2 \N \N \N +3 4 \N \N \N +4 6 \N \N \N +7 8 9 \N \N +10 11 \N \N \N +12 13 \N \N \N +14 15 16 \N \N +25 26 \N \N \N +27 28 29 \N \N + +-- !orc_add_col7 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !orc_add_col8 -- +1 2 \N \N \N +3 4 \N \N \N +4 6 \N \N \N +7 8 9 \N \N +10 11 \N \N \N +12 13 \N \N \N +14 15 16 \N \N +25 26 \N \N \N +27 28 29 \N \N +30 31 32 33 \N + +-- !orc_add_col9 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + +-- !orc_add_col10 -- +1 2 \N \N \N +3 4 \N \N \N +4 6 \N \N \N +7 8 9 \N \N +10 11 \N \N \N +12 13 \N \N \N +14 15 16 \N \N +17 18 19 20 hello world +21 22 23 24 cywcywcyw +25 26 \N \N \N +27 28 29 \N \N +30 31 32 33 \N + +-- !orc_add_col11 -- +2 +4 +6 +8 +11 +13 +15 +18 +22 +26 +28 +31 + +-- !orc_add_col12 -- +7 8 9 \N \N +14 15 16 \N \N +17 18 19 20 hello world +21 22 23 24 cywcywcyw +27 28 29 \N \N +30 31 32 33 \N + +-- !orc_add_col13 -- +9 +16 +19 +23 +29 +32 + +-- !orc_add_col14 -- +17 18 19 20 hello world +21 22 23 24 cywcywcyw +30 31 32 33 \N + +-- !orc_add_col15 -- +20 +24 +33 + +-- !orc_add_col16 -- +17 18 19 20 hello world +21 22 23 24 cywcywcyw + +-- !orc_add_col17 -- +cywcywcyw +hello world + +-- !orc_add_col18 -- +7 8 9 \N \N + +-- !orc_add_col19 -- + +-- !orc_add_col20 -- +7 8 9 \N \N +14 15 16 \N \N +17 18 19 20 hello world +21 22 23 24 
cywcywcyw +27 28 29 \N \N +30 31 32 33 \N + +-- !orc_add_col21 -- +7 8 9 \N \N +14 15 16 \N \N +17 18 19 20 hello world +21 22 23 24 cywcywcyw +27 28 29 \N \N +30 31 32 33 \N + +-- !orc_add_col22 -- + +-- !orc_add_col23 -- +30 31 32 33 \N + +-- !orc_add_col24 -- + +-- !orc_add_col25 -- +17 18 19 20 hello world +21 22 23 24 cywcywcyw +30 31 32 33 \N + +-- !orc_add_col26 -- + +-- !orc_add_col27 -- +21 22 23 24 cywcywcyw + +-- !orc_add_col28 -- +17 18 19 20 hello world +21 22 23 24 cywcywcyw + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc_add_column.groovy b/regression-test/suites/external_table_p0/hive/test_hive_orc_add_column.groovy new file mode 100644 index 00000000000..da3a75bd0b1 --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_orc_add_column.groovy @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hive_orc_add_column", "all_types,p0,external,hive,external_docker,external_docker_hive") { + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + try { + String hms_port = context.config.otherConfigs.get("hive3HmsPort") + String catalog_name = "hive3_test_orc_add_column" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + sql """use `${catalog_name}`.`default`""" + + + qt_orc_add_col1 """select * from test_hive_orc_add_column order by id ;""" + qt_orc_add_col2 """select * from test_hive_orc_add_column where col1 is null order by id ;""" + qt_orc_add_col3 """select col1 from test_hive_orc_add_column where col1 is null;""" + qt_orc_add_col4 """select * from test_hive_orc_add_column where col2 is null order by id ;""" + qt_orc_add_col5 """select col2 from test_hive_orc_add_column where col2 is null;""" + qt_orc_add_col6 """select * from test_hive_orc_add_column where col3 is null order by id ;""" + qt_orc_add_col7 """select col3 from test_hive_orc_add_column where col3 is null;""" + qt_orc_add_col8 """select * from test_hive_orc_add_column where col4 is null order by id ;""" + qt_orc_add_col9 """select col4 from test_hive_orc_add_column where col4 is null;""" + qt_orc_add_col10 """select * from test_hive_orc_add_column where col1 is not null order by id ;""" + qt_orc_add_col11 """select col1 from test_hive_orc_add_column where col1 is not null order by col1;""" + qt_orc_add_col12 """select * from test_hive_orc_add_column where col2 is not null order by id ;""" + qt_orc_add_col13 """select col2 from test_hive_orc_add_column where col2 is not null order by col2;""" + qt_orc_add_col14 """select * 
from test_hive_orc_add_column where col3 is not null order by id ;""" + qt_orc_add_col15 """select col3 from test_hive_orc_add_column where col3 is not null order by col3;""" + qt_orc_add_col16 """select * from test_hive_orc_add_column where col4 is not null order by id ;""" + qt_orc_add_col17 """select col4 from test_hive_orc_add_column where col4 is not null order by col4;""" + qt_orc_add_col18 """select * from test_hive_orc_add_column where col2 = 9 order by id ;""" + qt_orc_add_col19 """select * from test_hive_orc_add_column where col2 = 190 order by id ;""" + qt_orc_add_col20 """select * from test_hive_orc_add_column where col2 - col1 = 1 order by id ;""" + qt_orc_add_col21 """select * from test_hive_orc_add_column where col2 - id = 2 order by id ;""" + qt_orc_add_col22 """select * from test_hive_orc_add_column where col2 - id = 3 order by id ;""" + qt_orc_add_col23 """select * from test_hive_orc_add_column where col3 = 33 order by id ;""" + qt_orc_add_col24 """select * from test_hive_orc_add_column where col3 = 330 order by id ;""" + qt_orc_add_col25 """select * from test_hive_orc_add_column where col3 - col1 = 2 order by id ;""" + qt_orc_add_col26 """select * from test_hive_orc_add_column where col3 - id != 3 order by id ;""" + qt_orc_add_col27 """select * from test_hive_orc_add_column where col1 + col2 + col3 = 23*3 order by id ;""" + qt_orc_add_col28 """select * from test_hive_orc_add_column where col1 + col2 + col3 != 32*3 order by id ; """ + + + + sql """drop catalog if exists ${catalog_name}""" + + } finally { + } + +} + + +// CREATE TABLE `test_hive_orc_add_column`( +// id int, +// col1 int +// ) +// stored as orc; +// insert into `test_hive_orc_add_column` values(1,2); +// insert into `test_hive_orc_add_column` values(3,4),(4,6); +// alter table `test_hive_orc_add_column` ADD COLUMNS(col2 int); +// insert into `test_hive_orc_add_column` values(7,8,9); +// insert into `test_hive_orc_add_column` values(10,11,null); +// insert into 
`test_hive_orc_add_column` values(12,13,null); +// insert into `test_hive_orc_add_column` values(14,15,16); +// alter table `test_hive_orc_add_column` ADD COLUMNS(col3 int,col4 string); +// insert into `test_hive_orc_add_column` values(17,18,19,20,"hello world"); +// insert into `test_hive_orc_add_column` values(21,22,23,24,"cywcywcyw"); +// insert into `test_hive_orc_add_column` values(25,26,null,null,null); +// insert into `test_hive_orc_add_column` values(27,28,29,null,null); +// insert into `test_hive_orc_add_column` values(30,31,32,33,null); \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org