This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 3fb44bee921234d11b1562284a3c23347fc27988
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Tue Apr 4 08:50:47 2023 +0800

    [Fix](orc-reader) Fix the scale of decimal column is incorrect when query orc tables. (#18324)

    The scale of decimal column is incorrect when query orc tables.
---
 be/src/vec/exec/format/orc/vorc_reader.cpp         | 14 ++--
 be/src/vec/exec/format/orc/vorc_reader.h           | 21 ++++--
 .../external_table_emr_p2/hive/test_wide_table.out | 23 +++++++
 .../hive/test_wide_table.groovy                    | 74 ++++++++++++++++++++++
 4 files changed, 117 insertions(+), 15 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 0840155203..a6b2ef9f7b 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -709,17 +709,13 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name,
         FOR_FLAT_ORC_COLUMNS(DISPATCH)
 #undef DISPATCH
     case TypeIndex::Decimal32:
-        return _decode_decimal_column<Int32>(col_name, data_column, data_type,
-                                             _decimal_scale_params, cvb, num_values);
+        return _decode_decimal_column<Int32>(col_name, data_column, data_type, cvb, num_values);
     case TypeIndex::Decimal64:
-        return _decode_decimal_column<Int64>(col_name, data_column, data_type,
-                                             _decimal_scale_params, cvb, num_values);
+        return _decode_decimal_column<Int64>(col_name, data_column, data_type, cvb, num_values);
     case TypeIndex::Decimal128:
-        return _decode_decimal_column<Int128>(col_name, data_column, data_type,
-                                              _decimal_scale_params, cvb, num_values);
+        return _decode_decimal_column<Int128>(col_name, data_column, data_type, cvb, num_values);
     case TypeIndex::Decimal128I:
-        return _decode_decimal_column<Int128>(col_name, data_column, data_type,
-                                              _decimal_scale_params, cvb, num_values);
+        return _decode_decimal_column<Int128>(col_name, data_column, data_type, cvb, num_values);
     case TypeIndex::Date:
         return _decode_time_column<VecDateTimeValue, Int64, orc::LongVectorBatch>(
                 col_name, data_column, cvb, num_values);
@@ -769,6 +765,8 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
     SCOPED_RAW_TIMER(&_statistics.column_read_time);
     {
         SCOPED_RAW_TIMER(&_statistics.get_batch_time);
+        // reset decimal_scale_params_index
+        _decimal_scale_params_index = 0;
         if (!_row_reader->next(*_batch)) {
             *eof = true;
             *read_rows = 0;
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h
index 53f4a91880..d4a65925b9 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -170,13 +170,19 @@ private:
     Status _decode_explicit_decimal_column(const std::string& col_name,
                                            const MutableColumnPtr& data_column,
                                            const DataTypePtr& data_type,
-                                           DecimalScaleParams& scale_params,
                                            orc::ColumnVectorBatch* cvb, size_t num_values) {
         OrcColumnType* data = dynamic_cast<OrcColumnType*>(cvb);
         if (data == nullptr) {
             return Status::InternalError("Wrong data type for colum '{}'", col_name);
         }
-        _init_decimal_converter<DecimalPrimitiveType>(data_type, scale_params, data->scale);
+        if (_decimal_scale_params_index >= _decimal_scale_params.size()) {
+            DecimalScaleParams temp_scale_params;
+            _init_decimal_converter<DecimalPrimitiveType>(data_type, temp_scale_params,
+                                                          data->scale);
+            _decimal_scale_params.emplace_back(std::move(temp_scale_params));
+        }
+        DecimalScaleParams& scale_params = _decimal_scale_params[_decimal_scale_params_index];
+        ++_decimal_scale_params_index;

         auto* cvb_data = data->values.data();
         auto& column_data =
@@ -206,16 +212,16 @@ private:

     template <typename DecimalPrimitiveType>
     Status _decode_decimal_column(const std::string& col_name, const MutableColumnPtr& data_column,
-                                  const DataTypePtr& data_type, DecimalScaleParams& scale_params,
-                                  orc::ColumnVectorBatch* cvb, size_t num_values) {
+                                  const DataTypePtr& data_type, orc::ColumnVectorBatch* cvb,
+                                  size_t num_values) {
         SCOPED_RAW_TIMER(&_statistics.decode_value_time);
         if (dynamic_cast<orc::Decimal64VectorBatch*>(cvb) != nullptr) {
             return _decode_explicit_decimal_column<DecimalPrimitiveType, orc::Decimal64VectorBatch>(
-                    col_name, data_column, data_type, scale_params, cvb, num_values);
+                    col_name, data_column, data_type, cvb, num_values);
         } else {
             return _decode_explicit_decimal_column<DecimalPrimitiveType,
                                                    orc::Decimal128VectorBatch>(
-                    col_name, data_column, data_type, scale_params, cvb, num_values);
+                    col_name, data_column, data_type, cvb, num_values);
         }
     }

@@ -293,7 +299,8 @@ private:
     orc::RowReaderOptions _row_reader_options;

     // only for decimal
-    DecimalScaleParams _decimal_scale_params;
+    std::vector<DecimalScaleParams> _decimal_scale_params;
+    size_t _decimal_scale_params_index;
 };

 } // namespace doris::vectorized
diff --git a/regression-test/data/external_table_emr_p2/hive/test_wide_table.out b/regression-test/data/external_table_emr_p2/hive/test_wide_table.out
new file mode 100644
index 0000000000..143aeb9bf5
--- /dev/null
+++ b/regression-test/data/external_table_emr_p2/hive/test_wide_table.out
@@ -0,0 +1,23 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !01 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !02 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !03 --
+
+-- !04 --
+
+-- !05 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !06 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !07 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !08 --
+9999999541515682.000000000 99999218685068.860000000 99999869.000000000 221095586.110000000 27.542540000 61077635638.763621000
+
diff --git a/regression-test/suites/external_table_emr_p2/hive/test_wide_table.groovy b/regression-test/suites/external_table_emr_p2/hive/test_wide_table.groovy
new file mode 100644
index 0000000000..f7ba390c1f
--- /dev/null
+++ b/regression-test/suites/external_table_emr_p2/hive/test_wide_table.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_wide_table", "p2") {
+
+    def formats = ["_orc"]
+    def decimal_test1 = """select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1;"""
+    def decimal_test2 = """select * from
+        (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 = 9988065.8366;
+    """
+    def decimal_test3 = """select * from
+        (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 = 9988065.8367;
+    """
+    def decimal_test4 = """select * from
+        (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 = 9988065.836;
+    """
+    def decimal_test5 = """select * from
+        (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 = 9988065.836600;
+    """
+    def decimal_test6 = """select * from
+        (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 > 9988065.83653;
+    """
+    def decimal_test7 = """select * from
+        (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 < 9988065.83673;
+    """
+    def decimal_test8 = """select max(col1), max(col70), max(col71), min(col81), min(col100), min(col534) from wide_table1SUFFIX;"""
+
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "external_wide_table"
+
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        sql """use wide_tables;"""
+        logger.info("use wide_tables")
+
+        for (String format in formats) {
+            logger.info("Process format " + format)
+            qt_01 decimal_test1.replace("SUFFIX", format)
+            qt_02 decimal_test2.replace("SUFFIX", format)
+            qt_03 decimal_test3.replace("SUFFIX", format)
+            qt_04 decimal_test4.replace("SUFFIX", format)
+            qt_05 decimal_test5.replace("SUFFIX", format)
+            qt_06 decimal_test6.replace("SUFFIX", format)
+            qt_07 decimal_test7.replace("SUFFIX", format)
+            qt_08 decimal_test8.replace("SUFFIX", format)
+        }
+    }
+}
+
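Editor's note for readers who want the gist without tracing the diff: previously OrcReader kept a single DecimalScaleParams member shared by every decimal column, so columns with different scales could be decoded with the wrong scale parameters. The patch instead caches one DecimalScaleParams per decimal column in a vector and walks it with _decimal_scale_params_index, which get_next_block() resets before each batch. Below is a minimal, self-contained sketch of that caching pattern; it is an editor's illustration, not code from the commit, and DecimalScaleParams is reduced to a bare scale factor with made-up column scales.

// Editor's sketch only: simplified stand-ins for the reader state touched by this patch.
#include <cstdint>
#include <cstdio>
#include <vector>

struct DecimalScaleParams { // stand-in; the real struct carries more than a factor
    int64_t scale_factor = 1;
};

struct ScaleCache {
    std::vector<DecimalScaleParams> params; // one cached entry per decimal column, in visit order
    size_t index = 0;                       // mirrors _decimal_scale_params_index

    void reset_for_batch() { index = 0; }   // done once per batch, as in get_next_block()

    // Called once per decimal column per batch; columns must be visited in a fixed order.
    DecimalScaleParams& next(int64_t file_scale, int64_t table_scale) {
        if (index >= params.size()) {       // first batch: compute and cache this column's params
            DecimalScaleParams p;
            int64_t diff = table_scale > file_scale ? table_scale - file_scale
                                                    : file_scale - table_scale;
            for (int64_t i = 0; i < diff; ++i) {
                p.scale_factor *= 10;
            }
            params.emplace_back(p);
        }
        return params[index++];             // later batches: reuse this column's own entry
    }
};

int main() {
    ScaleCache cache;
    // Two decimal columns with different scales; with a single shared DecimalScaleParams
    // the parameters computed for one column could leak into the other.
    for (int batch = 0; batch < 2; ++batch) {
        cache.reset_for_batch();
        int64_t col_a = cache.next(/*file_scale=*/2, /*table_scale=*/5).scale_factor; // 10^3
        int64_t col_b = cache.next(/*file_scale=*/4, /*table_scale=*/9).scale_factor; // 10^5
        std::printf("batch %d: col_a=%lld col_b=%lld\n", batch,
                    static_cast<long long>(col_a), static_cast<long long>(col_b));
    }
    return 0;
}

Computing the parameters only on the first batch and replaying them by index assumes the decimal columns are visited in the same order for every batch, which is why the patch resets the index at the start of each batch rather than clearing the vector.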