This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 681f960257 [fix](storage)(vectorized) query get wrong result when read datetime type column (#8872) 681f960257 is described below commit 681f9602575492fd2a2c81bff5a13ddf61401ab0 Author: Pxl <952130...@qq.com> AuthorDate: Mon Apr 18 19:34:06 2022 +0800 [fix](storage)(vectorized) query get wrong result when read datetime type column (#8872) --- be/src/olap/row_block2.cpp | 25 ++-- .../aggregate_function_window_funnel.h | 6 +- be/src/vec/columns/column_vector.h | 9 +- be/src/vec/columns/predicate_column.h | 8 +- be/src/vec/exec/volap_scanner.cpp | 135 --------------------- be/src/vec/exec/volap_scanner.h | 2 - be/src/vec/functions/function_rpc.cpp | 14 ++- be/src/vec/functions/function_timestamp.cpp | 5 +- be/src/vec/runtime/vdatetime_value.h | 28 ++++- 9 files changed, 64 insertions(+), 168 deletions(-) diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 4f2feec37b..8beca02192 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -236,11 +236,12 @@ Status RowBlockV2::_copy_data_to_column(int cid, value |= *(unsigned char*)(ptr + 1); value <<= 8; value |= *(unsigned char*)(ptr); - vectorized::VecDateTimeValue date; - date.from_olap_date(value); + vectorized::VecDateTimeValue date = + vectorized::VecDateTimeValue::create_from_olap_date(value); (column_int)->insert_data(reinterpret_cast<char*>(&date), 0); - } else + } else { column_int->insert_default(); + } } break; } @@ -253,9 +254,9 @@ Status RowBlockV2::_copy_data_to_column(int cid, auto ptr = reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)); uint64_t value = *reinterpret_cast<const uint64_t*>(ptr); - vectorized::VecDateTimeValue data; - data.from_olap_datetime(value); - (column_int)->insert_data(reinterpret_cast<char*>(&data), 0); + vectorized::VecDateTimeValue datetime = + vectorized::VecDateTimeValue::create_from_olap_datetime(value); + 
(column_int)->insert_data(reinterpret_cast<char*>(&datetime), 0); } else { column_int->insert_default(); } @@ -498,11 +499,12 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t value |= *(unsigned char*)(ptr + 1); value <<= 8; value |= *(unsigned char*)(ptr); - vectorized::VecDateTimeValue date; - date.from_olap_date(value); + vectorized::VecDateTimeValue date = + vectorized::VecDateTimeValue::create_from_olap_date(value); (column_int)->insert_data(reinterpret_cast<char*>(&date), 0); - } else + } else { column_int->insert_default(); + } } break; } @@ -515,8 +517,9 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t auto ptr = reinterpret_cast<const char*>(batch->cell_ptr(row_idx)); uint64_t value = *reinterpret_cast<const uint64_t*>(ptr); - vectorized::VecDateTimeValue data(value); - (column_int)->insert_data(reinterpret_cast<char*>(&data), 0); + vectorized::VecDateTimeValue datetime = + vectorized::VecDateTimeValue::create_from_olap_datetime(value); + (column_int)->insert_data(reinterpret_cast<char*>(&datetime), 0); } else { column_int->insert_default(); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h index f4364eebb4..8f42a3398c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h @@ -132,7 +132,8 @@ struct WindowFunnelState { write_var_int(events.size(), out); for (int64_t i = 0; i < events.size(); i++) { - int64_t timestamp = events[i].first; + int64_t timestamp = + binary_cast<vectorized::VecDateTimeValue, vectorized::Int64>(events[i].first); int event_idx = events[i].second; write_var_int(timestamp, out); write_var_int(event_idx, out); @@ -152,7 +153,8 @@ struct WindowFunnelState { read_var_int(timestamp, in); read_var_int(event_idx, in); - VecDateTimeValue time_value(timestamp); + VecDateTimeValue 
time_value = + binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(timestamp); add(time_value, (int)event_idx, max_event_level, window); } } diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 216e8fd0ed..e57ffe4a9c 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -21,6 +21,7 @@ #pragma once #include <cmath> +#include <type_traits> #include "olap/uint24.h" #include "vec/columns/column.h" @@ -177,8 +178,7 @@ public: value |= *(unsigned char*)(cur_ptr + 1); value <<= 8; value |= *(unsigned char*)(cur_ptr); - vectorized::VecDateTimeValue date; - date.from_olap_date(value); + vectorized::VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(value); this->insert_data(reinterpret_cast<char*>(&date), 0); } } @@ -188,8 +188,9 @@ public: for (int i = 0; i < num; i++) { const char* cur_ptr = data_ptr + value_size * i; uint64_t value = *reinterpret_cast<const uint64_t*>(cur_ptr); - vectorized::VecDateTimeValue date(value); - this->insert_data(reinterpret_cast<char*>(&date), 0); + vectorized::VecDateTimeValue datetime = + VecDateTimeValue::create_from_olap_datetime(value); + this->insert_data(reinterpret_cast<char*>(&datetime), 0); } } diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h index f41c239b5e..7db73b9d0a 100644 --- a/be/src/vec/columns/predicate_column.h +++ b/be/src/vec/columns/predicate_column.h @@ -58,8 +58,7 @@ private: void insert_date_to_res_column(const uint16_t* sel, size_t sel_size, vectorized::ColumnVector<Int64>* res_ptr) { for (size_t i = 0; i < sel_size; i++) { - VecDateTimeValue date; - date.from_olap_date(get_date_at(sel[i])); + VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(get_date_at(sel[i])); res_ptr->insert_data(reinterpret_cast<char*>(&date), 0); } } @@ -68,8 +67,9 @@ private: vectorized::ColumnVector<Int64>* res_ptr) { for (size_t i = 0; i < sel_size; i++) { uint64_t value = data[sel[i]]; - 
vectorized::VecDateTimeValue date(value); - res_ptr->insert_data(reinterpret_cast<char*>(&date), 0); + vectorized::VecDateTimeValue datetime = + VecDateTimeValue::create_from_olap_datetime(value); + res_ptr->insert_data(reinterpret_cast<char*>(&datetime), 0); } } diff --git a/be/src/vec/exec/volap_scanner.cpp b/be/src/vec/exec/volap_scanner.cpp index c29d1aa60e..410fa8f6b6 100644 --- a/be/src/vec/exec/volap_scanner.cpp +++ b/be/src/vec/exec/volap_scanner.cpp @@ -83,139 +83,4 @@ Status VOlapScanner::get_block(RuntimeState* state, vectorized::Block* block, bo void VOlapScanner::set_tablet_reader() { _tablet_reader = std::make_unique<BlockReader>(); } - -void VOlapScanner::_convert_row_to_block(std::vector<vectorized::MutableColumnPtr>* columns) { - size_t slots_size = _query_slots.size(); - for (int i = 0; i < slots_size; ++i) { - SlotDescriptor* slot_desc = _query_slots[i]; - auto cid = _return_columns[i]; - - auto* column_ptr = (*columns)[i].get(); - if (slot_desc->is_nullable()) { - auto* nullable_column = reinterpret_cast<ColumnNullable*>((*columns)[i].get()); - if (_read_row_cursor.is_null(cid)) { - nullable_column->insert_data(nullptr, 0); - continue; - } else { - nullable_column->get_null_map_data().push_back(0); - column_ptr = &nullable_column->get_nested_column(); - } - } - - char* ptr = (char*)_read_row_cursor.cell_ptr(cid); - switch (slot_desc->type().type) { - case TYPE_BOOLEAN: { - assert_cast<ColumnVector<UInt8>*>(column_ptr)->insert_data(ptr, 0); - break; - } - case TYPE_TINYINT: { - assert_cast<ColumnVector<Int8>*>(column_ptr)->insert_data(ptr, 0); - break; - } - case TYPE_SMALLINT: { - assert_cast<ColumnVector<Int16>*>(column_ptr)->insert_data(ptr, 0); - break; - } - case TYPE_INT: { - assert_cast<ColumnVector<Int32>*>(column_ptr)->insert_data(ptr, 0); - break; - } - case TYPE_BIGINT: { - assert_cast<ColumnVector<Int64>*>(column_ptr)->insert_data(ptr, 0); - break; - } - case TYPE_LARGEINT: { - 
assert_cast<ColumnVector<Int128>*>(column_ptr)->insert_data(ptr, 0); - break; - } - case TYPE_FLOAT: { - assert_cast<ColumnVector<Float32>*>(column_ptr)->insert_data(ptr, 0); - break; - } - case TYPE_DOUBLE: { - assert_cast<ColumnVector<Float64>*>(column_ptr)->insert_data(ptr, 0); - break; - } - case TYPE_CHAR: { - Slice* slice = reinterpret_cast<Slice*>(ptr); - assert_cast<ColumnString*>(column_ptr) - ->insert_data(slice->data, strnlen(slice->data, slice->size)); - break; - } - case TYPE_VARCHAR: - case TYPE_STRING: { - Slice* slice = reinterpret_cast<Slice*>(ptr); - assert_cast<ColumnString*>(column_ptr)->insert_data(slice->data, slice->size); - break; - } - case TYPE_OBJECT: { - Slice* slice = reinterpret_cast<Slice*>(ptr); - // insert_default() - auto* target_column = assert_cast<ColumnBitmap*>(column_ptr); - - target_column->insert_default(); - BitmapValue* pvalue = nullptr; - int pos = target_column->size() - 1; - pvalue = &target_column->get_element(pos); - - if (slice->size != 0) { - BitmapValue value; - value.deserialize(slice->data); - *pvalue = std::move(value); - } else { - *pvalue = std::move(*reinterpret_cast<BitmapValue*>(slice->data)); - } - break; - } - case TYPE_HLL: { - Slice* slice = reinterpret_cast<Slice*>(ptr); - auto* target_column = assert_cast<ColumnHLL*>(column_ptr); - - target_column->insert_default(); - HyperLogLog* pvalue = nullptr; - int pos = target_column->size() - 1; - pvalue = &target_column->get_element(pos); - if (slice->size != 0) { - HyperLogLog value; - value.deserialize(*slice); - *pvalue = std::move(value); - } else { - *pvalue = std::move(*reinterpret_cast<HyperLogLog*>(slice->data)); - } - break; - } - case TYPE_DECIMALV2: { - int64_t int_value = *(int64_t*)(ptr); - int32_t frac_value = *(int32_t*)(ptr + sizeof(int64_t)); - DecimalV2Value data(int_value, frac_value); - assert_cast<ColumnDecimal<Decimal128>*>(column_ptr) - ->insert_data(reinterpret_cast<char*>(&data), 0); - break; - } - case TYPE_DATETIME: { - uint64_t 
value = *reinterpret_cast<uint64_t*>(ptr); - VecDateTimeValue data(value); - assert_cast<ColumnVector<Int64>*>(column_ptr) - ->insert_data(reinterpret_cast<char*>(&data), 0); - break; - } - case TYPE_DATE: { - uint64_t value = 0; - value = *(unsigned char*)(ptr + 2); - value <<= 8; - value |= *(unsigned char*)(ptr + 1); - value <<= 8; - value |= *(unsigned char*)(ptr); - VecDateTimeValue date; - date.from_olap_date(value); - assert_cast<ColumnVector<Int64>*>(column_ptr) - ->insert_data(reinterpret_cast<char*>(&date), 0); - break; - } - default: { - break; - } - } - } -} } // namespace doris::vectorized diff --git a/be/src/vec/exec/volap_scanner.h b/be/src/vec/exec/volap_scanner.h index b6ef7e32ff..8c1ccca99e 100644 --- a/be/src/vec/exec/volap_scanner.h +++ b/be/src/vec/exec/volap_scanner.h @@ -52,8 +52,6 @@ protected: virtual void set_tablet_reader() override; private: - // TODO: Remove this function after we finish reader vec - void _convert_row_to_block(std::vector<vectorized::MutableColumnPtr>* columns); VExprContext* _vconjunct_ctx = nullptr; bool _need_to_close = false; }; diff --git a/be/src/vec/functions/function_rpc.cpp b/be/src/vec/functions/function_rpc.cpp index 9208ae2295..9b2e11d08a 100644 --- a/be/src/vec/functions/function_rpc.cpp +++ b/be/src/vec/functions/function_rpc.cpp @@ -231,13 +231,16 @@ void convert_col_to_pvalue(const ColumnPtr& column, const DataTypePtr& data_type PDateTime* date_time = arg->add_datetime_value(); if constexpr (nullable) { if (!column->is_null_at(row_num)) { - VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num)); + VecDateTimeValue v = + binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>( + column->get_int(row_num)); date_time->set_day(v.day()); date_time->set_month(v.month()); date_time->set_year(v.year()); } } else { - VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num)); + VecDateTimeValue v = binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>( + column->get_int(row_num)); 
date_time->set_day(v.day()); date_time->set_month(v.month()); date_time->set_year(v.year()); @@ -252,7 +255,9 @@ void convert_col_to_pvalue(const ColumnPtr& column, const DataTypePtr& data_type PDateTime* date_time = arg->add_datetime_value(); if constexpr (nullable) { if (!column->is_null_at(row_num)) { - VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num)); + VecDateTimeValue v = + binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>( + column->get_int(row_num)); date_time->set_day(v.day()); date_time->set_month(v.month()); date_time->set_year(v.year()); @@ -261,7 +266,8 @@ void convert_col_to_pvalue(const ColumnPtr& column, const DataTypePtr& data_type date_time->set_second(v.second()); } } else { - VecDateTimeValue v = VecDateTimeValue(column->get_int(row_num)); + VecDateTimeValue v = binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>( + column->get_int(row_num)); date_time->set_day(v.day()); date_time->set_month(v.month()); date_time->set_year(v.year()); diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index 45dc20a43f..22f159bd8e 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -89,8 +89,9 @@ struct MakeDateImpl { auto& res_val = *reinterpret_cast<VecDateTimeValue*>(&res[i]); - VecDateTimeValue ts_value {l * 10000000000 + 101000000}; - ts_value.set_type(TIME_DATE); + VecDateTimeValue ts_value = VecDateTimeValue(); + ts_value.set_time(l, 1, 1, 0, 0, 0); + DateTimeVal ts_val; ts_value.to_datetime_val(&ts_val); if (ts_val.is_null) { diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index dd2e189493..728556186c 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -134,10 +134,10 @@ constexpr size_t max_char_length(const char* const* name, size_t end) { static constexpr const char* s_month_name[] = { "", "January", "February", "March", 
"April", "May", "June", - "July", "August", "September", "October", "November", "December", NULL}; + "July", "August", "September", "October", "November", "December", nullptr}; static constexpr const char* s_day_name[] = {"Monday", "Tuesday", "Wednesday", "Thursday", - "Friday", "Saturday", "Sunday", NULL}; + "Friday", "Saturday", "Sunday", nullptr}; static constexpr size_t MAX_DAY_NAME_LEN = max_char_length(s_day_name, std::size(s_day_name)); static constexpr size_t MAX_MONTH_NAME_LEN = max_char_length(s_month_name, std::size(s_month_name)); @@ -157,7 +157,27 @@ public: _month(0), // so this is a difference between Vectorization mode and Rowbatch mode with DateTimeValue; _year(0) {} // before int128 16 bytes ---> after int64 8 bytes - explicit VecDateTimeValue(int64_t t) { from_date_int64(t); } + // The data format of DATE/DATETIME is different in storage layer and execute layer. + // So we should use diffrent creator to get data from value. + // We should use create_from_olap_xxx only at binary data scaned from storage engine and convert to typed data. + // At other case, we just use binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>. 
+ + // olap storage layer date data format: + // 64 bits binary data [year(remaining bits), month(4 bits), day(5 bits)] + // execute layer date/datetime and olap storage layer datetime data format: + // 8 bytes integer data [year(remaining digits), month(2 digits), day(2 digits), hour(2 digits), minute(2 digits), second(2 digits)] + + static VecDateTimeValue create_from_olap_date(uint64_t value) { + VecDateTimeValue date; + date.from_olap_date(value); + return date; + } + + static VecDateTimeValue create_from_olap_datetime(uint64_t value) { + VecDateTimeValue datetime; + datetime.from_olap_datetime(value); + return datetime; + } void set_time(uint32_t year, uint32_t month, uint32_t day, uint32_t hour, uint32_t minute, uint32_t second); @@ -594,7 +614,7 @@ private: char* to_date_buffer(char* to) const; char* to_time_buffer(char* to) const; - // Used to convert to uint64_t + // Used to convert to int64_t int64_t to_datetime_int64() const; int64_t to_date_int64() const; int64_t to_time_int64() const; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org