This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new cf5d194fe1 [enhancement](array-type) Split Array Offsets and String Offsets (#12341) cf5d194fe1 is described below commit cf5d194fe103633904c5159b8298535027e9bb30 Author: camby <104178...@qq.com> AuthorDate: Tue Sep 6 11:18:27 2022 +0800 [enhancement](array-type) Split Array Offsets and String Offsets (#12341) In older Doris versions, string offsets are 32-bit, which is not enough for the Array type. If we changed string offsets from 32-bit to 64-bit, upgrading BE nodes one by one would cause problems, because strings with 32-bit offsets and strings with 64-bit offsets would exist at the same time. As a result, we separate the code for Array offsets from the code for String offsets. Co-authored-by: cambyzju <zhuxiaol...@baidu.com> --- be/src/vec/columns/column.h | 10 ++- be/src/vec/columns/column_array.cpp | 58 +++++++-------- be/src/vec/columns/column_array.h | 6 +- be/src/vec/columns/column_string.cpp | 3 +- be/src/vec/columns/columns_common.cpp | 87 ++++++++++++---------- be/src/vec/columns/columns_common.h | 13 ++-- be/src/vec/data_types/data_type_array.cpp | 18 ++--- .../functions/array/function_array_aggregation.cpp | 2 +- .../vec/functions/array/function_array_distinct.h | 28 +++---- .../vec/functions/array/function_array_element.h | 4 +- be/src/vec/functions/array/function_array_index.h | 6 +- be/src/vec/functions/array/function_array_join.h | 19 +++-- be/src/vec/functions/array/function_array_remove.h | 6 +- .../vec/functions/array/function_array_reverse.h | 8 +- be/src/vec/functions/array/function_array_sort.h | 36 ++++----- be/src/vec/functions/array/function_array_utils.h | 4 +- be/src/vec/functions/function_string.h | 6 +- be/test/vec/core/block_test.cpp | 8 +- be/test/vec/core/column_array_test.cpp | 30 ++++---- .../utils/arrow_column_to_doris_column_test.cpp | 8 +- 20 files changed, 191 insertions(+), 169 deletions(-) diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 4307153739..565a9416d7 100644 
--- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -340,12 +340,18 @@ public: virtual void get_permutation(bool reverse, size_t limit, int nan_direction_hint, Permutation& res) const = 0; + // 32bit offsets for string + using Offset = UInt32; + using Offsets = PaddedPODArray<Offset>; + + // 64bit offsets for array + using Offset64 = UInt64; + using Offsets64 = PaddedPODArray<Offset64>; + /** Copies each element according offsets parameter. * (i-th element should be copied offsets[i] - offsets[i - 1] times.) * It is necessary in ARRAY JOIN operation. */ - using Offset = UInt64; - using Offsets = PaddedPODArray<Offset>; virtual Ptr replicate(const Offsets& offsets) const = 0; virtual void replicate(const uint32_t* counts, size_t target_size, IColumn& column) const { diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 7ef683f651..c498b72345 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -55,7 +55,7 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& of } if (!offsets_concrete->empty() && nested_column) { - Offset last_offset = offsets_concrete->get_data().back(); + auto last_offset = offsets_concrete->get_data().back(); /// This will also prevent possible overflow in offset. if (nested_column->size() != last_offset) { @@ -93,7 +93,7 @@ MutableColumnPtr ColumnArray::clone_resized(size_t to_size) const { res->get_data().insert_range_from(get_data(), 0, get_offsets()[to_size - 1]); } else { /// Copy column and append empty arrays for extra elements. 
- Offset offset = 0; + Offset64 offset = 0; if (from_size > 0) { res->get_offsets().assign(get_offsets().begin(), get_offsets().end()); res->get_data().insert_range_from(get_data(), 0, get_data().size()); @@ -304,8 +304,8 @@ void ColumnArray::insert_range_from(const IColumn& src, size_t start, size_t len get_data().insert_range_from(src_concrete.get_data(), nested_offset, nested_length); - Offsets& cur_offsets = get_offsets(); - const Offsets& src_offsets = src_concrete.get_offsets(); + auto& cur_offsets = get_offsets(); + const auto& src_offsets = src_concrete.get_offsets(); if (start == 0 && cur_offsets.empty()) { cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length); @@ -355,10 +355,10 @@ ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t result_size_hin auto res = ColumnArray::create(data->clone_empty()); auto& res_elems = assert_cast<ColumnVector<T>&>(res->get_data()).get_data(); - Offsets& res_offsets = res->get_offsets(); + auto& res_offsets = res->get_offsets(); - filter_arrays_impl<T>(assert_cast<const ColumnVector<T>&>(*data).get_data(), get_offsets(), - res_elems, res_offsets, filt, result_size_hint); + filter_arrays_impl<T, Offset64>(assert_cast<const ColumnVector<T>&>(*data).get_data(), + get_offsets(), res_elems, res_offsets, filt, result_size_hint); return res; } @@ -372,12 +372,12 @@ ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hin const ColumnString& src_string = typeid_cast<const ColumnString&>(*data); const ColumnString::Chars& src_chars = src_string.get_chars(); - const Offsets& src_string_offsets = src_string.get_offsets(); - const Offsets& src_offsets = get_offsets(); + const auto& src_string_offsets = src_string.get_offsets(); + const auto& src_offsets = get_offsets(); ColumnString::Chars& res_chars = typeid_cast<ColumnString&>(res->get_data()).get_chars(); - Offsets& res_string_offsets = typeid_cast<ColumnString&>(res->get_data()).get_offsets(); - Offsets& res_offsets = 
res->get_offsets(); + auto& res_string_offsets = typeid_cast<ColumnString&>(res->get_data()).get_offsets(); + auto& res_offsets = res->get_offsets(); if (result_size_hint < 0) { res_chars.reserve(src_chars.size()); @@ -385,10 +385,10 @@ ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hin res_offsets.reserve(col_size); } - Offset prev_src_offset = 0; + Offset64 prev_src_offset = 0; Offset prev_src_string_offset = 0; - Offset prev_res_offset = 0; + Offset64 prev_res_offset = 0; Offset prev_res_string_offset = 0; for (size_t i = 0; i < col_size; ++i) { @@ -450,7 +450,7 @@ ColumnPtr ColumnArray::filter_generic(const Filter& filt, ssize_t result_size_hi res->data = data->filter(nested_filt, nested_result_size_hint); - Offsets& res_offsets = res->get_offsets(); + auto& res_offsets = res->get_offsets(); if (result_size_hint) res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size); size_t current_offset = 0; @@ -566,18 +566,18 @@ ColumnPtr ColumnArray::replicate_number(const Offsets& replicate_offsets) const const typename ColumnVector<T>::Container& src_data = typeid_cast<const ColumnVector<T>&>(*data).get_data(); - const Offsets& src_offsets = get_offsets(); + const auto& src_offsets = get_offsets(); typename ColumnVector<T>::Container& res_data = typeid_cast<ColumnVector<T>&>(res_arr.get_data()).get_data(); - Offsets& res_offsets = res_arr.get_offsets(); + auto& res_offsets = res_arr.get_offsets(); res_data.reserve(data->size() / col_size * replicate_offsets.back()); res_offsets.reserve(replicate_offsets.back()); Offset prev_replicate_offset = 0; - Offset prev_data_offset = 0; - Offset current_new_offset = 0; + Offset64 prev_data_offset = 0; + Offset64 current_new_offset = 0; for (size_t i = 0; i < col_size; ++i) { size_t size_to_replicate = replicate_offsets[i] - prev_replicate_offset; @@ -614,12 +614,12 @@ ColumnPtr ColumnArray::replicate_string(const Offsets& replicate_offsets) const const ColumnString& src_string = 
typeid_cast<const ColumnString&>(*data); const ColumnString::Chars& src_chars = src_string.get_chars(); - const Offsets& src_string_offsets = src_string.get_offsets(); - const Offsets& src_offsets = get_offsets(); + const auto& src_string_offsets = src_string.get_offsets(); + const auto& src_offsets = get_offsets(); ColumnString::Chars& res_chars = typeid_cast<ColumnString&>(res_arr.get_data()).get_chars(); - Offsets& res_string_offsets = typeid_cast<ColumnString&>(res_arr.get_data()).get_offsets(); - Offsets& res_offsets = res_arr.get_offsets(); + auto& res_string_offsets = typeid_cast<ColumnString&>(res_arr.get_data()).get_offsets(); + auto& res_offsets = res_arr.get_offsets(); res_chars.reserve(src_chars.size() / col_size * replicate_offsets.back()); res_string_offsets.reserve(src_string_offsets.size() / col_size * replicate_offsets.back()); @@ -627,10 +627,10 @@ ColumnPtr ColumnArray::replicate_string(const Offsets& replicate_offsets) const Offset prev_replicate_offset = 0; - Offset prev_src_offset = 0; + Offset64 prev_src_offset = 0; Offset prev_src_string_offset = 0; - Offset current_res_offset = 0; + Offset64 current_res_offset = 0; Offset current_res_string_offset = 0; for (size_t i = 0; i < col_size; ++i) { @@ -682,15 +682,15 @@ ColumnPtr ColumnArray::replicate_const(const Offsets& replicate_offsets) const { if (0 == col_size) return clone_empty(); - const Offsets& src_offsets = get_offsets(); + const auto& src_offsets = get_offsets(); auto res_column_offsets = ColumnOffsets::create(); - Offsets& res_offsets = res_column_offsets->get_data(); + auto& res_offsets = res_column_offsets->get_data(); res_offsets.reserve(replicate_offsets.back()); Offset prev_replicate_offset = 0; - Offset prev_data_offset = 0; - Offset current_new_offset = 0; + Offset64 prev_data_offset = 0; + Offset64 current_new_offset = 0; for (size_t i = 0; i < col_size; ++i) { size_t size_to_replicate = replicate_offsets[i] - prev_replicate_offset; @@ -719,7 +719,7 @@ ColumnPtr 
ColumnArray::replicate_generic(const Offsets& replicate_offsets) const if (0 == col_size) return res; - IColumn::Offset prev_offset = 0; + Offset64 prev_offset = 0; for (size_t i = 0; i < col_size; ++i) { size_t size_to_replicate = replicate_offsets[i] - prev_offset; prev_offset = replicate_offsets[i]; diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 50f864fbb9..686089f4e9 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -67,7 +67,7 @@ public: } /** On the index i there is an offset to the beginning of the i + 1 -th element. */ - using ColumnOffsets = ColumnVector<Offset>; + using ColumnOffsets = ColumnVector<Offset64>; std::string get_name() const override; const char* get_family_name() const override { return "Array"; } @@ -118,11 +118,11 @@ public: IColumn& get_offsets_column() { return *offsets; } const IColumn& get_offsets_column() const { return *offsets; } - Offsets& ALWAYS_INLINE get_offsets() { + Offsets64& ALWAYS_INLINE get_offsets() { return assert_cast<ColumnOffsets&>(*offsets).get_data(); } - const Offsets& ALWAYS_INLINE get_offsets() const { + const Offsets64& ALWAYS_INLINE get_offsets() const { return assert_cast<const ColumnOffsets&>(*offsets).get_data(); } diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 3adf082ae0..c8b99e8ffa 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -111,7 +111,8 @@ ColumnPtr ColumnString::filter(const Filter& filt, ssize_t result_size_hint) con Chars& res_chars = res->chars; Offsets& res_offsets = res->offsets; - filter_arrays_impl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint); + filter_arrays_impl<UInt8, Offset>(chars, offsets, res_chars, res_offsets, filt, + result_size_hint); return res; } diff --git a/be/src/vec/columns/columns_common.cpp b/be/src/vec/columns/columns_common.cpp index 02183b9876..8ab45e112b 100644 --- 
a/be/src/vec/columns/columns_common.cpp +++ b/be/src/vec/columns/columns_common.cpp @@ -98,11 +98,12 @@ namespace { /// Implementation details of filterArraysImpl function, used as template parameter. /// Allow to build or not to build offsets array. +template <typename OT> struct ResultOffsetsBuilder { - IColumn::Offsets& res_offsets; - IColumn::Offset current_src_offset = 0; + PaddedPODArray<OT>& res_offsets; + OT current_src_offset = 0; - explicit ResultOffsetsBuilder(IColumn::Offsets* res_offsets_) : res_offsets(*res_offsets_) {} + explicit ResultOffsetsBuilder(PaddedPODArray<OT>* res_offsets_) : res_offsets(*res_offsets_) {} void reserve(ssize_t result_size_hint, size_t src_size) { res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size); @@ -114,12 +115,10 @@ struct ResultOffsetsBuilder { } template <size_t SIMD_BYTES> - void insert_chunk(const IColumn::Offset* src_offsets_pos, bool first, - IColumn::Offset chunk_offset, size_t chunk_size) { + void insert_chunk(const OT* src_offsets_pos, bool first, OT chunk_offset, size_t chunk_size) { const auto offsets_size_old = res_offsets.size(); res_offsets.resize_assume_reserved(offsets_size_old + SIMD_BYTES); - memcpy(&res_offsets[offsets_size_old], src_offsets_pos, - SIMD_BYTES * sizeof(IColumn::Offset)); + memcpy(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES * sizeof(OT)); if (!first) { /// difference between current and actual offset @@ -138,19 +137,20 @@ struct ResultOffsetsBuilder { } }; +template <typename OT> struct NoResultOffsetsBuilder { - explicit NoResultOffsetsBuilder(IColumn::Offsets*) {} + explicit NoResultOffsetsBuilder(PaddedPODArray<OT>*) {} void reserve(ssize_t, size_t) {} void insert_one(size_t) {} template <size_t SIMD_BYTES> - void insert_chunk(const IColumn::Offset*, bool, IColumn::Offset, size_t) {} + void insert_chunk(const OT*, bool, OT, size_t) {} }; -template <typename T, typename ResultOffsetsBuilder> +template <typename T, typename OT, typename 
ResultOffsetsBuilder> void filter_arrays_impl_generic(const PaddedPODArray<T>& src_elems, - const IColumn::Offsets& src_offsets, PaddedPODArray<T>& res_elems, - IColumn::Offsets* res_offsets, const IColumn::Filter& filt, + const PaddedPODArray<OT>& src_offsets, PaddedPODArray<T>& res_elems, + PaddedPODArray<OT>* res_offsets, const IColumn::Filter& filt, ssize_t result_size_hint) { const size_t size = src_offsets.size(); if (size != filt.size()) { @@ -175,7 +175,7 @@ void filter_arrays_impl_generic(const PaddedPODArray<T>& src_elems, const auto offsets_begin = offsets_pos; /// copy array ending at *end_offset_ptr - const auto copy_array = [&](const IColumn::Offset* offset_ptr) { + const auto copy_array = [&](const OT* offset_ptr) { const auto arr_offset = offset_ptr == offsets_begin ? 0 : offset_ptr[-1]; const auto arr_size = *offset_ptr - arr_offset; @@ -229,41 +229,52 @@ void filter_arrays_impl_generic(const PaddedPODArray<T>& src_elems, } } // namespace -template <typename T> -void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const IColumn::Offsets& src_offsets, - PaddedPODArray<T>& res_elems, IColumn::Offsets& res_offsets, +template <typename T, typename OT> +void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const PaddedPODArray<OT>& src_offsets, + PaddedPODArray<T>& res_elems, PaddedPODArray<OT>& res_offsets, const IColumn::Filter& filt, ssize_t result_size_hint) { - return filter_arrays_impl_generic<T, ResultOffsetsBuilder>( + return filter_arrays_impl_generic<T, OT, ResultOffsetsBuilder<OT>>( src_elems, src_offsets, res_elems, &res_offsets, filt, result_size_hint); } -template <typename T> +template <typename T, typename OT> void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems, - const IColumn::Offsets& src_offsets, PaddedPODArray<T>& res_elems, - const IColumn::Filter& filt, ssize_t result_size_hint) { - return filter_arrays_impl_generic<T, NoResultOffsetsBuilder>(src_elems, src_offsets, res_elems, - nullptr, filt, 
result_size_hint); + const PaddedPODArray<OT>& src_offsets, + PaddedPODArray<T>& res_elems, const IColumn::Filter& filt, + ssize_t result_size_hint) { + return filter_arrays_impl_generic<T, OT, NoResultOffsetsBuilder<OT>>( + src_elems, src_offsets, res_elems, nullptr, filt, result_size_hint); } /// Explicit instantiations - not to place the implementation of the function above in the header file. -#define INSTANTIATE(TYPE) \ - template void filter_arrays_impl<TYPE>(const PaddedPODArray<TYPE>&, const IColumn::Offsets&, \ - PaddedPODArray<TYPE>&, IColumn::Offsets&, \ - const IColumn::Filter&, ssize_t); \ - template void filter_arrays_impl_only_data<TYPE>( \ - const PaddedPODArray<TYPE>&, const IColumn::Offsets&, PaddedPODArray<TYPE>&, \ +#define INSTANTIATE(TYPE, OFFTYPE) \ + template void filter_arrays_impl<TYPE, OFFTYPE>( \ + const PaddedPODArray<TYPE>&, const PaddedPODArray<OFFTYPE>&, PaddedPODArray<TYPE>&, \ + PaddedPODArray<OFFTYPE>&, const IColumn::Filter&, ssize_t); \ + template void filter_arrays_impl_only_data<TYPE, OFFTYPE>( \ + const PaddedPODArray<TYPE>&, const PaddedPODArray<OFFTYPE>&, PaddedPODArray<TYPE>&, \ const IColumn::Filter&, ssize_t); -INSTANTIATE(UInt8) -INSTANTIATE(UInt16) -INSTANTIATE(UInt32) -INSTANTIATE(UInt64) -INSTANTIATE(Int8) -INSTANTIATE(Int16) -INSTANTIATE(Int32) -INSTANTIATE(Int64) -INSTANTIATE(Float32) -INSTANTIATE(Float64) +INSTANTIATE(UInt8, IColumn::Offset) +INSTANTIATE(UInt8, IColumn::Offset64) +INSTANTIATE(UInt16, IColumn::Offset) +INSTANTIATE(UInt16, IColumn::Offset64) +INSTANTIATE(UInt32, IColumn::Offset) +INSTANTIATE(UInt32, IColumn::Offset64) +INSTANTIATE(UInt64, IColumn::Offset) +INSTANTIATE(UInt64, IColumn::Offset64) +INSTANTIATE(Int8, IColumn::Offset) +INSTANTIATE(Int8, IColumn::Offset64) +INSTANTIATE(Int16, IColumn::Offset) +INSTANTIATE(Int16, IColumn::Offset64) +INSTANTIATE(Int32, IColumn::Offset) +INSTANTIATE(Int32, IColumn::Offset64) +INSTANTIATE(Int64, IColumn::Offset) +INSTANTIATE(Int64, IColumn::Offset64) 
+INSTANTIATE(Float32, IColumn::Offset) +INSTANTIATE(Float32, IColumn::Offset64) +INSTANTIATE(Float64, IColumn::Offset) +INSTANTIATE(Float64, IColumn::Offset64) #undef INSTANTIATE diff --git a/be/src/vec/columns/columns_common.h b/be/src/vec/columns/columns_common.h index dc9116e343..7308816005 100644 --- a/be/src/vec/columns/columns_common.h +++ b/be/src/vec/columns/columns_common.h @@ -39,15 +39,16 @@ bool memory_is_zero(const void* data, size_t size); bool memory_is_byte(const void* data, size_t size, uint8_t byte); /// The general implementation of `filter` function for ColumnArray and ColumnString. -template <typename T> -void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const IColumn::Offsets& src_offsets, - PaddedPODArray<T>& res_elems, IColumn::Offsets& res_offsets, +template <typename T, typename OT> +void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const PaddedPODArray<OT>& src_offsets, + PaddedPODArray<T>& res_elems, PaddedPODArray<OT>& res_offsets, const IColumn::Filter& filt, ssize_t result_size_hint); /// Same as above, but not fills res_offsets. 
-template <typename T> +template <typename T, typename OT> void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems, - const IColumn::Offsets& src_offsets, PaddedPODArray<T>& res_elems, - const IColumn::Filter& filt, ssize_t result_size_hint); + const PaddedPODArray<OT>& src_offsets, + PaddedPODArray<T>& res_elems, const IColumn::Filter& filt, + ssize_t result_size_hint); } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_array.cpp b/be/src/vec/data_types/data_type_array.cpp index 2fb0805e49..7301e6e0e5 100644 --- a/be/src/vec/data_types/data_type_array.cpp +++ b/be/src/vec/data_types/data_type_array.cpp @@ -58,7 +58,7 @@ size_t DataTypeArray::get_number_of_dimensions() const { int64_t DataTypeArray::get_uncompressed_serialized_bytes(const IColumn& column) const { auto ptr = column.convert_to_full_column_if_const(); const auto& data_column = assert_cast<const ColumnArray&>(*ptr.get()); - return sizeof(IColumn::Offset) * (column.size() + 1) + + return sizeof(IColumn::Offset64) * (column.size() + 1) + get_nested_type()->get_uncompressed_serialized_bytes(data_column.get_data()); } @@ -67,11 +67,11 @@ char* DataTypeArray::serialize(const IColumn& column, char* buf) const { const auto& data_column = assert_cast<const ColumnArray&>(*ptr.get()); // row num - *reinterpret_cast<IColumn::Offset*>(buf) = column.size(); - buf += sizeof(IColumn::Offset); + *reinterpret_cast<IColumn::Offset64*>(buf) = column.size(); + buf += sizeof(IColumn::Offset64); // offsets - memcpy(buf, data_column.get_offsets().data(), column.size() * sizeof(IColumn::Offset)); - buf += column.size() * sizeof(IColumn::Offset); + memcpy(buf, data_column.get_offsets().data(), column.size() * sizeof(IColumn::Offset64)); + buf += column.size() * sizeof(IColumn::Offset64); // children return get_nested_type()->serialize(data_column.get_data(), buf); } @@ -81,12 +81,12 @@ const char* DataTypeArray::deserialize(const char* buf, IColumn* column) const { auto& offsets = 
data_column->get_offsets(); // row num - IColumn::Offset row_num = *reinterpret_cast<const IColumn::Offset*>(buf); - buf += sizeof(IColumn::Offset); + IColumn::Offset64 row_num = *reinterpret_cast<const IColumn::Offset64*>(buf); + buf += sizeof(IColumn::Offset64); // offsets offsets.resize(row_num); - memcpy(offsets.data(), buf, sizeof(IColumn::Offset) * row_num); - buf += sizeof(IColumn::Offset) * row_num; + memcpy(offsets.data(), buf, sizeof(IColumn::Offset64) * row_num); + buf += sizeof(IColumn::Offset64) * row_num; // children return get_nested_type()->deserialize(buf, data_column->get_data_ptr()->assume_mutable()); } diff --git a/be/src/vec/functions/array/function_array_aggregation.cpp b/be/src/vec/functions/array/function_array_aggregation.cpp index 42351fd7eb..f09a0a72fa 100644 --- a/be/src/vec/functions/array/function_array_aggregation.cpp +++ b/be/src/vec/functions/array/function_array_aggregation.cpp @@ -176,7 +176,7 @@ struct ArrayAggregateImpl { template <typename Element> static bool execute_type(ColumnPtr& res_ptr, const DataTypePtr& type, const IColumn* data, - const ColumnArray::Offsets& offsets) { + const ColumnArray::Offsets64& offsets) { using ColVecType = ColumnVectorOrDecimal<Element>; using ResultType = ArrayAggregateResult<Element, operation>; using ColVecResultType = ColumnVectorOrDecimal<ResultType>; diff --git a/be/src/vec/functions/array/function_array_distinct.h b/be/src/vec/functions/array/function_array_distinct.h index 7d9c989c29..77e997aba6 100644 --- a/be/src/vec/functions/array/function_array_distinct.h +++ b/be/src/vec/functions/array/function_array_distinct.h @@ -71,7 +71,7 @@ public: auto dest_column_ptr = ColumnArray::create(nested_type->create_column(), ColumnArray::ColumnOffsets::create()); IColumn* dest_nested_column = &dest_column_ptr->get_data(); - ColumnArray::Offsets& dest_offsets = dest_column_ptr->get_offsets(); + auto& dest_offsets = dest_column_ptr->get_offsets(); DCHECK(dest_nested_column != nullptr); 
dest_nested_column->reserve(src_nested_column->size()); dest_offsets.reserve(input_rows_count); @@ -109,8 +109,8 @@ private: static constexpr size_t INITIAL_SIZE_DEGREE = 5; template <typename ColumnType> - bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map) { using NestType = typename ColumnType::value_type; using ElementNativeType = typename NativeType<NestType>::Type; @@ -128,13 +128,13 @@ private: INITIAL_SIZE_DEGREE>; Set set; - ColumnArray::Offset prev_src_offset = 0; - ColumnArray::Offset res_offset = 0; + size_t prev_src_offset = 0; + size_t res_offset = 0; for (auto curr_src_offset : src_offsets) { set.clear(); size_t null_size = 0; - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { DCHECK(dest_null_map != nullptr); (*dest_null_map).push_back(true); @@ -162,8 +162,8 @@ private: return true; } - bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map) { const ColumnString* src_data_concrete = reinterpret_cast<const ColumnString*>(&src_column); if (!src_data_concrete) { @@ -178,13 +178,13 @@ private: using Set = HashSetWithStackMemory<StringRef, DefaultHash<StringRef>, INITIAL_SIZE_DEGREE>; Set set; - ColumnArray::Offset prev_src_offset = 0; - ColumnArray::Offset res_offset = 0; + size_t prev_src_offset = 0; + size_t 
res_offset = 0; for (auto curr_src_offset : src_offsets) { set.clear(); size_t null_size = 0; - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { DCHECK(dest_null_map != nullptr); // Note: here we need to update the offset of ColumnString @@ -221,8 +221,8 @@ private: return true; } - bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map, DataTypePtr& nested_type) { bool res = false; @@ -268,4 +268,4 @@ private: } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h index d04a1b605b..6722e09e9c 100644 --- a/be/src/vec/functions/array/function_array_element.h +++ b/be/src/vec/functions/array/function_array_element.h @@ -82,7 +82,7 @@ public: private: template <typename ColumnType> - ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const IColumn& nested_column, + ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const UInt8* arr_null_map, const IColumn& indices, const UInt8* nested_null_map, UInt8* dst_null_map) { const auto& nested_data = reinterpret_cast<const ColumnType&>(nested_column).get_data(); @@ -123,7 +123,7 @@ private: return dst_column; } - ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const IColumn& nested_column, + ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const UInt8* arr_null_map, const IColumn& indices, const UInt8* 
nested_null_map, UInt8* dst_null_map) { const auto& src_str_offs = diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index ab81490267..cd17feff8d 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -67,7 +67,7 @@ public: } private: - ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const UInt8* nested_null_map, + ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, const IColumn& nested_column, const IColumn& right_column) { // check array nested column type and get data const auto& str_offs = reinterpret_cast<const ColumnString&>(nested_column).get_offsets(); @@ -110,7 +110,7 @@ private: } template <typename NestedColumnType, typename RightColumnType> - ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const UInt8* nested_null_map, + ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, const IColumn& nested_column, const IColumn& right_column) { // check array nested column type and get data const auto& nested_data = @@ -144,7 +144,7 @@ private: } template <typename NestedColumnType> - ColumnPtr _execute_number_expanded(const ColumnArray::Offsets& offsets, + ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, const IColumn& nested_column, const IColumn& right_column) { if (check_column<ColumnUInt8>(right_column)) { diff --git a/be/src/vec/functions/array/function_array_join.h b/be/src/vec/functions/array/function_array_join.h index 452ba0df0b..180e65c21f 100644 --- a/be/src/vec/functions/array/function_array_join.h +++ b/be/src/vec/functions/array/function_array_join.h @@ -117,7 +117,8 @@ private: } template <typename ColumnType> - static bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, + static bool _execute_number(const IColumn& 
src_column, + const ColumnArray::Offsets64& src_offsets, const UInt8* src_null_map, const std::string& sep_str, const std::string& null_replace_str, DataTypePtr& nested_type, ColumnString* dest_column_ptr) { @@ -129,10 +130,10 @@ private: return false; } - ColumnArray::Offset prev_src_offset = 0; + size_t prev_src_offset = 0; for (auto curr_src_offset : src_offsets) { std::string result_str; - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && src_null_map[j]) { if (null_replace_str.size() == 0) { continue; @@ -160,7 +161,8 @@ private: return true; } - static bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, + static bool _execute_string(const IColumn& src_column, + const ColumnArray::Offsets64& src_offsets, const UInt8* src_null_map, const std::string& sep_str, const std::string& null_replace_str, ColumnString* dest_column_ptr) { @@ -169,10 +171,10 @@ private: return false; } - ColumnArray::Offset prev_src_offset = 0; + size_t prev_src_offset = 0; for (auto curr_src_offset : src_offsets) { std::string result_str; - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && src_null_map[j]) { if (null_replace_str.size() == 0) { continue; @@ -193,7 +195,8 @@ private: return true; } - static bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, + static bool _execute_by_type(const IColumn& src_column, + const ColumnArray::Offsets64& src_offsets, const UInt8* src_null_map, const std::string& sep_str, const std::string& null_replace_str, DataTypePtr& nested_type, ColumnString* dest_column_ptr) { @@ -240,4 +243,4 @@ private: } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_remove.h 
b/be/src/vec/functions/array/function_array_remove.h index a291a53bc3..6565102fb7 100644 --- a/be/src/vec/functions/array/function_array_remove.h +++ b/be/src/vec/functions/array/function_array_remove.h @@ -65,7 +65,7 @@ public: private: template <typename NestedColumnType, typename RightColumnType> - ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const IColumn& nested_column, + ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const IColumn& right_column, const UInt8* nested_null_map) { // check array nested column type and get data const auto& src_data = reinterpret_cast<const NestedColumnType&>(nested_column).get_data(); @@ -135,7 +135,7 @@ private: return dst; } - ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const IColumn& nested_column, + ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const IColumn& right_column, const UInt8* nested_null_map) { // check array nested column type and get data const auto& src_offs = reinterpret_cast<const ColumnString&>(nested_column).get_offsets(); @@ -224,7 +224,7 @@ private: } template <typename NestedColumnType> - ColumnPtr _execute_number_expanded(const ColumnArray::Offsets& offsets, + ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const IColumn& right_column, const UInt8* nested_null_map) { if (check_column<ColumnUInt8>(right_column)) { diff --git a/be/src/vec/functions/array/function_array_reverse.h b/be/src/vec/functions/array/function_array_reverse.h index bc6891a29b..0714542614 100644 --- a/be/src/vec/functions/array/function_array_reverse.h +++ b/be/src/vec/functions/array/function_array_reverse.h @@ -58,10 +58,10 @@ struct ArrayReverseImpl { } static bool _execute_internal(const IColumn& src_column, - const ColumnArray::Offsets& src_offsets, IColumn& dest_column, - ColumnArray::Offsets& dest_offsets, const UInt8* src_null_map, + const 
ColumnArray::Offsets64& src_offsets, IColumn& dest_column, + ColumnArray::Offsets64& dest_offsets, const UInt8* src_null_map, ColumnUInt8::Container* dest_null_map) { - ColumnArray::Offset prev_src_offset = 0; + size_t prev_src_offset = 0; for (auto curr_src_offset : src_offsets) { size_t array_size = curr_src_offset - prev_src_offset; @@ -89,4 +89,4 @@ struct ArrayReverseImpl { } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_sort.h b/be/src/vec/functions/array/function_array_sort.h index ccab8b8e40..87fa684b13 100644 --- a/be/src/vec/functions/array/function_array_sort.h +++ b/be/src/vec/functions/array/function_array_sort.h @@ -67,7 +67,7 @@ public: auto dest_column_ptr = ColumnArray::create(nested_type->create_column(), ColumnArray::ColumnOffsets::create()); IColumn* dest_nested_column = &dest_column_ptr->get_data(); - ColumnArray::Offsets& dest_offsets = dest_column_ptr->get_offsets(); + auto& dest_offsets = dest_column_ptr->get_offsets(); DCHECK(dest_nested_column != nullptr); dest_nested_column->reserve(src_nested_column->size()); dest_offsets.reserve(input_rows_count); @@ -103,15 +103,15 @@ public: private: // sort the non-null element according to the permutation template <typename SrcDataType> - void _sort_by_permutation(ColumnArray::Offset& prev_offset, - const ColumnArray::Offset& curr_offset, + void _sort_by_permutation(ColumnArray::Offset64& prev_offset, + const ColumnArray::Offset64& curr_offset, const SrcDataType* src_data_concrete, const IColumn& src_column, const NullMapType* src_null_map, IColumn::Permutation& permutation) { - for (ColumnArray::Offset j = prev_offset; j + 1 < curr_offset; ++j) { + for (size_t j = prev_offset; j + 1 < curr_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { continue; } - for (ColumnArray::Offset k = j + 1; k < curr_offset; ++k) { + for (size_t k = j + 1; k < curr_offset; ++k) { if (src_null_map && 
(*src_null_map)[k]) { continue; } @@ -128,8 +128,8 @@ private: } template <typename ColumnType> - bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map) { using NestType = typename ColumnType::value_type; const ColumnType* src_data_concrete = reinterpret_cast<const ColumnType*>(&src_column); @@ -141,7 +141,7 @@ private: ColumnType& dest_data_concrete = reinterpret_cast<ColumnType&>(dest_column); PaddedPODArray<NestType>& dest_datas = dest_data_concrete.get_data(); - ColumnArray::Offset prev_src_offset = 0; + ColumnArray::Offset64 prev_src_offset = 0; IColumn::Permutation permutation(src_column.size()); for (size_t i = 0; i < src_column.size(); ++i) { permutation[i] = i; @@ -149,7 +149,7 @@ private: for (auto curr_src_offset : src_offsets) { // filter and insert null element first - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { DCHECK(dest_null_map != nullptr); (*dest_null_map).push_back(true); @@ -161,7 +161,7 @@ private: src_column, src_null_map, permutation); // insert non-null element after sort by permutation - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { continue; } @@ -178,8 +178,8 @@ private: return true; } - bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, 
ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map) { const ColumnString* src_data_concrete = reinterpret_cast<const ColumnString*>(&src_column); if (!src_data_concrete) { @@ -191,7 +191,7 @@ private: ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); column_string_chars.reserve(src_column.size()); - ColumnArray::Offset prev_src_offset = 0; + size_t prev_src_offset = 0; IColumn::Permutation permutation(src_column.size()); for (size_t i = 0; i < src_column.size(); ++i) { permutation[i] = i; @@ -199,7 +199,7 @@ private: for (auto curr_src_offset : src_offsets) { // filter and insert null element first - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { DCHECK(dest_null_map != nullptr); column_string_offsets.push_back(column_string_offsets.back()); @@ -211,7 +211,7 @@ private: src_column, src_null_map, permutation); // insert non-null element after sort by permutation - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { continue; } @@ -238,8 +238,8 @@ private: return true; } - bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map, DataTypePtr& nested_type) { bool res = false; @@ -285,4 +285,4 @@ private: } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_utils.h b/be/src/vec/functions/array/function_array_utils.h index 
0e0ebeb3b2..b4859f502b 100644 --- a/be/src/vec/functions/array/function_array_utils.h +++ b/be/src/vec/functions/array/function_array_utils.h @@ -35,7 +35,7 @@ public: public: const UInt8* array_nullmap_data = nullptr; const ColumnArray* array_col = nullptr; - const ColumnArray::Offsets* offsets_ptr = nullptr; + const ColumnArray::Offsets64* offsets_ptr = nullptr; const UInt8* nested_nullmap_data = nullptr; const IColumn* nested_col = nullptr; }; @@ -45,7 +45,7 @@ public: MutableColumnPtr array_nested_col = nullptr; ColumnUInt8::Container* nested_nullmap_data = nullptr; MutableColumnPtr offsets_col = nullptr; - ColumnArray::Offsets* offsets_ptr = nullptr; + ColumnArray::Offsets64* offsets_ptr = nullptr; IColumn* nested_col = nullptr; }; diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index e0f650d30c..c0cb214fcd 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -618,9 +618,9 @@ private: const auto& string_column = reinterpret_cast<const ColumnString&>(*array_nested_column); const Chars& string_src_chars = string_column.get_chars(); - const Offsets& src_string_offsets = string_column.get_offsets(); - const Offsets& src_array_offsets = array_column.get_offsets(); - ColumnArray::Offset current_src_array_offset = 0; + const auto& src_string_offsets = string_column.get_offsets(); + const auto& src_array_offsets = array_column.get_offsets(); + size_t current_src_array_offset = 0; // Concat string in array for (size_t i = 0; i < input_rows_count; ++i) { diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 3188808b54..c970ec1092 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -196,10 +196,10 @@ void block_to_pb( } void fill_block_with_array_int(vectorized::Block& block) { - auto off_column = vectorized::ColumnVector<vectorized::IColumn::Offset>::create(); + auto off_column = 
vectorized::ColumnVector<vectorized::IColumn::Offset64>::create(); auto data_column = vectorized::ColumnVector<int32_t>::create(); // init column array with [[1,2,3],[],[4],[5,6]] - std::vector<vectorized::IColumn::Offset> offs = {0, 3, 3, 4, 6}; + std::vector<vectorized::IColumn::Offset64> offs = {0, 3, 3, 4, 6}; std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -218,10 +218,10 @@ void fill_block_with_array_int(vectorized::Block& block) { } void fill_block_with_array_string(vectorized::Block& block) { - auto off_column = vectorized::ColumnVector<vectorized::IColumn::Offset>::create(); + auto off_column = vectorized::ColumnVector<vectorized::IColumn::Offset64>::create(); auto data_column = vectorized::ColumnString::create(); // init column array with [["abc","de"],["fg"],[], [""]]; - std::vector<vectorized::IColumn::Offset> offs = {0, 2, 3, 3, 4}; + std::vector<vectorized::IColumn::Offset64> offs = {0, 2, 3, 3, 4}; std::vector<std::string> vals = {"abc", "de", "fg", ""}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); diff --git a/be/test/vec/core/column_array_test.cpp b/be/test/vec/core/column_array_test.cpp index 60725501ab..7b71e0e4df 100644 --- a/be/test/vec/core/column_array_test.cpp +++ b/be/test/vec/core/column_array_test.cpp @@ -28,7 +28,7 @@ namespace doris::vectorized { -void check_array_offsets(const IColumn& arr, const std::vector<IColumn::Offset>& offs) { +void check_array_offsets(const IColumn& arr, const std::vector<IColumn::Offset64>& offs) { auto arr_col = check_and_get_column<ColumnArray>(arr); ASSERT_EQ(arr_col->size(), offs.size()); for (size_t i = 0; i < arr_col->size(); ++i) { @@ -57,10 +57,10 @@ void check_array_data(const IColumn& arr, const std::vector<std::string>& data) } TEST(ColumnArrayTest, IntArrayTest) { - auto off_column = ColumnVector<IColumn::Offset>::create(); + auto off_column = 
ColumnVector<IColumn::Offset64>::create(); auto data_column = ColumnVector<int32_t>::create(); // init column array with [[1,2,3],[],[4]] - std::vector<IColumn::Offset> offs = {0, 3, 3, 4}; + std::vector<IColumn::Offset64> offs = {0, 3, 3, 4}; std::vector<int32_t> vals = {1, 2, 3, 4}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -82,10 +82,10 @@ TEST(ColumnArrayTest, IntArrayTest) { } TEST(ColumnArrayTest, StringArrayTest) { - auto off_column = ColumnVector<IColumn::Offset>::create(); + auto off_column = ColumnVector<IColumn::Offset64>::create(); auto data_column = ColumnString::create(); // init column array with [["abc","d"],["ef"],[], [""]]; - std::vector<IColumn::Offset> offs = {0, 2, 3, 3, 4}; + std::vector<IColumn::Offset64> offs = {0, 2, 3, 3, 4}; std::vector<std::string> vals = {"abc", "d", "ef", ""}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -107,10 +107,10 @@ TEST(ColumnArrayTest, StringArrayTest) { } TEST(ColumnArrayTest, IntArrayPermuteTest) { - auto off_column = ColumnVector<IColumn::Offset>::create(); + auto off_column = ColumnVector<IColumn::Offset64>::create(); auto data_column = ColumnVector<int32_t>::create(); // init column array with [[1,2,3],[],[4],[5,6]] - std::vector<IColumn::Offset> offs = {0, 3, 3, 4, 6}; + std::vector<IColumn::Offset64> offs = {0, 3, 3, 4, 6}; std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -133,10 +133,10 @@ TEST(ColumnArrayTest, IntArrayPermuteTest) { } TEST(ColumnArrayTest, StringArrayPermuteTest) { - auto off_column = ColumnVector<IColumn::Offset>::create(); + auto off_column = ColumnVector<IColumn::Offset64>::create(); auto data_column = ColumnString::create(); // init column array with [["abc","d"],["ef"],[], [""]]; - std::vector<IColumn::Offset> offs = {0, 2, 3, 3, 4}; + std::vector<IColumn::Offset64> offs = 
{0, 2, 3, 3, 4}; std::vector<std::string> vals = {"abc", "d", "ef", ""}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -159,10 +159,10 @@ TEST(ColumnArrayTest, StringArrayPermuteTest) { } TEST(ColumnArrayTest, EmptyArrayPermuteTest) { - auto off_column = ColumnVector<IColumn::Offset>::create(); + auto off_column = ColumnVector<IColumn::Offset64>::create(); auto data_column = ColumnVector<int32_t>::create(); // init column array with [[],[],[],[]] - std::vector<IColumn::Offset> offs = {0, 0, 0, 0, 0}; + std::vector<IColumn::Offset64> offs = {0, 0, 0, 0, 0}; std::vector<int32_t> vals = {}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -185,10 +185,10 @@ TEST(ColumnArrayTest, EmptyArrayPermuteTest) { } TEST(ColumnArrayTest, IntArrayReplicateTest) { - auto off_column = ColumnVector<IColumn::Offset>::create(); + auto off_column = ColumnVector<IColumn::Offset64>::create(); auto data_column = ColumnVector<int32_t>::create(); // init column array with [[1,2,3],[],[4],[5,6]] - std::vector<IColumn::Offset> offs = {0, 3, 3, 4, 6}; + std::vector<IColumn::Offset64> offs = {0, 3, 3, 4, 6}; std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -209,10 +209,10 @@ TEST(ColumnArrayTest, IntArrayReplicateTest) { } TEST(ColumnArrayTest, StringArrayReplicateTest) { - auto off_column = ColumnVector<IColumn::Offset>::create(); + auto off_column = ColumnVector<IColumn::Offset64>::create(); auto data_column = ColumnString::create(); // init column array with [["abc","d"],["ef"],[], [""]]; - std::vector<IColumn::Offset> offs = {0, 2, 3, 3, 4}; + std::vector<IColumn::Offset64> offs = {0, 2, 3, 3, 4}; std::vector<std::string> vals = {"abc", "d", "ef", ""}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); diff --git 
a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp index afa5bf7c6f..6cc32c05a3 100644 --- a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp +++ b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp @@ -613,7 +613,7 @@ TEST(ArrowColumnToDorisColumnTest, test_binary) { template <typename ArrowValueType, bool is_nullable = false> static inline std::shared_ptr<arrow::Array> create_array_array( - std::vector<IColumn::Offset>& vec_offsets, std::vector<bool>& null_map, + std::vector<IColumn::Offset64>& vec_offsets, std::vector<bool>& null_map, std::shared_ptr<arrow::DataType> value_type, std::shared_ptr<arrow::Array> values, size_t& counter) { using offset_type = typename arrow::ListType::offset_type; @@ -646,7 +646,7 @@ static inline std::shared_ptr<arrow::Array> create_array_array( template <typename ArrowType, bool is_nullable> void test_arrow_to_array_column(ColumnWithTypeAndName& column, - std::vector<IColumn::Offset>& vec_offsets, + std::vector<IColumn::Offset64>& vec_offsets, std::vector<bool>& null_map, std::shared_ptr<arrow::DataType> value_type, std::shared_ptr<arrow::Array> values, const std::string& value, @@ -698,7 +698,7 @@ void test_arrow_to_array_column(ColumnWithTypeAndName& column, template <typename ArrowType, bool is_nullable> void test_array(const std::vector<std::string>& test_cases, size_t num_elements, - std::vector<IColumn::Offset>& vec_offsets, std::vector<bool>& null_map, + std::vector<IColumn::Offset64>& vec_offsets, std::vector<bool>& null_map, std::shared_ptr<arrow::DataType> value_type) { TypeDescriptor type(TYPE_ARRAY); type.children.push_back(TYPE_VARCHAR); @@ -724,7 +724,7 @@ void test_array(const std::vector<std::string>& test_cases, size_t num_elements, TEST(ArrowColumnToDorisColumnTest, test_array) { std::vector<std::string> test_cases = {"1.2345678", "-12.34567890", "99999999999.99999999", "-99999999999.99999999"}; - std::vector<IColumn::Offset> vec_offsets 
= {0, 3, 3, 4, 6, 6, 64}; + std::vector<IColumn::Offset64> vec_offsets = {0, 3, 3, 4, 6, 6, 64}; std::vector<bool> null_map = {false, true, false, false, false, false}; test_array<arrow::BinaryType, false>(test_cases, 64, vec_offsets, null_map, arrow::list(arrow::binary())); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org