This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new cf5d194fe1 [enhancement](array-type) Split Array Offsets and String 
Offsets (#12341)
cf5d194fe1 is described below

commit cf5d194fe103633904c5159b8298535027e9bb30
Author: camby <104178...@qq.com>
AuthorDate: Tue Sep 6 11:18:27 2022 +0800

    [enhancement](array-type) Split Array Offsets and String Offsets (#12341)
    
    In older Doris versions, string offsets are 32-bit, which is not enough 
for the Array type.
    If we changed string offsets from 32-bit to 64-bit, there would be a 
problem when upgrading BEs one by one, because strings with 32-bit offsets and 
strings with 64-bit offsets would exist at the same time.
    As a result, we separate the code for Array offsets (64-bit) from the code for String offsets (32-bit).
    Co-authored-by: cambyzju <zhuxiaol...@baidu.com>
---
 be/src/vec/columns/column.h                        | 10 ++-
 be/src/vec/columns/column_array.cpp                | 58 +++++++--------
 be/src/vec/columns/column_array.h                  |  6 +-
 be/src/vec/columns/column_string.cpp               |  3 +-
 be/src/vec/columns/columns_common.cpp              | 87 ++++++++++++----------
 be/src/vec/columns/columns_common.h                | 13 ++--
 be/src/vec/data_types/data_type_array.cpp          | 18 ++---
 .../functions/array/function_array_aggregation.cpp |  2 +-
 .../vec/functions/array/function_array_distinct.h  | 28 +++----
 .../vec/functions/array/function_array_element.h   |  4 +-
 be/src/vec/functions/array/function_array_index.h  |  6 +-
 be/src/vec/functions/array/function_array_join.h   | 19 +++--
 be/src/vec/functions/array/function_array_remove.h |  6 +-
 .../vec/functions/array/function_array_reverse.h   |  8 +-
 be/src/vec/functions/array/function_array_sort.h   | 36 ++++-----
 be/src/vec/functions/array/function_array_utils.h  |  4 +-
 be/src/vec/functions/function_string.h             |  6 +-
 be/test/vec/core/block_test.cpp                    |  8 +-
 be/test/vec/core/column_array_test.cpp             | 30 ++++----
 .../utils/arrow_column_to_doris_column_test.cpp    |  8 +-
 20 files changed, 191 insertions(+), 169 deletions(-)

diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 4307153739..565a9416d7 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -340,12 +340,18 @@ public:
     virtual void get_permutation(bool reverse, size_t limit, int 
nan_direction_hint,
                                  Permutation& res) const = 0;
 
+    // 32bit offsets for string
+    using Offset = UInt32;
+    using Offsets = PaddedPODArray<Offset>;
+
+    // 64bit offsets for array
+    using Offset64 = UInt64;
+    using Offsets64 = PaddedPODArray<Offset64>;
+
     /** Copies each element according offsets parameter.
       * (i-th element should be copied offsets[i] - offsets[i - 1] times.)
       * It is necessary in ARRAY JOIN operation.
       */
-    using Offset = UInt64;
-    using Offsets = PaddedPODArray<Offset>;
     virtual Ptr replicate(const Offsets& offsets) const = 0;
 
     virtual void replicate(const uint32_t* counts, size_t target_size, 
IColumn& column) const {
diff --git a/be/src/vec/columns/column_array.cpp 
b/be/src/vec/columns/column_array.cpp
index 7ef683f651..c498b72345 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -55,7 +55,7 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, 
MutableColumnPtr&& of
     }
 
     if (!offsets_concrete->empty() && nested_column) {
-        Offset last_offset = offsets_concrete->get_data().back();
+        auto last_offset = offsets_concrete->get_data().back();
 
         /// This will also prevent possible overflow in offset.
         if (nested_column->size() != last_offset) {
@@ -93,7 +93,7 @@ MutableColumnPtr ColumnArray::clone_resized(size_t to_size) 
const {
         res->get_data().insert_range_from(get_data(), 0, get_offsets()[to_size 
- 1]);
     } else {
         /// Copy column and append empty arrays for extra elements.
-        Offset offset = 0;
+        Offset64 offset = 0;
         if (from_size > 0) {
             res->get_offsets().assign(get_offsets().begin(), 
get_offsets().end());
             res->get_data().insert_range_from(get_data(), 0, 
get_data().size());
@@ -304,8 +304,8 @@ void ColumnArray::insert_range_from(const IColumn& src, 
size_t start, size_t len
 
     get_data().insert_range_from(src_concrete.get_data(), nested_offset, 
nested_length);
 
-    Offsets& cur_offsets = get_offsets();
-    const Offsets& src_offsets = src_concrete.get_offsets();
+    auto& cur_offsets = get_offsets();
+    const auto& src_offsets = src_concrete.get_offsets();
 
     if (start == 0 && cur_offsets.empty()) {
         cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length);
@@ -355,10 +355,10 @@ ColumnPtr ColumnArray::filter_number(const Filter& filt, 
ssize_t result_size_hin
     auto res = ColumnArray::create(data->clone_empty());
 
     auto& res_elems = 
assert_cast<ColumnVector<T>&>(res->get_data()).get_data();
-    Offsets& res_offsets = res->get_offsets();
+    auto& res_offsets = res->get_offsets();
 
-    filter_arrays_impl<T>(assert_cast<const 
ColumnVector<T>&>(*data).get_data(), get_offsets(),
-                          res_elems, res_offsets, filt, result_size_hint);
+    filter_arrays_impl<T, Offset64>(assert_cast<const 
ColumnVector<T>&>(*data).get_data(),
+                                    get_offsets(), res_elems, res_offsets, 
filt, result_size_hint);
     return res;
 }
 
@@ -372,12 +372,12 @@ ColumnPtr ColumnArray::filter_string(const Filter& filt, 
ssize_t result_size_hin
 
     const ColumnString& src_string = typeid_cast<const ColumnString&>(*data);
     const ColumnString::Chars& src_chars = src_string.get_chars();
-    const Offsets& src_string_offsets = src_string.get_offsets();
-    const Offsets& src_offsets = get_offsets();
+    const auto& src_string_offsets = src_string.get_offsets();
+    const auto& src_offsets = get_offsets();
 
     ColumnString::Chars& res_chars = 
typeid_cast<ColumnString&>(res->get_data()).get_chars();
-    Offsets& res_string_offsets = 
typeid_cast<ColumnString&>(res->get_data()).get_offsets();
-    Offsets& res_offsets = res->get_offsets();
+    auto& res_string_offsets = 
typeid_cast<ColumnString&>(res->get_data()).get_offsets();
+    auto& res_offsets = res->get_offsets();
 
     if (result_size_hint < 0) {
         res_chars.reserve(src_chars.size());
@@ -385,10 +385,10 @@ ColumnPtr ColumnArray::filter_string(const Filter& filt, 
ssize_t result_size_hin
         res_offsets.reserve(col_size);
     }
 
-    Offset prev_src_offset = 0;
+    Offset64 prev_src_offset = 0;
     Offset prev_src_string_offset = 0;
 
-    Offset prev_res_offset = 0;
+    Offset64 prev_res_offset = 0;
     Offset prev_res_string_offset = 0;
 
     for (size_t i = 0; i < col_size; ++i) {
@@ -450,7 +450,7 @@ ColumnPtr ColumnArray::filter_generic(const Filter& filt, 
ssize_t result_size_hi
 
     res->data = data->filter(nested_filt, nested_result_size_hint);
 
-    Offsets& res_offsets = res->get_offsets();
+    auto& res_offsets = res->get_offsets();
     if (result_size_hint) res_offsets.reserve(result_size_hint > 0 ? 
result_size_hint : size);
 
     size_t current_offset = 0;
@@ -566,18 +566,18 @@ ColumnPtr ColumnArray::replicate_number(const Offsets& 
replicate_offsets) const
 
     const typename ColumnVector<T>::Container& src_data =
             typeid_cast<const ColumnVector<T>&>(*data).get_data();
-    const Offsets& src_offsets = get_offsets();
+    const auto& src_offsets = get_offsets();
 
     typename ColumnVector<T>::Container& res_data =
             typeid_cast<ColumnVector<T>&>(res_arr.get_data()).get_data();
-    Offsets& res_offsets = res_arr.get_offsets();
+    auto& res_offsets = res_arr.get_offsets();
 
     res_data.reserve(data->size() / col_size * replicate_offsets.back());
     res_offsets.reserve(replicate_offsets.back());
 
     Offset prev_replicate_offset = 0;
-    Offset prev_data_offset = 0;
-    Offset current_new_offset = 0;
+    Offset64 prev_data_offset = 0;
+    Offset64 current_new_offset = 0;
 
     for (size_t i = 0; i < col_size; ++i) {
         size_t size_to_replicate = replicate_offsets[i] - 
prev_replicate_offset;
@@ -614,12 +614,12 @@ ColumnPtr ColumnArray::replicate_string(const Offsets& 
replicate_offsets) const
 
     const ColumnString& src_string = typeid_cast<const ColumnString&>(*data);
     const ColumnString::Chars& src_chars = src_string.get_chars();
-    const Offsets& src_string_offsets = src_string.get_offsets();
-    const Offsets& src_offsets = get_offsets();
+    const auto& src_string_offsets = src_string.get_offsets();
+    const auto& src_offsets = get_offsets();
 
     ColumnString::Chars& res_chars = 
typeid_cast<ColumnString&>(res_arr.get_data()).get_chars();
-    Offsets& res_string_offsets = 
typeid_cast<ColumnString&>(res_arr.get_data()).get_offsets();
-    Offsets& res_offsets = res_arr.get_offsets();
+    auto& res_string_offsets = 
typeid_cast<ColumnString&>(res_arr.get_data()).get_offsets();
+    auto& res_offsets = res_arr.get_offsets();
 
     res_chars.reserve(src_chars.size() / col_size * replicate_offsets.back());
     res_string_offsets.reserve(src_string_offsets.size() / col_size * 
replicate_offsets.back());
@@ -627,10 +627,10 @@ ColumnPtr ColumnArray::replicate_string(const Offsets& 
replicate_offsets) const
 
     Offset prev_replicate_offset = 0;
 
-    Offset prev_src_offset = 0;
+    Offset64 prev_src_offset = 0;
     Offset prev_src_string_offset = 0;
 
-    Offset current_res_offset = 0;
+    Offset64 current_res_offset = 0;
     Offset current_res_string_offset = 0;
 
     for (size_t i = 0; i < col_size; ++i) {
@@ -682,15 +682,15 @@ ColumnPtr ColumnArray::replicate_const(const Offsets& 
replicate_offsets) const {
 
     if (0 == col_size) return clone_empty();
 
-    const Offsets& src_offsets = get_offsets();
+    const auto& src_offsets = get_offsets();
 
     auto res_column_offsets = ColumnOffsets::create();
-    Offsets& res_offsets = res_column_offsets->get_data();
+    auto& res_offsets = res_column_offsets->get_data();
     res_offsets.reserve(replicate_offsets.back());
 
     Offset prev_replicate_offset = 0;
-    Offset prev_data_offset = 0;
-    Offset current_new_offset = 0;
+    Offset64 prev_data_offset = 0;
+    Offset64 current_new_offset = 0;
 
     for (size_t i = 0; i < col_size; ++i) {
         size_t size_to_replicate = replicate_offsets[i] - 
prev_replicate_offset;
@@ -719,7 +719,7 @@ ColumnPtr ColumnArray::replicate_generic(const Offsets& 
replicate_offsets) const
 
     if (0 == col_size) return res;
 
-    IColumn::Offset prev_offset = 0;
+    Offset64 prev_offset = 0;
     for (size_t i = 0; i < col_size; ++i) {
         size_t size_to_replicate = replicate_offsets[i] - prev_offset;
         prev_offset = replicate_offsets[i];
diff --git a/be/src/vec/columns/column_array.h 
b/be/src/vec/columns/column_array.h
index 50f864fbb9..686089f4e9 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -67,7 +67,7 @@ public:
     }
 
     /** On the index i there is an offset to the beginning of the i + 1 -th 
element. */
-    using ColumnOffsets = ColumnVector<Offset>;
+    using ColumnOffsets = ColumnVector<Offset64>;
 
     std::string get_name() const override;
     const char* get_family_name() const override { return "Array"; }
@@ -118,11 +118,11 @@ public:
     IColumn& get_offsets_column() { return *offsets; }
     const IColumn& get_offsets_column() const { return *offsets; }
 
-    Offsets& ALWAYS_INLINE get_offsets() {
+    Offsets64& ALWAYS_INLINE get_offsets() {
         return assert_cast<ColumnOffsets&>(*offsets).get_data();
     }
 
-    const Offsets& ALWAYS_INLINE get_offsets() const {
+    const Offsets64& ALWAYS_INLINE get_offsets() const {
         return assert_cast<const ColumnOffsets&>(*offsets).get_data();
     }
 
diff --git a/be/src/vec/columns/column_string.cpp 
b/be/src/vec/columns/column_string.cpp
index 3adf082ae0..c8b99e8ffa 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -111,7 +111,8 @@ ColumnPtr ColumnString::filter(const Filter& filt, ssize_t 
result_size_hint) con
     Chars& res_chars = res->chars;
     Offsets& res_offsets = res->offsets;
 
-    filter_arrays_impl<UInt8>(chars, offsets, res_chars, res_offsets, filt, 
result_size_hint);
+    filter_arrays_impl<UInt8, Offset>(chars, offsets, res_chars, res_offsets, 
filt,
+                                      result_size_hint);
     return res;
 }
 
diff --git a/be/src/vec/columns/columns_common.cpp 
b/be/src/vec/columns/columns_common.cpp
index 02183b9876..8ab45e112b 100644
--- a/be/src/vec/columns/columns_common.cpp
+++ b/be/src/vec/columns/columns_common.cpp
@@ -98,11 +98,12 @@ namespace {
 /// Implementation details of filterArraysImpl function, used as template 
parameter.
 /// Allow to build or not to build offsets array.
 
+template <typename OT>
 struct ResultOffsetsBuilder {
-    IColumn::Offsets& res_offsets;
-    IColumn::Offset current_src_offset = 0;
+    PaddedPODArray<OT>& res_offsets;
+    OT current_src_offset = 0;
 
-    explicit ResultOffsetsBuilder(IColumn::Offsets* res_offsets_) : 
res_offsets(*res_offsets_) {}
+    explicit ResultOffsetsBuilder(PaddedPODArray<OT>* res_offsets_) : 
res_offsets(*res_offsets_) {}
 
     void reserve(ssize_t result_size_hint, size_t src_size) {
         res_offsets.reserve(result_size_hint > 0 ? result_size_hint : 
src_size);
@@ -114,12 +115,10 @@ struct ResultOffsetsBuilder {
     }
 
     template <size_t SIMD_BYTES>
-    void insert_chunk(const IColumn::Offset* src_offsets_pos, bool first,
-                      IColumn::Offset chunk_offset, size_t chunk_size) {
+    void insert_chunk(const OT* src_offsets_pos, bool first, OT chunk_offset, 
size_t chunk_size) {
         const auto offsets_size_old = res_offsets.size();
         res_offsets.resize_assume_reserved(offsets_size_old + SIMD_BYTES);
-        memcpy(&res_offsets[offsets_size_old], src_offsets_pos,
-               SIMD_BYTES * sizeof(IColumn::Offset));
+        memcpy(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES * 
sizeof(OT));
 
         if (!first) {
             /// difference between current and actual offset
@@ -138,19 +137,20 @@ struct ResultOffsetsBuilder {
     }
 };
 
+template <typename OT>
 struct NoResultOffsetsBuilder {
-    explicit NoResultOffsetsBuilder(IColumn::Offsets*) {}
+    explicit NoResultOffsetsBuilder(PaddedPODArray<OT>*) {}
     void reserve(ssize_t, size_t) {}
     void insert_one(size_t) {}
 
     template <size_t SIMD_BYTES>
-    void insert_chunk(const IColumn::Offset*, bool, IColumn::Offset, size_t) {}
+    void insert_chunk(const OT*, bool, OT, size_t) {}
 };
 
-template <typename T, typename ResultOffsetsBuilder>
+template <typename T, typename OT, typename ResultOffsetsBuilder>
 void filter_arrays_impl_generic(const PaddedPODArray<T>& src_elems,
-                                const IColumn::Offsets& src_offsets, 
PaddedPODArray<T>& res_elems,
-                                IColumn::Offsets* res_offsets, const 
IColumn::Filter& filt,
+                                const PaddedPODArray<OT>& src_offsets, 
PaddedPODArray<T>& res_elems,
+                                PaddedPODArray<OT>* res_offsets, const 
IColumn::Filter& filt,
                                 ssize_t result_size_hint) {
     const size_t size = src_offsets.size();
     if (size != filt.size()) {
@@ -175,7 +175,7 @@ void filter_arrays_impl_generic(const PaddedPODArray<T>& 
src_elems,
     const auto offsets_begin = offsets_pos;
 
     /// copy array ending at *end_offset_ptr
-    const auto copy_array = [&](const IColumn::Offset* offset_ptr) {
+    const auto copy_array = [&](const OT* offset_ptr) {
         const auto arr_offset = offset_ptr == offsets_begin ? 0 : 
offset_ptr[-1];
         const auto arr_size = *offset_ptr - arr_offset;
 
@@ -229,41 +229,52 @@ void filter_arrays_impl_generic(const PaddedPODArray<T>& 
src_elems,
 }
 } // namespace
 
-template <typename T>
-void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const 
IColumn::Offsets& src_offsets,
-                        PaddedPODArray<T>& res_elems, IColumn::Offsets& 
res_offsets,
+template <typename T, typename OT>
+void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const 
PaddedPODArray<OT>& src_offsets,
+                        PaddedPODArray<T>& res_elems, PaddedPODArray<OT>& 
res_offsets,
                         const IColumn::Filter& filt, ssize_t result_size_hint) 
{
-    return filter_arrays_impl_generic<T, ResultOffsetsBuilder>(
+    return filter_arrays_impl_generic<T, OT, ResultOffsetsBuilder<OT>>(
             src_elems, src_offsets, res_elems, &res_offsets, filt, 
result_size_hint);
 }
 
-template <typename T>
+template <typename T, typename OT>
 void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems,
-                                  const IColumn::Offsets& src_offsets, 
PaddedPODArray<T>& res_elems,
-                                  const IColumn::Filter& filt, ssize_t 
result_size_hint) {
-    return filter_arrays_impl_generic<T, NoResultOffsetsBuilder>(src_elems, 
src_offsets, res_elems,
-                                                                 nullptr, 
filt, result_size_hint);
+                                  const PaddedPODArray<OT>& src_offsets,
+                                  PaddedPODArray<T>& res_elems, const 
IColumn::Filter& filt,
+                                  ssize_t result_size_hint) {
+    return filter_arrays_impl_generic<T, OT, NoResultOffsetsBuilder<OT>>(
+            src_elems, src_offsets, res_elems, nullptr, filt, 
result_size_hint);
 }
 
 /// Explicit instantiations - not to place the implementation of the function 
above in the header file.
-#define INSTANTIATE(TYPE)                                                      
                  \
-    template void filter_arrays_impl<TYPE>(const PaddedPODArray<TYPE>&, const 
IColumn::Offsets&, \
-                                           PaddedPODArray<TYPE>&, 
IColumn::Offsets&,             \
-                                           const IColumn::Filter&, ssize_t);   
                  \
-    template void filter_arrays_impl_only_data<TYPE>(                          
                  \
-            const PaddedPODArray<TYPE>&, const IColumn::Offsets&, 
PaddedPODArray<TYPE>&,         \
+#define INSTANTIATE(TYPE, OFFTYPE)                                             
                 \
+    template void filter_arrays_impl<TYPE, OFFTYPE>(                           
                 \
+            const PaddedPODArray<TYPE>&, const PaddedPODArray<OFFTYPE>&, 
PaddedPODArray<TYPE>&, \
+            PaddedPODArray<OFFTYPE>&, const IColumn::Filter&, ssize_t);        
                 \
+    template void filter_arrays_impl_only_data<TYPE, OFFTYPE>(                 
                 \
+            const PaddedPODArray<TYPE>&, const PaddedPODArray<OFFTYPE>&, 
PaddedPODArray<TYPE>&, \
             const IColumn::Filter&, ssize_t);
 
-INSTANTIATE(UInt8)
-INSTANTIATE(UInt16)
-INSTANTIATE(UInt32)
-INSTANTIATE(UInt64)
-INSTANTIATE(Int8)
-INSTANTIATE(Int16)
-INSTANTIATE(Int32)
-INSTANTIATE(Int64)
-INSTANTIATE(Float32)
-INSTANTIATE(Float64)
+INSTANTIATE(UInt8, IColumn::Offset)
+INSTANTIATE(UInt8, IColumn::Offset64)
+INSTANTIATE(UInt16, IColumn::Offset)
+INSTANTIATE(UInt16, IColumn::Offset64)
+INSTANTIATE(UInt32, IColumn::Offset)
+INSTANTIATE(UInt32, IColumn::Offset64)
+INSTANTIATE(UInt64, IColumn::Offset)
+INSTANTIATE(UInt64, IColumn::Offset64)
+INSTANTIATE(Int8, IColumn::Offset)
+INSTANTIATE(Int8, IColumn::Offset64)
+INSTANTIATE(Int16, IColumn::Offset)
+INSTANTIATE(Int16, IColumn::Offset64)
+INSTANTIATE(Int32, IColumn::Offset)
+INSTANTIATE(Int32, IColumn::Offset64)
+INSTANTIATE(Int64, IColumn::Offset)
+INSTANTIATE(Int64, IColumn::Offset64)
+INSTANTIATE(Float32, IColumn::Offset)
+INSTANTIATE(Float32, IColumn::Offset64)
+INSTANTIATE(Float64, IColumn::Offset)
+INSTANTIATE(Float64, IColumn::Offset64)
 
 #undef INSTANTIATE
 
diff --git a/be/src/vec/columns/columns_common.h 
b/be/src/vec/columns/columns_common.h
index dc9116e343..7308816005 100644
--- a/be/src/vec/columns/columns_common.h
+++ b/be/src/vec/columns/columns_common.h
@@ -39,15 +39,16 @@ bool memory_is_zero(const void* data, size_t size);
 bool memory_is_byte(const void* data, size_t size, uint8_t byte);
 
 /// The general implementation of `filter` function for ColumnArray and 
ColumnString.
-template <typename T>
-void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const 
IColumn::Offsets& src_offsets,
-                        PaddedPODArray<T>& res_elems, IColumn::Offsets& 
res_offsets,
+template <typename T, typename OT>
+void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const 
PaddedPODArray<OT>& src_offsets,
+                        PaddedPODArray<T>& res_elems, PaddedPODArray<OT>& 
res_offsets,
                         const IColumn::Filter& filt, ssize_t result_size_hint);
 
 /// Same as above, but not fills res_offsets.
-template <typename T>
+template <typename T, typename OT>
 void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems,
-                                  const IColumn::Offsets& src_offsets, 
PaddedPODArray<T>& res_elems,
-                                  const IColumn::Filter& filt, ssize_t 
result_size_hint);
+                                  const PaddedPODArray<OT>& src_offsets,
+                                  PaddedPODArray<T>& res_elems, const 
IColumn::Filter& filt,
+                                  ssize_t result_size_hint);
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_array.cpp 
b/be/src/vec/data_types/data_type_array.cpp
index 2fb0805e49..7301e6e0e5 100644
--- a/be/src/vec/data_types/data_type_array.cpp
+++ b/be/src/vec/data_types/data_type_array.cpp
@@ -58,7 +58,7 @@ size_t DataTypeArray::get_number_of_dimensions() const {
 int64_t DataTypeArray::get_uncompressed_serialized_bytes(const IColumn& 
column) const {
     auto ptr = column.convert_to_full_column_if_const();
     const auto& data_column = assert_cast<const ColumnArray&>(*ptr.get());
-    return sizeof(IColumn::Offset) * (column.size() + 1) +
+    return sizeof(IColumn::Offset64) * (column.size() + 1) +
            
get_nested_type()->get_uncompressed_serialized_bytes(data_column.get_data());
 }
 
@@ -67,11 +67,11 @@ char* DataTypeArray::serialize(const IColumn& column, char* 
buf) const {
     const auto& data_column = assert_cast<const ColumnArray&>(*ptr.get());
 
     // row num
-    *reinterpret_cast<IColumn::Offset*>(buf) = column.size();
-    buf += sizeof(IColumn::Offset);
+    *reinterpret_cast<IColumn::Offset64*>(buf) = column.size();
+    buf += sizeof(IColumn::Offset64);
     // offsets
-    memcpy(buf, data_column.get_offsets().data(), column.size() * 
sizeof(IColumn::Offset));
-    buf += column.size() * sizeof(IColumn::Offset);
+    memcpy(buf, data_column.get_offsets().data(), column.size() * 
sizeof(IColumn::Offset64));
+    buf += column.size() * sizeof(IColumn::Offset64);
     // children
     return get_nested_type()->serialize(data_column.get_data(), buf);
 }
@@ -81,12 +81,12 @@ const char* DataTypeArray::deserialize(const char* buf, 
IColumn* column) const {
     auto& offsets = data_column->get_offsets();
 
     // row num
-    IColumn::Offset row_num = *reinterpret_cast<const IColumn::Offset*>(buf);
-    buf += sizeof(IColumn::Offset);
+    IColumn::Offset64 row_num = *reinterpret_cast<const 
IColumn::Offset64*>(buf);
+    buf += sizeof(IColumn::Offset64);
     // offsets
     offsets.resize(row_num);
-    memcpy(offsets.data(), buf, sizeof(IColumn::Offset) * row_num);
-    buf += sizeof(IColumn::Offset) * row_num;
+    memcpy(offsets.data(), buf, sizeof(IColumn::Offset64) * row_num);
+    buf += sizeof(IColumn::Offset64) * row_num;
     // children
     return get_nested_type()->deserialize(buf, 
data_column->get_data_ptr()->assume_mutable());
 }
diff --git a/be/src/vec/functions/array/function_array_aggregation.cpp 
b/be/src/vec/functions/array/function_array_aggregation.cpp
index 42351fd7eb..f09a0a72fa 100644
--- a/be/src/vec/functions/array/function_array_aggregation.cpp
+++ b/be/src/vec/functions/array/function_array_aggregation.cpp
@@ -176,7 +176,7 @@ struct ArrayAggregateImpl {
 
     template <typename Element>
     static bool execute_type(ColumnPtr& res_ptr, const DataTypePtr& type, 
const IColumn* data,
-                             const ColumnArray::Offsets& offsets) {
+                             const ColumnArray::Offsets64& offsets) {
         using ColVecType = ColumnVectorOrDecimal<Element>;
         using ResultType = ArrayAggregateResult<Element, operation>;
         using ColVecResultType = ColumnVectorOrDecimal<ResultType>;
diff --git a/be/src/vec/functions/array/function_array_distinct.h 
b/be/src/vec/functions/array/function_array_distinct.h
index 7d9c989c29..77e997aba6 100644
--- a/be/src/vec/functions/array/function_array_distinct.h
+++ b/be/src/vec/functions/array/function_array_distinct.h
@@ -71,7 +71,7 @@ public:
         auto dest_column_ptr = 
ColumnArray::create(nested_type->create_column(),
                                                    
ColumnArray::ColumnOffsets::create());
         IColumn* dest_nested_column = &dest_column_ptr->get_data();
-        ColumnArray::Offsets& dest_offsets = dest_column_ptr->get_offsets();
+        auto& dest_offsets = dest_column_ptr->get_offsets();
         DCHECK(dest_nested_column != nullptr);
         dest_nested_column->reserve(src_nested_column->size());
         dest_offsets.reserve(input_rows_count);
@@ -109,8 +109,8 @@ private:
     static constexpr size_t INITIAL_SIZE_DEGREE = 5;
 
     template <typename ColumnType>
-    bool _execute_number(const IColumn& src_column, const 
ColumnArray::Offsets& src_offsets,
-                         IColumn& dest_column, ColumnArray::Offsets& 
dest_offsets,
+    bool _execute_number(const IColumn& src_column, const 
ColumnArray::Offsets64& src_offsets,
+                         IColumn& dest_column, ColumnArray::Offsets64& 
dest_offsets,
                          const NullMapType* src_null_map, NullMapType* 
dest_null_map) {
         using NestType = typename ColumnType::value_type;
         using ElementNativeType = typename NativeType<NestType>::Type;
@@ -128,13 +128,13 @@ private:
                                            INITIAL_SIZE_DEGREE>;
         Set set;
 
-        ColumnArray::Offset prev_src_offset = 0;
-        ColumnArray::Offset res_offset = 0;
+        size_t prev_src_offset = 0;
+        size_t res_offset = 0;
 
         for (auto curr_src_offset : src_offsets) {
             set.clear();
             size_t null_size = 0;
-            for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; 
++j) {
+            for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
                 if (src_null_map && (*src_null_map)[j]) {
                     DCHECK(dest_null_map != nullptr);
                     (*dest_null_map).push_back(true);
@@ -162,8 +162,8 @@ private:
         return true;
     }
 
-    bool _execute_string(const IColumn& src_column, const 
ColumnArray::Offsets& src_offsets,
-                         IColumn& dest_column, ColumnArray::Offsets& 
dest_offsets,
+    bool _execute_string(const IColumn& src_column, const 
ColumnArray::Offsets64& src_offsets,
+                         IColumn& dest_column, ColumnArray::Offsets64& 
dest_offsets,
                          const NullMapType* src_null_map, NullMapType* 
dest_null_map) {
         const ColumnString* src_data_concrete = reinterpret_cast<const 
ColumnString*>(&src_column);
         if (!src_data_concrete) {
@@ -178,13 +178,13 @@ private:
         using Set = HashSetWithStackMemory<StringRef, DefaultHash<StringRef>, 
INITIAL_SIZE_DEGREE>;
         Set set;
 
-        ColumnArray::Offset prev_src_offset = 0;
-        ColumnArray::Offset res_offset = 0;
+        size_t prev_src_offset = 0;
+        size_t res_offset = 0;
 
         for (auto curr_src_offset : src_offsets) {
             set.clear();
             size_t null_size = 0;
-            for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; 
++j) {
+            for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
                 if (src_null_map && (*src_null_map)[j]) {
                     DCHECK(dest_null_map != nullptr);
                     // Note: here we need to update the offset of ColumnString
@@ -221,8 +221,8 @@ private:
         return true;
     }
 
-    bool _execute_by_type(const IColumn& src_column, const 
ColumnArray::Offsets& src_offsets,
-                          IColumn& dest_column, ColumnArray::Offsets& 
dest_offsets,
+    bool _execute_by_type(const IColumn& src_column, const 
ColumnArray::Offsets64& src_offsets,
+                          IColumn& dest_column, ColumnArray::Offsets64& 
dest_offsets,
                           const NullMapType* src_null_map, NullMapType* 
dest_null_map,
                           DataTypePtr& nested_type) {
         bool res = false;
@@ -268,4 +268,4 @@ private:
     }
 };
 
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_element.h 
b/be/src/vec/functions/array/function_array_element.h
index d04a1b605b..6722e09e9c 100644
--- a/be/src/vec/functions/array/function_array_element.h
+++ b/be/src/vec/functions/array/function_array_element.h
@@ -82,7 +82,7 @@ public:
 
 private:
     template <typename ColumnType>
-    ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const 
IColumn& nested_column,
+    ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const 
IColumn& nested_column,
                               const UInt8* arr_null_map, const IColumn& 
indices,
                               const UInt8* nested_null_map, UInt8* 
dst_null_map) {
         const auto& nested_data = reinterpret_cast<const 
ColumnType&>(nested_column).get_data();
@@ -123,7 +123,7 @@ private:
         return dst_column;
     }
 
-    ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const 
IColumn& nested_column,
+    ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const 
IColumn& nested_column,
                               const UInt8* arr_null_map, const IColumn& 
indices,
                               const UInt8* nested_null_map, UInt8* 
dst_null_map) {
         const auto& src_str_offs =
diff --git a/be/src/vec/functions/array/function_array_index.h 
b/be/src/vec/functions/array/function_array_index.h
index ab81490267..cd17feff8d 100644
--- a/be/src/vec/functions/array/function_array_index.h
+++ b/be/src/vec/functions/array/function_array_index.h
@@ -67,7 +67,7 @@ public:
     }
 
 private:
-    ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const 
UInt8* nested_null_map,
+    ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const 
UInt8* nested_null_map,
                               const IColumn& nested_column, const IColumn& 
right_column) {
         // check array nested column type and get data
         const auto& str_offs = reinterpret_cast<const 
ColumnString&>(nested_column).get_offsets();
@@ -110,7 +110,7 @@ private:
     }
 
     template <typename NestedColumnType, typename RightColumnType>
-    ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const 
UInt8* nested_null_map,
+    ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const 
UInt8* nested_null_map,
                               const IColumn& nested_column, const IColumn& 
right_column) {
         // check array nested column type and get data
         const auto& nested_data =
@@ -144,7 +144,7 @@ private:
     }
 
     template <typename NestedColumnType>
-    ColumnPtr _execute_number_expanded(const ColumnArray::Offsets& offsets,
+    ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets,
                                        const UInt8* nested_null_map, const 
IColumn& nested_column,
                                        const IColumn& right_column) {
         if (check_column<ColumnUInt8>(right_column)) {
diff --git a/be/src/vec/functions/array/function_array_join.h b/be/src/vec/functions/array/function_array_join.h
index 452ba0df0b..180e65c21f 100644
--- a/be/src/vec/functions/array/function_array_join.h
+++ b/be/src/vec/functions/array/function_array_join.h
@@ -117,7 +117,8 @@ private:
     }
 
     template <typename ColumnType>
-    static bool _execute_number(const IColumn& src_column, const 
ColumnArray::Offsets& src_offsets,
+    static bool _execute_number(const IColumn& src_column,
+                                const ColumnArray::Offsets64& src_offsets,
                                 const UInt8* src_null_map, const std::string& 
sep_str,
                                 const std::string& null_replace_str, 
DataTypePtr& nested_type,
                                 ColumnString* dest_column_ptr) {
@@ -129,10 +130,10 @@ private:
             return false;
         }
 
-        ColumnArray::Offset prev_src_offset = 0;
+        size_t prev_src_offset = 0;
         for (auto curr_src_offset : src_offsets) {
             std::string result_str;
-            for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; 
++j) {
+            for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
                 if (src_null_map && src_null_map[j]) {
                     if (null_replace_str.size() == 0) {
                         continue;
@@ -160,7 +161,8 @@ private:
         return true;
     }
 
-    static bool _execute_string(const IColumn& src_column, const 
ColumnArray::Offsets& src_offsets,
+    static bool _execute_string(const IColumn& src_column,
+                                const ColumnArray::Offsets64& src_offsets,
                                 const UInt8* src_null_map, const std::string& 
sep_str,
                                 const std::string& null_replace_str,
                                 ColumnString* dest_column_ptr) {
@@ -169,10 +171,10 @@ private:
             return false;
         }
 
-        ColumnArray::Offset prev_src_offset = 0;
+        size_t prev_src_offset = 0;
         for (auto curr_src_offset : src_offsets) {
             std::string result_str;
-            for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; 
++j) {
+            for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
                 if (src_null_map && src_null_map[j]) {
                     if (null_replace_str.size() == 0) {
                         continue;
@@ -193,7 +195,8 @@ private:
         return true;
     }
 
-    static bool _execute_by_type(const IColumn& src_column, const 
ColumnArray::Offsets& src_offsets,
+    static bool _execute_by_type(const IColumn& src_column,
+                                 const ColumnArray::Offsets64& src_offsets,
                                  const UInt8* src_null_map, const std::string& 
sep_str,
                                  const std::string& null_replace_str, 
DataTypePtr& nested_type,
                                  ColumnString* dest_column_ptr) {
@@ -240,4 +243,4 @@ private:
     }
 };
 
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_remove.h b/be/src/vec/functions/array/function_array_remove.h
index a291a53bc3..6565102fb7 100644
--- a/be/src/vec/functions/array/function_array_remove.h
+++ b/be/src/vec/functions/array/function_array_remove.h
@@ -65,7 +65,7 @@ public:
 
 private:
     template <typename NestedColumnType, typename RightColumnType>
-    ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const 
IColumn& nested_column,
+    ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const 
IColumn& nested_column,
                               const IColumn& right_column, const UInt8* 
nested_null_map) {
         // check array nested column type and get data
         const auto& src_data = reinterpret_cast<const 
NestedColumnType&>(nested_column).get_data();
@@ -135,7 +135,7 @@ private:
         return dst;
     }
 
-    ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const 
IColumn& nested_column,
+    ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const 
IColumn& nested_column,
                               const IColumn& right_column, const UInt8* 
nested_null_map) {
         // check array nested column type and get data
         const auto& src_offs = reinterpret_cast<const 
ColumnString&>(nested_column).get_offsets();
@@ -224,7 +224,7 @@ private:
     }
 
     template <typename NestedColumnType>
-    ColumnPtr _execute_number_expanded(const ColumnArray::Offsets& offsets,
+    ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets,
                                        const IColumn& nested_column, const 
IColumn& right_column,
                                        const UInt8* nested_null_map) {
         if (check_column<ColumnUInt8>(right_column)) {
diff --git a/be/src/vec/functions/array/function_array_reverse.h b/be/src/vec/functions/array/function_array_reverse.h
index bc6891a29b..0714542614 100644
--- a/be/src/vec/functions/array/function_array_reverse.h
+++ b/be/src/vec/functions/array/function_array_reverse.h
@@ -58,10 +58,10 @@ struct ArrayReverseImpl {
     }
 
     static bool _execute_internal(const IColumn& src_column,
-                                  const ColumnArray::Offsets& src_offsets, 
IColumn& dest_column,
-                                  ColumnArray::Offsets& dest_offsets, const 
UInt8* src_null_map,
+                                  const ColumnArray::Offsets64& src_offsets, 
IColumn& dest_column,
+                                  ColumnArray::Offsets64& dest_offsets, const 
UInt8* src_null_map,
                                   ColumnUInt8::Container* dest_null_map) {
-        ColumnArray::Offset prev_src_offset = 0;
+        size_t prev_src_offset = 0;
 
         for (auto curr_src_offset : src_offsets) {
             size_t array_size = curr_src_offset - prev_src_offset;
@@ -89,4 +89,4 @@ struct ArrayReverseImpl {
     }
 };
 
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_sort.h b/be/src/vec/functions/array/function_array_sort.h
index ccab8b8e40..87fa684b13 100644
--- a/be/src/vec/functions/array/function_array_sort.h
+++ b/be/src/vec/functions/array/function_array_sort.h
@@ -67,7 +67,7 @@ public:
         auto dest_column_ptr = 
ColumnArray::create(nested_type->create_column(),
                                                    
ColumnArray::ColumnOffsets::create());
         IColumn* dest_nested_column = &dest_column_ptr->get_data();
-        ColumnArray::Offsets& dest_offsets = dest_column_ptr->get_offsets();
+        auto& dest_offsets = dest_column_ptr->get_offsets();
         DCHECK(dest_nested_column != nullptr);
         dest_nested_column->reserve(src_nested_column->size());
         dest_offsets.reserve(input_rows_count);
@@ -103,15 +103,15 @@ public:
 private:
     // sort the non-null element according to the permutation
     template <typename SrcDataType>
-    void _sort_by_permutation(ColumnArray::Offset& prev_offset,
-                              const ColumnArray::Offset& curr_offset,
+    void _sort_by_permutation(ColumnArray::Offset64& prev_offset,
+                              const ColumnArray::Offset64& curr_offset,
                               const SrcDataType* src_data_concrete, const 
IColumn& src_column,
                               const NullMapType* src_null_map, 
IColumn::Permutation& permutation) {
-        for (ColumnArray::Offset j = prev_offset; j + 1 < curr_offset; ++j) {
+        for (size_t j = prev_offset; j + 1 < curr_offset; ++j) {
             if (src_null_map && (*src_null_map)[j]) {
                 continue;
             }
-            for (ColumnArray::Offset k = j + 1; k < curr_offset; ++k) {
+            for (size_t k = j + 1; k < curr_offset; ++k) {
                 if (src_null_map && (*src_null_map)[k]) {
                     continue;
                 }
@@ -128,8 +128,8 @@ private:
     }
 
     template <typename ColumnType>
-    bool _execute_number(const IColumn& src_column, const 
ColumnArray::Offsets& src_offsets,
-                         IColumn& dest_column, ColumnArray::Offsets& 
dest_offsets,
+    bool _execute_number(const IColumn& src_column, const 
ColumnArray::Offsets64& src_offsets,
+                         IColumn& dest_column, ColumnArray::Offsets64& 
dest_offsets,
                          const NullMapType* src_null_map, NullMapType* 
dest_null_map) {
         using NestType = typename ColumnType::value_type;
         const ColumnType* src_data_concrete = reinterpret_cast<const 
ColumnType*>(&src_column);
@@ -141,7 +141,7 @@ private:
         ColumnType& dest_data_concrete = 
reinterpret_cast<ColumnType&>(dest_column);
         PaddedPODArray<NestType>& dest_datas = dest_data_concrete.get_data();
 
-        ColumnArray::Offset prev_src_offset = 0;
+        ColumnArray::Offset64 prev_src_offset = 0;
         IColumn::Permutation permutation(src_column.size());
         for (size_t i = 0; i < src_column.size(); ++i) {
             permutation[i] = i;
@@ -149,7 +149,7 @@ private:
 
         for (auto curr_src_offset : src_offsets) {
             // filter and insert null element first
-            for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; 
++j) {
+            for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
                 if (src_null_map && (*src_null_map)[j]) {
                     DCHECK(dest_null_map != nullptr);
                     (*dest_null_map).push_back(true);
@@ -161,7 +161,7 @@ private:
                                              src_column, src_null_map, 
permutation);
 
             // insert non-null element after sort by permutation
-            for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; 
++j) {
+            for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
                 if (src_null_map && (*src_null_map)[j]) {
                     continue;
                 }
@@ -178,8 +178,8 @@ private:
         return true;
     }
 
-    bool _execute_string(const IColumn& src_column, const 
ColumnArray::Offsets& src_offsets,
-                         IColumn& dest_column, ColumnArray::Offsets& 
dest_offsets,
+    bool _execute_string(const IColumn& src_column, const 
ColumnArray::Offsets64& src_offsets,
+                         IColumn& dest_column, ColumnArray::Offsets64& 
dest_offsets,
                          const NullMapType* src_null_map, NullMapType* 
dest_null_map) {
         const ColumnString* src_data_concrete = reinterpret_cast<const 
ColumnString*>(&src_column);
         if (!src_data_concrete) {
@@ -191,7 +191,7 @@ private:
         ColumnString::Offsets& column_string_offsets = 
dest_column_string.get_offsets();
         column_string_chars.reserve(src_column.size());
 
-        ColumnArray::Offset prev_src_offset = 0;
+        size_t prev_src_offset = 0;
         IColumn::Permutation permutation(src_column.size());
         for (size_t i = 0; i < src_column.size(); ++i) {
             permutation[i] = i;
@@ -199,7 +199,7 @@ private:
 
         for (auto curr_src_offset : src_offsets) {
             // filter and insert null element first
-            for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; 
++j) {
+            for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
                 if (src_null_map && (*src_null_map)[j]) {
                     DCHECK(dest_null_map != nullptr);
                     
column_string_offsets.push_back(column_string_offsets.back());
@@ -211,7 +211,7 @@ private:
                                                src_column, src_null_map, 
permutation);
 
             // insert non-null element after sort by permutation
-            for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; 
++j) {
+            for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
                 if (src_null_map && (*src_null_map)[j]) {
                     continue;
                 }
@@ -238,8 +238,8 @@ private:
         return true;
     }
 
-    bool _execute_by_type(const IColumn& src_column, const 
ColumnArray::Offsets& src_offsets,
-                          IColumn& dest_column, ColumnArray::Offsets& 
dest_offsets,
+    bool _execute_by_type(const IColumn& src_column, const 
ColumnArray::Offsets64& src_offsets,
+                          IColumn& dest_column, ColumnArray::Offsets64& 
dest_offsets,
                           const NullMapType* src_null_map, NullMapType* 
dest_null_map,
                           DataTypePtr& nested_type) {
         bool res = false;
@@ -285,4 +285,4 @@ private:
     }
 };
 
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_utils.h b/be/src/vec/functions/array/function_array_utils.h
index 0e0ebeb3b2..b4859f502b 100644
--- a/be/src/vec/functions/array/function_array_utils.h
+++ b/be/src/vec/functions/array/function_array_utils.h
@@ -35,7 +35,7 @@ public:
 public:
     const UInt8* array_nullmap_data = nullptr;
     const ColumnArray* array_col = nullptr;
-    const ColumnArray::Offsets* offsets_ptr = nullptr;
+    const ColumnArray::Offsets64* offsets_ptr = nullptr;
     const UInt8* nested_nullmap_data = nullptr;
     const IColumn* nested_col = nullptr;
 };
@@ -45,7 +45,7 @@ public:
     MutableColumnPtr array_nested_col = nullptr;
     ColumnUInt8::Container* nested_nullmap_data = nullptr;
     MutableColumnPtr offsets_col = nullptr;
-    ColumnArray::Offsets* offsets_ptr = nullptr;
+    ColumnArray::Offsets64* offsets_ptr = nullptr;
     IColumn* nested_col = nullptr;
 };
 
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index e0f650d30c..c0cb214fcd 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -618,9 +618,9 @@ private:
 
         const auto& string_column = reinterpret_cast<const 
ColumnString&>(*array_nested_column);
         const Chars& string_src_chars = string_column.get_chars();
-        const Offsets& src_string_offsets = string_column.get_offsets();
-        const Offsets& src_array_offsets = array_column.get_offsets();
-        ColumnArray::Offset current_src_array_offset = 0;
+        const auto& src_string_offsets = string_column.get_offsets();
+        const auto& src_array_offsets = array_column.get_offsets();
+        size_t current_src_array_offset = 0;
 
         // Concat string in array
         for (size_t i = 0; i < input_rows_count; ++i) {
diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp
index 3188808b54..c970ec1092 100644
--- a/be/test/vec/core/block_test.cpp
+++ b/be/test/vec/core/block_test.cpp
@@ -196,10 +196,10 @@ void block_to_pb(
 }
 
 void fill_block_with_array_int(vectorized::Block& block) {
-    auto off_column = 
vectorized::ColumnVector<vectorized::IColumn::Offset>::create();
+    auto off_column = 
vectorized::ColumnVector<vectorized::IColumn::Offset64>::create();
     auto data_column = vectorized::ColumnVector<int32_t>::create();
     // init column array with [[1,2,3],[],[4],[5,6]]
-    std::vector<vectorized::IColumn::Offset> offs = {0, 3, 3, 4, 6};
+    std::vector<vectorized::IColumn::Offset64> offs = {0, 3, 3, 4, 6};
     std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6};
     for (size_t i = 1; i < offs.size(); ++i) {
         off_column->insert_data((const char*)(&offs[i]), 0);
@@ -218,10 +218,10 @@ void fill_block_with_array_int(vectorized::Block& block) {
 }
 
 void fill_block_with_array_string(vectorized::Block& block) {
-    auto off_column = 
vectorized::ColumnVector<vectorized::IColumn::Offset>::create();
+    auto off_column = 
vectorized::ColumnVector<vectorized::IColumn::Offset64>::create();
     auto data_column = vectorized::ColumnString::create();
     // init column array with [["abc","de"],["fg"],[], [""]];
-    std::vector<vectorized::IColumn::Offset> offs = {0, 2, 3, 3, 4};
+    std::vector<vectorized::IColumn::Offset64> offs = {0, 2, 3, 3, 4};
     std::vector<std::string> vals = {"abc", "de", "fg", ""};
     for (size_t i = 1; i < offs.size(); ++i) {
         off_column->insert_data((const char*)(&offs[i]), 0);
diff --git a/be/test/vec/core/column_array_test.cpp b/be/test/vec/core/column_array_test.cpp
index 60725501ab..7b71e0e4df 100644
--- a/be/test/vec/core/column_array_test.cpp
+++ b/be/test/vec/core/column_array_test.cpp
@@ -28,7 +28,7 @@
 
 namespace doris::vectorized {
 
-void check_array_offsets(const IColumn& arr, const 
std::vector<IColumn::Offset>& offs) {
+void check_array_offsets(const IColumn& arr, const 
std::vector<IColumn::Offset64>& offs) {
     auto arr_col = check_and_get_column<ColumnArray>(arr);
     ASSERT_EQ(arr_col->size(), offs.size());
     for (size_t i = 0; i < arr_col->size(); ++i) {
@@ -57,10 +57,10 @@ void check_array_data(const IColumn& arr, const std::vector<std::string>& data)
 }
 
 TEST(ColumnArrayTest, IntArrayTest) {
-    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto off_column = ColumnVector<IColumn::Offset64>::create();
     auto data_column = ColumnVector<int32_t>::create();
     // init column array with [[1,2,3],[],[4]]
-    std::vector<IColumn::Offset> offs = {0, 3, 3, 4};
+    std::vector<IColumn::Offset64> offs = {0, 3, 3, 4};
     std::vector<int32_t> vals = {1, 2, 3, 4};
     for (size_t i = 1; i < offs.size(); ++i) {
         off_column->insert_data((const char*)(&offs[i]), 0);
@@ -82,10 +82,10 @@ TEST(ColumnArrayTest, IntArrayTest) {
 }
 
 TEST(ColumnArrayTest, StringArrayTest) {
-    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto off_column = ColumnVector<IColumn::Offset64>::create();
     auto data_column = ColumnString::create();
     // init column array with [["abc","d"],["ef"],[], [""]];
-    std::vector<IColumn::Offset> offs = {0, 2, 3, 3, 4};
+    std::vector<IColumn::Offset64> offs = {0, 2, 3, 3, 4};
     std::vector<std::string> vals = {"abc", "d", "ef", ""};
     for (size_t i = 1; i < offs.size(); ++i) {
         off_column->insert_data((const char*)(&offs[i]), 0);
@@ -107,10 +107,10 @@ TEST(ColumnArrayTest, StringArrayTest) {
 }
 
 TEST(ColumnArrayTest, IntArrayPermuteTest) {
-    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto off_column = ColumnVector<IColumn::Offset64>::create();
     auto data_column = ColumnVector<int32_t>::create();
     // init column array with [[1,2,3],[],[4],[5,6]]
-    std::vector<IColumn::Offset> offs = {0, 3, 3, 4, 6};
+    std::vector<IColumn::Offset64> offs = {0, 3, 3, 4, 6};
     std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6};
     for (size_t i = 1; i < offs.size(); ++i) {
         off_column->insert_data((const char*)(&offs[i]), 0);
@@ -133,10 +133,10 @@ TEST(ColumnArrayTest, IntArrayPermuteTest) {
 }
 
 TEST(ColumnArrayTest, StringArrayPermuteTest) {
-    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto off_column = ColumnVector<IColumn::Offset64>::create();
     auto data_column = ColumnString::create();
     // init column array with [["abc","d"],["ef"],[], [""]];
-    std::vector<IColumn::Offset> offs = {0, 2, 3, 3, 4};
+    std::vector<IColumn::Offset64> offs = {0, 2, 3, 3, 4};
     std::vector<std::string> vals = {"abc", "d", "ef", ""};
     for (size_t i = 1; i < offs.size(); ++i) {
         off_column->insert_data((const char*)(&offs[i]), 0);
@@ -159,10 +159,10 @@ TEST(ColumnArrayTest, StringArrayPermuteTest) {
 }
 
 TEST(ColumnArrayTest, EmptyArrayPermuteTest) {
-    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto off_column = ColumnVector<IColumn::Offset64>::create();
     auto data_column = ColumnVector<int32_t>::create();
     // init column array with [[],[],[],[]]
-    std::vector<IColumn::Offset> offs = {0, 0, 0, 0, 0};
+    std::vector<IColumn::Offset64> offs = {0, 0, 0, 0, 0};
     std::vector<int32_t> vals = {};
     for (size_t i = 1; i < offs.size(); ++i) {
         off_column->insert_data((const char*)(&offs[i]), 0);
@@ -185,10 +185,10 @@ TEST(ColumnArrayTest, EmptyArrayPermuteTest) {
 }
 
 TEST(ColumnArrayTest, IntArrayReplicateTest) {
-    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto off_column = ColumnVector<IColumn::Offset64>::create();
     auto data_column = ColumnVector<int32_t>::create();
     // init column array with [[1,2,3],[],[4],[5,6]]
-    std::vector<IColumn::Offset> offs = {0, 3, 3, 4, 6};
+    std::vector<IColumn::Offset64> offs = {0, 3, 3, 4, 6};
     std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6};
     for (size_t i = 1; i < offs.size(); ++i) {
         off_column->insert_data((const char*)(&offs[i]), 0);
@@ -209,10 +209,10 @@ TEST(ColumnArrayTest, IntArrayReplicateTest) {
 }
 
 TEST(ColumnArrayTest, StringArrayReplicateTest) {
-    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto off_column = ColumnVector<IColumn::Offset64>::create();
     auto data_column = ColumnString::create();
     // init column array with [["abc","d"],["ef"],[], [""]];
-    std::vector<IColumn::Offset> offs = {0, 2, 3, 3, 4};
+    std::vector<IColumn::Offset64> offs = {0, 2, 3, 3, 4};
     std::vector<std::string> vals = {"abc", "d", "ef", ""};
     for (size_t i = 1; i < offs.size(); ++i) {
         off_column->insert_data((const char*)(&offs[i]), 0);
diff --git a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
index afa5bf7c6f..6cc32c05a3 100644
--- a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
+++ b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
@@ -613,7 +613,7 @@ TEST(ArrowColumnToDorisColumnTest, test_binary) {
 
 template <typename ArrowValueType, bool is_nullable = false>
 static inline std::shared_ptr<arrow::Array> create_array_array(
-        std::vector<IColumn::Offset>& vec_offsets, std::vector<bool>& null_map,
+        std::vector<IColumn::Offset64>& vec_offsets, std::vector<bool>& 
null_map,
         std::shared_ptr<arrow::DataType> value_type, 
std::shared_ptr<arrow::Array> values,
         size_t& counter) {
     using offset_type = typename arrow::ListType::offset_type;
@@ -646,7 +646,7 @@ static inline std::shared_ptr<arrow::Array> create_array_array(
 
 template <typename ArrowType, bool is_nullable>
 void test_arrow_to_array_column(ColumnWithTypeAndName& column,
-                                std::vector<IColumn::Offset>& vec_offsets,
+                                std::vector<IColumn::Offset64>& vec_offsets,
                                 std::vector<bool>& null_map,
                                 std::shared_ptr<arrow::DataType> value_type,
                                 std::shared_ptr<arrow::Array> values, const 
std::string& value,
@@ -698,7 +698,7 @@ void test_arrow_to_array_column(ColumnWithTypeAndName& column,
 
 template <typename ArrowType, bool is_nullable>
 void test_array(const std::vector<std::string>& test_cases, size_t 
num_elements,
-                std::vector<IColumn::Offset>& vec_offsets, std::vector<bool>& 
null_map,
+                std::vector<IColumn::Offset64>& vec_offsets, 
std::vector<bool>& null_map,
                 std::shared_ptr<arrow::DataType> value_type) {
     TypeDescriptor type(TYPE_ARRAY);
     type.children.push_back(TYPE_VARCHAR);
@@ -724,7 +724,7 @@ void test_array(const std::vector<std::string>& test_cases, size_t num_elements,
 TEST(ArrowColumnToDorisColumnTest, test_array) {
     std::vector<std::string> test_cases = {"1.2345678", "-12.34567890", 
"99999999999.99999999",
                                            "-99999999999.99999999"};
-    std::vector<IColumn::Offset> vec_offsets = {0, 3, 3, 4, 6, 6, 64};
+    std::vector<IColumn::Offset64> vec_offsets = {0, 3, 3, 4, 6, 6, 64};
     std::vector<bool> null_map = {false, true, false, false, false, false};
     test_array<arrow::BinaryType, false>(test_cases, 64, vec_offsets, null_map,
                                          arrow::list(arrow::binary()));


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to