cambyzju commented on a change in pull request #8217:
URL: https://github.com/apache/incubator-doris/pull/8217#discussion_r813631047



##########
File path: be/src/vec/columns/column_array.cpp
##########
@@ -0,0 +1,699 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnArray.cpp
+// and modified by Doris
+
+#include <string.h> // memcpy
+
+#include "vec/common/assert_cast.h"
+#include "vec/columns/collator.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/columns_common.h"
+#include "vec/columns/columns_number.h"
+
+namespace doris::vectorized {
+
+/// Error code constants. Each entry is only an `extern` declaration; the
+/// definitions are not part of this excerpt.
+namespace ErrorCodes {
+    extern const int NOT_IMPLEMENTED;
+    extern const int BAD_ARGUMENTS;
+    extern const int PARAMETER_OUT_OF_BOUND;
+    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
+    extern const int LOGICAL_ERROR;
+    extern const int TOO_LARGE_ARRAY_SIZE;
+}
+
+/** Obtaining an array as a Field can be slow for large arrays and consume a vast amount of memory.
+  * Just don't allow to do it.
+  * You can increase the limit if the following query:
+  *  SELECT range(10000000)
+  * will take less than 500ms on your machine.
+  *
+  * This is the maximum number of elements a single array may have when it is
+  * converted to a Field by ColumnArray::operator[] or ColumnArray::get.
+  */
+static constexpr size_t max_array_size_as_field = 1000000;
+
+/// Builds an array column from a nested (flattened elements) column and an offsets column.
+///
+/// @param nested_column  column holding the elements of all arrays laid out in succession;
+///                       ownership is moved into `data`.
+/// @param offsets_column column of offsets; must be a ColumnOffsets, otherwise the process
+///                       is aborted through LOG(FATAL). Ownership is moved into `offsets`.
+///
+/// When offsets are non-empty, the last offset must equal the number of nested elements,
+/// otherwise LOG(FATAL) is raised.
+ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column)
+    : data(std::move(nested_column)), offsets(std::move(offsets_column)) {
+    const ColumnOffsets * offsets_concrete = typeid_cast<const ColumnOffsets *>(offsets.get());
+
+    if (!offsets_concrete) {
+        LOG(FATAL) << "offsets_column must be a ColumnUInt64";
+    }
+
+    // `nested_column` has already been moved into `data` by the member initializer list,
+    // so the consistency check has to look at `data`; inspecting the moved-from parameter
+    // would silently skip the validation.
+    if (!offsets_concrete->empty() && data) {
+        Offset last_offset = offsets_concrete->get_data().back();
+
+        /// This will also prevent possible overflow in offset.
+        if (data->size() != last_offset) {
+            LOG(FATAL) << "offsets_column has data inconsistent with nested_column";
+        }
+    }
+
+    /** NOTE
+      * Arrays with constant value are possible and used in implementation of higher order functions (see FunctionReplicate).
+      * But in most cases, arrays with constant value are unexpected and code will work wrong. Use with caution.
+      */
+}
+
+/// Builds an array column with no rows around the given nested column.
+/// The nested column must be empty because no offsets are supplied for it;
+/// a non-empty nested column aborts through LOG(FATAL).
+ColumnArray::ColumnArray(MutableColumnPtr && nested_column)
+    : data(std::move(nested_column)) {
+    const bool nested_has_rows = !data->empty();
+    if (nested_has_rows) {
+        LOG(FATAL) << "Not empty data passed to ColumnArray, but no offsets passed";
+    }
+
+    // Start with a fresh, empty offsets column.
+    offsets = ColumnOffsets::create();
+}
+
+/// Returns the column's type name: the nested column's name wrapped in "Array(...)".
+std::string ColumnArray::get_name() const {
+    std::string name = "Array(";
+    name += get_data().get_name();
+    name += ")";
+    return name;
+}
+
+/// Returns a copy of this column that has exactly `to_size` rows.
+/// If `to_size` is not larger than the current size the copy is truncated;
+/// otherwise the copy is padded at the end with empty arrays.
+MutableColumnPtr ColumnArray::clone_resized(size_t to_size) const {
+    auto result = ColumnArray::create(get_data().clone_empty());
+    if (to_size == 0) {
+        return result;
+    }
+
+    const size_t current_rows = size();
+
+    if (to_size > current_rows) {
+        /// Grow: copy everything, then append empty arrays for the extra rows.
+        Offset tail_offset = 0;
+        if (current_rows > 0) {
+            result->get_offsets().assign(get_offsets().begin(), get_offsets().end());
+            result->get_data().insert_range_from(get_data(), 0, get_data().size());
+            tail_offset = get_offsets().back();
+        }
+
+        result->get_offsets().resize(to_size);
+        // Every appended row repeats the last offset, i.e. it is an empty array.
+        for (size_t row = current_rows; row < to_size; ++row) {
+            result->get_offsets()[row] = tail_offset;
+        }
+    } else {
+        /// Shrink (or keep): take the first `to_size` offsets and the elements they cover.
+        result->get_offsets().assign(get_offsets().begin(), get_offsets().begin() + to_size);
+        result->get_data().insert_range_from(get_data(), 0, get_offsets()[to_size - 1]);
+    }
+
+    return result;
+}
+
+/// Number of rows (arrays) in the column, which equals the number of stored offsets.
+size_t ColumnArray::size() const {
+    const auto & offsets_data = get_offsets();
+    return offsets_data.size();
+}
+
+/// Returns the n-th array as a Field holding an Array of element Fields.
+/// Arrays with more than max_array_size_as_field elements abort through LOG(FATAL).
+Field ColumnArray::operator[](size_t n) const {
+    size_t offset = offset_at(n);
+    size_t size = size_at(n);
+
+    if (size > max_array_size_as_field) {
+        // The separating space after the comma keeps this message identical to the one
+        // emitted by ColumnArray::get (previously it read "...single field,maximum size").
+        LOG(FATAL) << "Array of size " << size << " is too large to be manipulated as single field,"
+                   << " maximum size " << max_array_size_as_field;
+    }
+
+    Array res(size);
+
+    // Copy each nested element of row n into the result.
+    for (size_t i = 0; i < size; ++i) {
+        res[i] = get_data()[offset + i];
+    }
+
+    return res;
+}
+
+/// Stores the n-th array into `res` as an Array of element Fields.
+/// Arrays with more than max_array_size_as_field elements abort through LOG(FATAL).
+void ColumnArray::get(size_t n, Field & res) const {
+    const size_t first_elem = offset_at(n);
+    const size_t elem_count = size_at(n);
+
+    if (elem_count > max_array_size_as_field) {
+        LOG(FATAL) << "Array of size " << elem_count << " is too large to be manipulated as single field,"
+                   << " maximum size " << max_array_size_as_field;
+    }
+
+    res = Array(elem_count);
+    Array & elements = doris::vectorized::get<Array &>(res);
+
+    // Fill the freshly sized array in place, one nested element at a time.
+    size_t idx = 0;
+    while (idx < elem_count) {
+        get_data().get(first_elem + idx, elements[idx]);
+        ++idx;
+    }
+}
+
+StringRef ColumnArray::get_data_at(size_t n) const {
+    /** Returns the memory range spanning every element of the n-th array.
+      * Works for arrays of fixed length values.
+      * For arrays of strings and arrays of arrays the returned chunk may not map one-to-one
+      *  onto the elements, because it holds only the data laid in succession, not the offsets.
+      */
+    const auto & nested = get_data();
+    const StringRef head = nested.get_data_at_with_terminating_zero(offset_at(n));
+
+    // Empty array: zero-length reference anchored at the position of the first element.
+    if (size_at(n) == 0) {
+        return StringRef(head.data, 0);
+    }
+
+    const size_t last_index = get_offsets()[n] - 1;
+    const StringRef tail = nested.get_data_at_with_terminating_zero(last_index);
+
+    // The chunk runs from the start of the first element to the end of the last one.
+    const char * chunk_end = tail.data + tail.size;
+    return StringRef(head.data, chunk_end - head.data);
+}
+
+/// True when the n-th array is empty, i.e. its end offset equals the previous row's end offset.
+bool ColumnArray::is_default_at(size_t n) const {
+    const auto & offsets_data = get_offsets();
+    // For n == 0 this reads index -1, the same convention the rest of the file uses.
+    const ssize_t previous_row = static_cast<ssize_t>(n) - 1;
+    return offsets_data[previous_row] == offsets_data[n];
+}
+
+/// Appends one array whose elements are read from the raw buffer [pos, pos + length).
+/// Only supported when the nested column is fixed-size and contiguous; `length` must be a
+/// multiple of the nested value size. Violations abort through LOG(FATAL).
+void ColumnArray::insert_data(const char * pos, size_t length) {
+    if (!data->is_fixed_and_contiguous()) {
+        LOG(FATAL) << "Method insert_data is not supported for " << get_name();
+    }
+
+    const size_t value_width = data->size_of_value_if_fixed();
+    size_t inserted = 0;
+
+    if (length != 0) {
+        const char * const limit = pos + length;
+
+        // Consume the buffer one fixed-size value at a time.
+        while (pos + value_width <= limit) {
+            data->insert_data(pos, value_width);
+            pos += value_width;
+            ++inserted;
+        }
+
+        // Leftover bytes mean `length` was not a multiple of the value size.
+        if (pos != limit) {
+            LOG(FATAL) << "Incorrect length argument for method ColumnArray::insert_data";
+        }
+    }
+
+    get_offsets().push_back(get_offsets().back() + inserted);
+}
+
+/// Serializes the n-th array into `arena`: first the element count (as size_t), then every
+/// element serialized by the nested column. Returns a reference covering the whole record.
+StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const {
+    const size_t elem_count = size_at(n);
+    const size_t first_elem = offset_at(n);
+
+    // Header: the number of elements.
+    char * header = arena.alloc_continue(sizeof(elem_count), begin);
+    memcpy(header, &elem_count, sizeof(elem_count));
+
+    StringRef res(header, sizeof(elem_count));
+
+    const size_t end_elem = first_elem + elem_count;
+    for (size_t idx = first_elem; idx < end_elem; ++idx) {
+        const auto piece = get_data().serialize_value_into_arena(idx, arena, begin);
+        // Re-derive the record start from the newest piece before growing the size;
+        // the order of these two statements matters.
+        res.data = piece.data - res.size;
+        res.size += piece.size;
+    }
+
+    return res;
+}
+
+/// Reads one array from `pos` (element count as size_t followed by the serialized elements),
+/// appends it to this column and returns the position just past the consumed bytes.
+const char * ColumnArray::deserialize_and_insert_from_arena(const char * pos) {
+    const size_t elem_count = unaligned_load<size_t>(pos);
+    pos += sizeof(elem_count);
+
+    // Each nested deserialization advances the cursor past the element it consumed.
+    for (size_t remaining = elem_count; remaining > 0; --remaining) {
+        pos = get_data().deserialize_and_insert_from_arena(pos);
+    }
+
+    get_offsets().push_back(get_offsets().back() + elem_count);
+    return pos;
+}
+
+/// Feeds the n-th array into `hash`: first its element count, then every element in order.
+void ColumnArray::update_hash_with_value(size_t n, SipHash & hash) const {
+    const size_t elem_count = size_at(n);
+    const size_t first_elem = offset_at(n);
+
+    hash.update(elem_count);
+
+    const size_t end_elem = first_elem + elem_count;
+    for (size_t idx = first_elem; idx < end_elem; ++idx) {
+        get_data().update_hash_with_value(idx, hash);
+    }
+}
+
+/// Appends one array taken from a Field that holds an Array.
+void ColumnArray::insert(const Field & x) {
+    const Array & elements = doris::vectorized::get<const Array &>(x);
+    const size_t elem_count = elements.size();
+
+    // Push every element into the nested column, then record the new end offset.
+    size_t idx = 0;
+    while (idx < elem_count) {
+        get_data().insert(elements[idx]);
+        ++idx;
+    }
+
+    get_offsets().push_back(get_offsets().back() + elem_count);
+}
+
+/// Appends a copy of row `n` of `src_`, which must be a ColumnArray.
+void ColumnArray::insert_from(const IColumn & src_, size_t n) {
+    const ColumnArray & source = assert_cast<const ColumnArray &>(src_);
+    const size_t elem_count = source.size_at(n);
+    const size_t first_elem = source.offset_at(n);
+
+    // Copy the element range of the source row, then extend the offsets by its length.
+    get_data().insert_range_from(source.get_data(), first_elem, elem_count);
+    get_offsets().push_back(get_offsets().back() + elem_count);
+}
+
+/// Appends an empty array by repeating the current last offset.
+void ColumnArray::insert_default() {
+    /// NOTE 1: back() is usable even when the array is empty (due to the zero -1th element in PODArray).
+    /// NOTE 2: The value is copied first because a reference passed to push_back would be
+    ///         invalidated if the array is reallocated.
+    const auto previous_end = get_offsets().back();
+    get_offsets().push_back(previous_end);
+}
+
+void ColumnArray::pop_back(size_t n) {
+    auto & offsets_data = get_offsets();

Review comment:
       done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to