This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ca4af95e2c7 [refactor](storage) Drop PredicateColumnType (#64128)
ca4af95e2c7 is described below
commit ca4af95e2c7bfbb875d4e9d0ee01c7a1b0c43c61
Author: Chenyang Sun <[email protected]>
AuthorDate: Tue Jun 16 13:15:39 2026 +0800
[refactor](storage) Drop PredicateColumnType (#64128)
Drop PredicateColumnType
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
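Problem Summary: Remove PredicateColumnType, the column class used to hold
predicate columns in the storage layer (predicate_column.h and
predicate_column_test.cpp are deleted). As part of the removal, ColumnVector
and ColumnDecimal gain a filter_by_selector override, ColumnDictI32 drops its
FieldType member and builds a ColumnString where it previously built a
PredicateColumnType, the bloom-filter find_fixed_len_olap_engine probe takes a
const IColumn& instead of a raw const char* buffer, and LikeColumnPredicate is
no longer templated on the primitive type.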
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [x] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---------
Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
be/src/core/column/column_decimal.h | 12 +
be/src/core/column/column_dictionary.h | 17 +-
be/src/core/column/column_execute_util.h | 48 +-
be/src/core/column/column_vector.h | 12 +
be/src/core/column/predicate_column.h | 467 ---------
be/src/exprs/bloom_filter_func.h | 9 +-
be/src/exprs/bloom_filter_func_adaptor.h | 26 +-
be/src/exprs/bloom_filter_func_impl.h | 26 +-
be/src/exprs/create_predicate_function.h | 11 +-
be/src/exprs/function/like.h | 1 -
.../table/iceberg_delete_file_reader_helper.cpp | 6 +-
be/src/format/table/iceberg_reader.cpp | 3 +-
be/src/storage/predicate/bloom_filter_predicate.h | 9 +-
be/src/storage/predicate/comparison_predicate.h | 37 +-
be/src/storage/predicate/in_list_predicate.h | 36 +-
be/src/storage/predicate/like_column_predicate.cpp | 32 +-
be/src/storage/predicate/like_column_predicate.h | 7 +-
be/src/storage/predicate/predicate_creator.h | 2 +-
be/src/storage/schema.cpp | 102 +-
be/src/storage/schema.h | 2 +-
be/src/storage/segment/column_reader.cpp | 3 +-
be/src/storage/segment/segment_iterator.cpp | 17 +-
be/src/storage/tablet/tablet_reader.cpp | 3 +-
be/test/core/column/column_dictionary_test.cpp | 23 +-
be/test/core/column/column_nullable_test.cpp | 5 +-
be/test/core/column/common_column_test.h | 1 +
be/test/core/column/predicate_column_test.cpp | 1082 --------------------
be/test/exprs/bloom_filter_func_test.cpp | 42 +-
.../parquet/byte_array_dict_decoder_test.cpp | 6 +-
.../parquet/fix_length_dict_decoder_test.cpp | 6 +-
.../predicate/block_column_predicate_test.cpp | 22 +-
31 files changed, 222 insertions(+), 1853 deletions(-)
diff --git a/be/src/core/column/column_decimal.h
b/be/src/core/column/column_decimal.h
index 3c89bd3d77b..9c0e859d54d 100644
--- a/be/src/core/column/column_decimal.h
+++ b/be/src/core/column/column_decimal.h
@@ -148,6 +148,18 @@ public:
memset(data.data() + old_size, 0, length * sizeof(data[0]));
}
+ Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn*
col_ptr) override {
+ Self* output = assert_cast<Self*>(col_ptr);
+ auto& res_data = output->get_data();
+ DCHECK(res_data.empty())
+ << "filter_by_selector requires the destination column to be
empty";
+ res_data.resize(sel_size);
+ for (size_t i = 0; i < sel_size; i++) {
+ res_data[i] = data[sel[i]];
+ }
+ return Status::OK();
+ }
+
void insert_many_from(const IColumn& src, size_t position, size_t length)
override;
void pop_back(size_t n) override { data.resize_assume_reserved(data.size()
- n); }
diff --git a/be/src/core/column/column_dictionary.h
b/be/src/core/column/column_dictionary.h
index 7ac780e2bde..57d6ab66e8e 100644
--- a/be/src/core/column/column_dictionary.h
+++ b/be/src/core/column/column_dictionary.h
@@ -23,7 +23,6 @@
#include "core/column/column.h"
#include "core/column/column_string.h"
-#include "core/column/predicate_column.h"
#include "core/pod_array.h"
#include "core/string_ref.h"
#include "core/types.h"
@@ -49,7 +48,7 @@ private:
ColumnDictI32(const ColumnDictI32& src) {
throw doris::Exception(ErrorCode::INTERNAL_ERROR, "copy not supported
in ColumnDictionary");
}
- ColumnDictI32(FieldType type) : _type(type) {}
+ ColumnDictI32() = default;
public:
using Self = ColumnDictI32;
@@ -107,7 +106,7 @@ public:
MutableColumnPtr clone_resized(size_t size) const override {
DCHECK(size == 0);
- return create(_type);
+ return create();
}
void insert(const Field& x) override {
@@ -256,16 +255,7 @@ public:
if (is_dict_sorted() && !is_dict_code_converted()) {
convert_dict_codes_if_necessary();
}
- // if type is OLAP_FIELD_TYPE_CHAR, we need to construct TYPE_CHAR
PredicateColumnType,
- // because the string length will different from varchar and string
which needed to be processed after.
- auto create_column = [this]() -> MutableColumnPtr {
- if (_type == FieldType::OLAP_FIELD_TYPE_CHAR) {
- return PredicateColumnType<TYPE_CHAR>::create();
- }
- return PredicateColumnType<TYPE_STRING>::create();
- };
-
- auto res = create_column();
+ auto res = ColumnString::create();
res->reserve(_codes.capacity());
for (int code : _codes) {
auto value = _dict.get_value(code);
@@ -463,7 +453,6 @@ private:
bool _dict_code_converted = false;
Dictionary _dict;
Container _codes;
- FieldType _type;
std::pair<RowsetId, uint32_t> _rowset_segment_id;
std::vector<StringRef> _strings;
};
diff --git a/be/src/core/column/column_execute_util.h
b/be/src/core/column/column_execute_util.h
index a9966807455..49d4f67684f 100644
--- a/be/src/core/column/column_execute_util.h
+++ b/be/src/core/column/column_execute_util.h
@@ -16,13 +16,17 @@
// under the License.
#pragma once
+#include <concepts>
#include <tuple>
+#include <type_traits>
#include <variant>
+#include <vector>
#include "core/block/column_with_type_and_name.h"
#include "core/column/column.h"
#include "core/column/column_const.h"
#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
#include "core/column/column_vector.h"
#include "core/data_type/define_primitive_type.h"
#include "core/data_type/primitive_type.h"
@@ -32,27 +36,55 @@ namespace doris {
// Utility tools for convenient column execution
-// ColumnElementView is used to distinguish between scalar columns and string
columns
+// Per-row read view over a column. The pointer returned by ptr_at on the
+// string specialization is valid only until the next ptr_at call.
+namespace detail {
+
template <PrimitiveType PType>
-struct ColumnElementView {
+struct NumericElementView {
using ColumnType = typename PrimitiveTypeTraits<PType>::ColumnType;
using ElementType = typename ColumnType::value_type;
const typename ColumnType::Container& data;
- ElementType get_element(size_t idx) const { return data[idx]; }
- const ElementType* get_data() const { return data.data(); }
- ColumnElementView(const IColumn& column)
+ NumericElementView(const IColumn& column)
: data(assert_cast<const ColumnType&>(column).get_data()) {}
+
+ ElementType get_element(size_t idx) const { return data[idx]; }
+ const ElementType* get_data() const { return data.data(); }
+ ElementType operator[](size_t idx) const { return data[idx]; }
+ const ElementType* ptr_at(size_t idx) const { return data.data() + idx; }
+ size_t size() const { return data.size(); }
};
-template <>
-struct ColumnElementView<TYPE_STRING> {
+struct StringElementView {
using ColumnType = ColumnString;
using ElementType = StringRef;
const ColumnString& string_column;
- ColumnElementView(const IColumn& column)
+ mutable StringRef _cell {}; // staging for ptr_at
+
+ StringElementView(const IColumn& column)
: string_column(assert_cast<const ColumnString&>(column)) {}
+
StringRef get_element(size_t idx) const { return
string_column.get_data_at(idx); }
+ StringRef operator[](size_t idx) const { return
string_column.get_data_at(idx); }
+ const StringRef* ptr_at(size_t idx) const {
+ _cell = string_column.get_data_at(idx);
+ return &_cell;
+ }
+ size_t size() const { return string_column.size(); }
+};
+
+} // namespace detail
+
+template <PrimitiveType PType>
+using ColumnElementView = std::conditional_t<is_string_type(PType),
detail::StringElementView,
+
detail::NumericElementView<PType>>;
+
+template <typename T>
+concept ColumnElementSubscriptable = requires(const T& v, size_t i) {
+ typename T::ElementType;
+ { v[i] } -> std::convertible_to<typename T::ElementType>;
+ { v.size() } -> std::convertible_to<size_t>;
};
// ColumnView is used to handle the nullable and const properties of a column.
diff --git a/be/src/core/column/column_vector.h
b/be/src/core/column/column_vector.h
index ae1742fe53e..d39326fa8c0 100644
--- a/be/src/core/column/column_vector.h
+++ b/be/src/core/column/column_vector.h
@@ -271,6 +271,18 @@ public:
void insert_value(const value_type value) { data.push_back(value); }
+ Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn*
col_ptr) override {
+ Self* output = assert_cast<Self*>(col_ptr);
+ auto& res_data = output->get_data();
+ DCHECK(res_data.empty())
+ << "filter_by_selector requires the destination column to be
empty";
+ res_data.resize(sel_size);
+ for (size_t i = 0; i < sel_size; i++) {
+ res_data[i] = data[sel[i]];
+ }
+ return Status::OK();
+ }
+
/// This method implemented in header because it could be possibly
devirtualized.
int compare_at(size_t n, size_t m, const IColumn& rhs_, int
nan_direction_hint) const override {
return Compare::compare(
diff --git a/be/src/core/column/predicate_column.h
b/be/src/core/column/predicate_column.h
deleted file mode 100644
index 0d7df961def..00000000000
--- a/be/src/core/column/predicate_column.h
+++ /dev/null
@@ -1,467 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <optional>
-
-#include "core/arena.h"
-#include "core/column/column.h"
-#include "core/column/column_decimal.h"
-#include "core/column/column_string.h"
-#include "core/column/column_vector.h"
-#include "core/data_type/define_primitive_type.h"
-#include "core/data_type/primitive_type.h"
-#include "core/decimal12.h"
-#include "core/string_ref.h"
-#include "core/types.h"
-#include "core/uint24.h"
-
-namespace doris {
-
-/**
- * used to keep predicate column in storage layer
- *
- * T = predicate column type
- */
-template <PrimitiveType Type>
-class PredicateColumnType final : public COWHelper<IColumn,
PredicateColumnType<Type>> {
-private:
- PredicateColumnType() = default;
- PredicateColumnType(const size_t n) : data(n) {}
- PredicateColumnType(const PredicateColumnType& src) :
data(src.data.begin(), src.data.end()) {}
- friend class COWHelper<IColumn, PredicateColumnType<Type>>;
- using T = std::conditional_t<is_string_type(Type), StringRef,
- typename PrimitiveTypeTraits<Type>::CppType>;
- using ColumnType = typename PrimitiveTypeTraits<Type>::ColumnType;
-
- void insert_string_to_res_column(const uint16_t* sel, size_t sel_size,
ColumnString* res_ptr) {
- _refs.resize(sel_size);
- size_t length = 0;
- for (size_t i = 0; i < sel_size; i++) {
- uint16_t n = sel[i];
- auto& sv = reinterpret_cast<StringRef&>(data[n]);
- _refs[i].data = sv.data;
- _refs[i].size = sv.size;
- length += sv.size;
- }
- res_ptr->get_offsets().reserve(sel_size +
res_ptr->get_offsets().size());
- res_ptr->get_chars().reserve(length + res_ptr->get_chars().size());
- res_ptr->insert_many_strings_without_reserve(_refs.data(), sel_size);
- }
-
- template <PrimitiveType Y, template <PrimitiveType> typename
ColumnContainer>
- void insert_default_value_res_column(const uint16_t* sel, size_t sel_size,
- ColumnContainer<Y>* res_ptr) {
- static_assert(std::is_same_v<ColumnContainer<Y>, ColumnType>);
- auto& res_data = res_ptr->get_data();
- DCHECK(res_data.empty());
- // Has to reserve first, could not call resize or reserve after
get_end_ptr
- // because reserve or resize may change memory block.
- size_t org_num = res_data.size();
- res_data.reserve(sel_size);
- auto* y = (typename
PrimitiveTypeTraits<Y>::CppType*)res_data.get_end_ptr();
- for (size_t i = 0; i < sel_size; i++) {
- y[i] = data[sel[i]];
- }
- res_data.resize(org_num + sel_size);
- }
-
- void insert_byte_to_res_column(const uint16_t* sel, size_t sel_size,
IColumn* res_ptr) {
- for (size_t i = 0; i < sel_size; i++) {
- uint16_t n = sel[i];
- char* ch_val = reinterpret_cast<char*>(&data[n]);
- res_ptr->insert_data(ch_val, 0);
- }
- }
-
- void insert_many_default_type(const char* data_ptr, size_t num) {
- auto old_size = data.size();
- data.resize(old_size + num);
- memcpy(reinterpret_cast<void*>(data.data() + old_size), data_ptr, num
* sizeof(T));
- }
-
-public:
- using Self = PredicateColumnType;
- using value_type = T;
- using Container = PaddedPODArray<value_type>;
-
- size_t size() const override { return data.size(); }
-
- StringRef get_data_at(size_t n) const override {
- if constexpr (std::is_same_v<T, StringRef>) {
- return reinterpret_cast<const StringRef&>(data[n]);
- } else {
- throw doris::Exception(
- ErrorCode::INTERNAL_ERROR,
- "should not call get_data_at in predicate column except
for string type");
- }
- }
-
- void insert_from(const IColumn& src, size_t n) override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "should not call insert_from in predicate
column");
- }
-
- void insert_range_from(const IColumn& src, size_t start, size_t length)
override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "should not call insert_range_from in predicate
column");
- }
-
- void insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
- const uint32_t* indices_end) override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "should not call insert_indices_from in
predicate column");
- }
-
- void pop_back(size_t n) override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "should not call pop_back in predicate column");
- }
-
- void update_hash_with_value(size_t n, SipHash& hash) const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "should not call update_hash_with_value in
predicate column");
- }
-
- void insert_string_value(const char* data_ptr, size_t length) {
- StringRef sv((char*)data_ptr, length);
- data.push_back_without_reserve(sv);
- }
-
- // used for int128
- void insert_in_copy_way(const char* data_ptr, size_t length) {
- T val {};
- memcpy(&val, data_ptr, sizeof(val));
- data.push_back_without_reserve(val);
- }
-
- void insert_default_type(const char* data_ptr, size_t length) {
- T* val = (T*)data_ptr;
- data.push_back_without_reserve(*val);
- }
-
- void insert_data(const char* data_ptr, size_t length) override {
- if constexpr (std::is_same_v<T, StringRef>) {
- insert_string_value(data_ptr, length);
- } else if constexpr (std::is_same_v<T, Int128>) {
- insert_in_copy_way(data_ptr, length);
- } else {
- insert_default_type(data_ptr, length);
- }
- }
-
- void insert_many_date(const char* data_ptr, size_t num) {
- constexpr size_t input_type_size =
sizeof(PrimitiveTypeTraits<TYPE_DATE>::StorageFieldType);
- static_assert(input_type_size == sizeof(uint24_t));
- const auto* input_data_ptr = reinterpret_cast<const
uint24_t*>(data_ptr);
- auto* res_ptr =
reinterpret_cast<VecDateTimeValue*>(data.get_end_ptr());
- size_t old_size = data.size();
- for (int i = 0; i < num; i++) {
-
res_ptr[i].set_olap_date(unaligned_load<uint24_t>(&input_data_ptr[i]));
- }
- data.resize(old_size + num);
- }
-
- void insert_many_datetime(const char* data_ptr, size_t num) {
- constexpr size_t input_type_size =
- sizeof(PrimitiveTypeTraits<TYPE_DATETIME>::StorageFieldType);
- static_assert(input_type_size == sizeof(uint64_t));
- const auto* input_data_ptr = reinterpret_cast<const
uint64_t*>(data_ptr);
- auto* res_ptr =
reinterpret_cast<VecDateTimeValue*>(data.get_end_ptr());
- size_t old_size = data.size();
- for (int i = 0; i < num; i++) {
- res_ptr[i].from_olap_datetime(input_data_ptr[i]);
- }
- data.resize(old_size + num);
- }
-
- // The logic is same to ColumnDecimal::insert_many_fix_len_data
- void insert_many_decimalv2(const char* data_ptr, size_t num) {
- size_t old_size = data.size();
- data.resize(old_size + num);
-
- auto* target = (DecimalV2Value*)(data.data() + old_size);
- for (int i = 0; i < num; i++) {
- const char* cur_ptr = data_ptr + sizeof(decimal12_t) * i;
- auto int_value = unaligned_load<int64_t>(cur_ptr);
- int32_t frac_value = *(int32_t*)(cur_ptr + sizeof(int64_t));
- target[i].from_olap_decimal(int_value, frac_value);
- }
- }
-
- void insert_many_fix_len_data(const char* data_ptr, size_t num) override {
- if constexpr (Type == TYPE_DECIMALV2) {
- // DecimalV2 is special, its storage is <int64, int32>, but its
compute type is <int64,int64>
- // should convert here, but it may have some performance lost
- insert_many_decimalv2(data_ptr, num);
- } else if constexpr (std::is_same_v<T, StringRef>) {
- // here is unreachable, just for compilation to be able to pass
- } else if constexpr (Type == TYPE_DATE) {
- // Datev1 is special, its storage is uint24, but its compute type
is actual int64.
- insert_many_date(data_ptr, num);
- } else if constexpr (Type == TYPE_DATETIME) {
- insert_many_datetime(data_ptr, num);
- } else {
- insert_many_default_type(data_ptr, num);
- }
- }
-
- void insert_many_dict_data(const int32_t* data_array, size_t start_index,
const StringRef* dict,
- size_t num, uint32_t /*dict_num*/) override {
- if constexpr (std::is_same_v<T, StringRef>) {
- for (size_t end_index = start_index + num; start_index <
end_index; ++start_index) {
- int32_t codeword = data_array[start_index];
- insert_string_value(dict[codeword].data, dict[codeword].size);
- }
- }
- }
-
- void insert_many_continuous_binary_data(const char* data_, const uint32_t*
offsets,
- const size_t num) override {
- if (UNLIKELY(num == 0)) {
- return;
- }
- if constexpr (std::is_same_v<T, StringRef>) {
- const auto total_mem_size = offsets[num] - offsets[0];
- char* destination = _arena.alloc(total_mem_size);
- memcpy(destination, data_ + offsets[0], total_mem_size);
- size_t org_elem_num = data.size();
- data.resize(org_elem_num + num);
-
- auto* data_ptr = &data[org_elem_num];
- for (size_t i = 0; i != num; ++i) {
- data_ptr[i].data = destination + offsets[i] - offsets[0];
- data_ptr[i].size = offsets[i + 1] - offsets[i];
- }
- DCHECK(data_ptr[num - 1].data + data_ptr[num - 1].size ==
destination + total_mem_size);
- }
- }
-
- void insert_many_strings(const StringRef* strings, size_t num) override {
- if (num == 0) {
- return;
- }
- if constexpr (std::is_same_v<T, StringRef>) {
- size_t total_mem_size = 0;
- for (size_t i = 0; i < num; i++) {
- total_mem_size += strings[i].size;
- }
-
- char* destination = _arena.alloc(total_mem_size);
- char* org_dst = destination;
- size_t org_elem_num = data.size();
- data.resize(org_elem_num + num);
- uint64_t fragment_start_offset = 0;
- size_t fragment_len = 0;
- for (size_t i = 0; i < num; i++) {
- data[org_elem_num + i].data = destination + fragment_len;
- data[org_elem_num + i].size = strings[i].size;
- fragment_len += strings[i].size;
- // Compute the largest continuous memcpy block and copy them.
- // If this is the last element in data array, then should copy
the current memory block.
- if (i == num - 1 || strings[i + 1].data != strings[i].data +
strings[i].size) {
- memcpy(destination, strings[fragment_start_offset].data,
fragment_len);
- destination += fragment_len;
- fragment_start_offset = i == num - 1 ? 0 : i + 1;
- fragment_len = 0;
- }
- }
- CHECK(destination - org_dst == total_mem_size)
- << "Copied size not equal to expected size";
- } else {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "Method insert_many_binary_data is not
supported");
- }
- }
-
- // Insert `num` entries with only length information (no actual char data).
- // The chars buffer is zero-filled so that filter_by_selector can safely
- // memcpy without reading meaningful content. Used in OFFSET_ONLY reading
- // mode where only string lengths (for length() function) are needed.
- void insert_offsets_from_lengths(const uint32_t* lengths, size_t num)
override {
- if constexpr (std::is_same_v<T, StringRef>) {
- if (UNLIKELY(num == 0)) {
- return;
- }
- size_t total_bytes = 0;
- for (size_t i = 0; i < num; ++i) {
- total_bytes += lengths[i];
- }
- // Allocate and zero-fill a single backing buffer so that each
StringRef
- // points to valid (though meaningless) memory. filter_by_selector
will
- // memcpy from these pointers, so they must not be null for
non-zero lengths.
- char* buf = total_bytes > 0 ? _arena.alloc(total_bytes) : nullptr;
- if (total_bytes > 0) {
- memset(buf, 0, total_bytes);
- }
- size_t org_elem_num = data.size();
- data.resize(org_elem_num + num);
- size_t offset = 0;
- for (size_t i = 0; i < num; ++i) {
- // For zero-length strings, data pointer is null;
insert_many_strings
- // and filter_by_selector both guard on size > 0 before
dereferencing.
- data[org_elem_num + i].data = (lengths[i] > 0) ? (buf +
offset) : nullptr;
- data[org_elem_num + i].size = lengths[i];
- offset += lengths[i];
- }
- } else {
- IColumn::insert_offsets_from_lengths(lengths, num);
- }
- }
-
- void insert_default() override { data.push_back(T()); }
-
- void clear() override {
- data.clear();
- _arena.clear();
- }
-
- size_t byte_size() const override { return data.size() * sizeof(T); }
-
- size_t allocated_bytes() const override { return byte_size(); }
-
- bool has_enough_capacity(const IColumn& src) const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "has_enough_capacity not supported in
PredicateColumnType");
- }
-
- void reserve(size_t n) override { data.reserve(n); }
-
- std::string get_name() const override { return type_to_string(Type); }
-
- MutableColumnPtr clone_resized(size_t size) const override {
- DCHECK(size == 0);
- return this->create();
- }
-
- void insert_duplicate_fields(const Field& x, const size_t n) override {
- if constexpr (is_string_type(Type)) {
- const auto& str = x.get<TYPE_STRING>();
- auto* dst = _arena.alloc(str.size() * n);
- for (size_t i = 0; i < n; i++) {
- memcpy(dst, str.data(), str.size());
- insert_string_value(dst, str.size());
- dst += str.size();
- }
- } else if constexpr (Type == TYPE_LARGEINT) {
- const auto& v = x.get<TYPE_LARGEINT>();
- for (size_t i = 0; i < n; i++) {
- insert_in_copy_way(reinterpret_cast<const char*>(&v),
sizeof(v));
- }
- } else {
- const auto& v = x.get<Type>();
- for (size_t i = 0; i < n; i++) {
- insert_default_type(reinterpret_cast<const char*>(&v),
sizeof(v));
- }
- }
- }
-
- void insert(const Field& x) override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "insert not supported in PredicateColumnType");
- }
-
- [[noreturn]] Field operator[](size_t n) const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "operator[] not supported in
PredicateColumnType");
- }
-
- void get(size_t n, Field& res) const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "get field not supported in
PredicateColumnType");
- }
-
- [[noreturn]] bool get_bool(size_t n) const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "get field not supported in
PredicateColumnType");
- }
-
- [[noreturn]] Int64 get_int(size_t n) const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "get field not supported in
PredicateColumnType");
- }
-
- // it's impossible to use ComplexType as key , so we don't have to
implement them
- [[noreturn]] StringRef serialize_value_into_arena(size_t n, Arena& arena,
- char const*& begin)
const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "serialize_value_into_arena not supported in
PredicateColumnType");
- }
-
- [[noreturn]] const char* deserialize_and_insert_from_arena(const char*
pos) override {
- throw doris::Exception(
- ErrorCode::INTERNAL_ERROR,
- "deserialize_and_insert_from_arena not supported in
PredicateColumnType");
- }
-
- [[noreturn]] StringRef get_raw_data() const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "get_raw_data not supported in
PredicateColumnType");
- }
-
- [[noreturn]] bool structure_equals(const IColumn& rhs) const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "structure_equals not supported in
PredicateColumnType");
- }
-
- [[noreturn]] ColumnPtr filter(const IColumn::Filter& filt,
- ssize_t result_size_hint) const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "filter not supported in PredicateColumnType");
- }
-
- [[noreturn]] size_t filter(const IColumn::Filter&) override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "filter not supported in PredicateColumnType");
- }
-
- [[noreturn]] MutableColumnPtr permute(const IColumn::Permutation& perm,
- size_t limit) const override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "permute not supported in PredicateColumnType");
- }
-
- Container& get_data() { return data; }
-
- const Container& get_data() const { return data; }
-
- Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn*
col_ptr) override {
- ColumnType* column = assert_cast<ColumnType*>(col_ptr);
- if constexpr (is_string_type(Type)) {
- insert_string_to_res_column(sel, sel_size, column);
- } else if constexpr (Type == TYPE_BOOLEAN) {
- insert_byte_to_res_column(sel, sel_size, col_ptr);
- } else {
- insert_default_value_res_column(sel, sel_size, column);
- }
- return Status::OK();
- }
-
- void replace_column_data(const IColumn&, size_t row, size_t self_row = 0)
override {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "should not call replace_column_data in
predicate column");
- }
-
-private:
- Container data;
- // manages the memory for slice's data(For string type)
- Arena _arena;
- std::vector<StringRef> _refs;
-};
-
-} // namespace doris
diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h
index 60fc2983fca..ae7057a6e90 100644
--- a/be/src/exprs/bloom_filter_func.h
+++ b/be/src/exprs/bloom_filter_func.h
@@ -136,7 +136,7 @@ public:
virtual void find_fixed_len(const ColumnPtr& column, uint8_t* results,
const uint8_t* __restrict filter = nullptr) =
0;
- virtual uint16_t find_fixed_len_olap_engine(const char* data, const
uint8_t* nullmap,
+ virtual uint16_t find_fixed_len_olap_engine(const IColumn& column, const
uint8_t* nullmap,
uint16_t* offsets, int number,
bool is_parse_column) = 0;
@@ -198,9 +198,10 @@ public:
return new_size;
}
- uint16_t find_fixed_len_olap_engine(const char* data, const uint8_t*
nullmap, uint16_t* offsets,
- int number, bool is_parse_column)
override {
- return OpV2::find_batch_olap_engine(*_bloom_filter, data, nullmap,
offsets, number,
+ uint16_t find_fixed_len_olap_engine(const IColumn& column, const uint8_t*
nullmap,
+ uint16_t* offsets, int number,
+ bool is_parse_column) override {
+ return OpV2::find_batch_olap_engine(*_bloom_filter, column, nullmap,
offsets, number,
is_parse_column);
}
diff --git a/be/src/exprs/bloom_filter_func_adaptor.h
b/be/src/exprs/bloom_filter_func_adaptor.h
index 8f2b3eb512f..e79fa4b1816 100644
--- a/be/src/exprs/bloom_filter_func_adaptor.h
+++ b/be/src/exprs/bloom_filter_func_adaptor.h
@@ -80,11 +80,19 @@ private:
template <typename fixed_len_to_uint32_method, class T>
struct CommonFindOp {
- static uint16_t find_batch_olap_engine(const BloomFilterAdaptor&
bloom_filter, const char* data,
- const uint8_t* nullmap, uint16_t*
offsets, int number,
+ static uint16_t find_batch_olap_engine(const BloomFilterAdaptor&
bloom_filter,
+ const doris::IColumn& column, const
uint8_t* nullmap,
+ uint16_t* offsets, int number,
const bool is_parse_column) {
- return find_batch_olap<fixed_len_to_uint32_method, T>(bloom_filter,
data, nullmap, offsets,
- number,
is_parse_column);
+ // This path reinterpret-casts the column's raw data as a contiguous
T[], which is only
+ // valid for fixed-length types. String types (StringRef/String) are
not laid out this way
+ // and must go through StringFindOp instead.
+ static_assert(!std::is_same_v<T, StringRef> && !std::is_same_v<T,
String>,
+ "find_batch_olap_engine does not support string types;
use StringFindOp");
+ const T* __restrict data = reinterpret_cast<const
T*>(column.get_raw_data().data);
+ return find_batch_olap_impl<fixed_len_to_uint32_method>(
+ bloom_filter, [data](int i) { return data[i]; }, nullmap,
offsets, number,
+ is_parse_column);
}
template <typename Func>
@@ -171,6 +179,16 @@ template <typename fixed_len_to_uint32_method>
struct StringFindOp : CommonFindOp<fixed_len_to_uint32_method, StringRef> {
using CommonFindOp<fixed_len_to_uint32_method,
StringRef>::for_each_with_filter;
+ static uint16_t find_batch_olap_engine(const BloomFilterAdaptor&
bloom_filter,
+ const doris::IColumn& column, const
uint8_t* nullmap,
+ uint16_t* offsets, int number,
+ const bool is_parse_column) {
+ const auto& col = assert_cast<const ColumnString&>(column);
+ return find_batch_olap_impl<fixed_len_to_uint32_method>(
+ bloom_filter, [&col](int i) { return col.get_data_at(i); },
nullmap, offsets,
+ number, is_parse_column);
+ }
+
static void insert_batch(BloomFilterAdaptor& bloom_filter, const
ColumnPtr& column,
size_t start) {
auto _insert_batch_col_str = [&](const auto& col, const uint8_t*
__restrict nullmap,
diff --git a/be/src/exprs/bloom_filter_func_impl.h
b/be/src/exprs/bloom_filter_func_impl.h
index 4ecb39cb267..0bbede8c26e 100644
--- a/be/src/exprs/bloom_filter_func_impl.h
+++ b/be/src/exprs/bloom_filter_func_impl.h
@@ -19,6 +19,7 @@
#include <type_traits>
+#include "common/compiler_util.h"
#include "core/data_type/primitive_type.h"
#include "core/string_ref.h"
#include "exec/common/hash_table/hash.h"
@@ -54,21 +55,18 @@ struct fixed_len_to_uint32_v2 {
}
};
-template <typename fixed_len_to_uint32_method, typename T>
-uint16_t find_batch_olap(const BloomFilterAdaptor& bloom_filter, const char*
data,
- const uint8_t* nullmap, uint16_t* offsets, int number,
- const bool is_parse_column) {
- auto get_element = [](const char* input_data, int idx) {
- return ((const T*)(input_data))[idx];
- };
-
+// Per-row probe driver. `read(idx)` yields the row value (T or StringRef).
+template <typename fixed_len_to_uint32_method, typename Accessor>
+inline ALWAYS_INLINE uint16_t find_batch_olap_impl(const BloomFilterAdaptor&
bloom_filter,
+ Accessor read, const
uint8_t* nullmap,
+ uint16_t* offsets, int
number,
+ const bool is_parse_column)
{
uint16_t new_size = 0;
if (is_parse_column) {
if (nullmap == nullptr) {
for (uint16_t i = 0; i < number; i++) {
uint16_t idx = offsets[i];
- if (!bloom_filter.test_element<fixed_len_to_uint32_method>(
- get_element(data, idx))) {
+ if
(!bloom_filter.test_element<fixed_len_to_uint32_method>(read(idx))) {
continue;
}
offsets[new_size++] = idx;
@@ -81,8 +79,7 @@ uint16_t find_batch_olap(const BloomFilterAdaptor&
bloom_filter, const char* dat
continue;
}
} else {
- if (!bloom_filter.test_element<fixed_len_to_uint32_method>(
- get_element(data, idx))) {
+ if
(!bloom_filter.test_element<fixed_len_to_uint32_method>(read(idx))) {
continue;
}
}
@@ -92,7 +89,7 @@ uint16_t find_batch_olap(const BloomFilterAdaptor&
bloom_filter, const char* dat
} else {
if (nullmap == nullptr) {
for (uint16_t i = 0; i < number; i++) {
- if
(!bloom_filter.test_element<fixed_len_to_uint32_method>(get_element(data, i))) {
+ if
(!bloom_filter.test_element<fixed_len_to_uint32_method>(read(i))) {
continue;
}
offsets[new_size++] = i;
@@ -104,8 +101,7 @@ uint16_t find_batch_olap(const BloomFilterAdaptor&
bloom_filter, const char* dat
continue;
}
} else {
- if (!bloom_filter.test_element<fixed_len_to_uint32_method>(
- get_element(data, i))) {
+ if
(!bloom_filter.test_element<fixed_len_to_uint32_method>(read(i))) {
continue;
}
}
diff --git a/be/src/exprs/create_predicate_function.h
b/be/src/exprs/create_predicate_function.h
index 6bfcca1801d..1955ea7c25f 100644
--- a/be/src/exprs/create_predicate_function.h
+++ b/be/src/exprs/create_predicate_function.h
@@ -196,14 +196,9 @@ std::shared_ptr<ColumnPredicate>
create_olap_column_predicate(
uint32_t column_id, const std::shared_ptr<FunctionFilter>& filter,
const TabletColumn* column, bool) {
// currently only support like predicate
- if constexpr (PT == TYPE_CHAR) {
- return
LikeColumnPredicate<TYPE_CHAR>::create_shared(filter->_opposite, column_id,
- column->name(),
filter->_fn_ctx,
-
filter->_string_param);
- } else if constexpr (PT == TYPE_VARCHAR || PT == TYPE_STRING) {
- return
LikeColumnPredicate<TYPE_STRING>::create_shared(filter->_opposite, column_id,
- column->name(),
filter->_fn_ctx,
-
filter->_string_param);
+ if constexpr (PT == TYPE_CHAR || PT == TYPE_VARCHAR || PT == TYPE_STRING) {
+ return LikeColumnPredicate::create_shared(filter->_opposite,
column_id, column->name(),
+ filter->_fn_ctx,
filter->_string_param);
}
throw Exception(ErrorCode::INTERNAL_ERROR, "function filter do not support
type {}", PT);
}
diff --git a/be/src/exprs/function/like.h b/be/src/exprs/function/like.h
index 4c2631d10c3..461c97956bc 100644
--- a/be/src/exprs/function/like.h
+++ b/be/src/exprs/function/like.h
@@ -33,7 +33,6 @@
#include "common/status.h"
#include "core/block/column_numbers.h"
#include "core/column/column_string.h"
-#include "core/column/predicate_column.h"
#include "core/data_type/data_type_number.h"
#include "core/data_type/define_primitive_type.h"
#include "core/string_ref.h"
diff --git a/be/src/format/table/iceberg_delete_file_reader_helper.cpp
b/be/src/format/table/iceberg_delete_file_reader_helper.cpp
index 2e7045c81ad..c0fe46e1d35 100644
--- a/be/src/format/table/iceberg_delete_file_reader_helper.cpp
+++ b/be/src/format/table/iceberg_delete_file_reader_helper.cpp
@@ -243,9 +243,9 @@ Status read_iceberg_position_delete_file(const
TIcebergDeleteFileDesc& delete_fi
while (!eof) {
Block block;
if (dictionary_coded) {
- block.insert(ColumnWithTypeAndName(
-
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_VARCHAR),
- std::make_shared<DataTypeString>(),
ICEBERG_FILE_PATH));
+ block.insert(ColumnWithTypeAndName(ColumnDictI32::create(),
+
std::make_shared<DataTypeString>(),
+ ICEBERG_FILE_PATH));
} else {
block.insert(ColumnWithTypeAndName(ColumnString::create(),
std::make_shared<DataTypeString>(),
diff --git a/be/src/format/table/iceberg_reader.cpp
b/be/src/format/table/iceberg_reader.cpp
index d66d91d162f..6f6d0b9e9d5 100644
--- a/be/src/format/table/iceberg_reader.cpp
+++ b/be/src/format/table/iceberg_reader.cpp
@@ -391,8 +391,7 @@ Status
IcebergParquetReader::_read_position_delete_file(const TFileRangeDesc* de
bool eof = false;
while (!eof) {
Block block = {dictionary_coded
- ? ColumnWithTypeAndName {ColumnDictI32::create(
-
FieldType::OLAP_FIELD_TYPE_VARCHAR),
+ ? ColumnWithTypeAndName
{ColumnDictI32::create(),
data_type_file_path,
ICEBERG_FILE_PATH}
: ColumnWithTypeAndName {data_type_file_path,
ICEBERG_FILE_PATH},
diff --git a/be/src/storage/predicate/bloom_filter_predicate.h
b/be/src/storage/predicate/bloom_filter_predicate.h
index cc252394ce2..a547ec1ff9c 100644
--- a/be/src/storage/predicate/bloom_filter_predicate.h
+++ b/be/src/storage/predicate/bloom_filter_predicate.h
@@ -20,8 +20,8 @@
#include "core/assert_cast.h"
#include "core/column/column_dictionary.h"
#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
#include "core/column/column_vector.h"
-#include "core/column/predicate_column.h"
#include "core/data_type/primitive_type.h"
#include "exprs/bloom_filter_func.h"
#include "exprs/runtime_filter_expr.h"
@@ -80,11 +80,8 @@ private:
new_size = _specific_filter->template
find_dict_olap_engine<is_nullable>(
dict_col, null_map, sel, size);
} else {
- const auto& data =
- assert_cast<const
PredicateColumnType<PredicateEvaluateType<T>>*>(&column)
- ->get_data();
- new_size =
_specific_filter->find_fixed_len_olap_engine((char*)data.data(), null_map,
- sel, size,
data.size() != size);
+ new_size = _specific_filter->find_fixed_len_olap_engine(
+ column, null_map, sel, size,
/*is_parse_column=*/column.size() != size);
}
return new_size;
}
diff --git a/be/src/storage/predicate/comparison_predicate.h
b/be/src/storage/predicate/comparison_predicate.h
index 8bb6b6853fd..85de8abb0ff 100644
--- a/be/src/storage/predicate/comparison_predicate.h
+++ b/be/src/storage/predicate/comparison_predicate.h
@@ -22,6 +22,8 @@
#include "common/compare.h"
#include "core/column/column_dictionary.h"
+#include "core/column/column_execute_util.h"
+#include "core/column/column_string.h"
#include "core/field.h"
#include "storage/index/bloom_filter/bloom_filter.h"
#include "storage/index/inverted/inverted_index_cache.h" // IWYU pragma: keep
@@ -424,12 +426,7 @@ public:
__builtin_unreachable();
}
} else {
- auto* data_array =
- assert_cast<const
PredicateColumnType<PredicateEvaluateType<Type>>*>(
- &nested_column)
- ->get_data()
- .data();
-
+ ColumnElementView<Type> data_array {nested_column};
_base_loop_vec<true, is_and>(size, flags, null_map.data(),
data_array, _value);
}
} else {
@@ -453,12 +450,7 @@ public:
__builtin_unreachable();
}
} else {
- auto* data_array =
- assert_cast<const
PredicateColumnType<PredicateEvaluateType<Type>>*>(
- &column)
- ->get_data()
- .data();
-
+ ColumnElementView<Type> data_array {column};
_base_loop_vec<false, is_and>(size, flags, nullptr,
data_array, _value);
}
}
@@ -554,10 +546,12 @@ private:
}
}
+ // `data_array` is raw pointer or ColumnElementView wrapper; passed by
value
+ // (no __restrict on struct), SIMD preserved via loop versioning.
template <bool is_nullable, bool is_and, typename TArray, typename TValue>
void __attribute__((flatten))
_base_loop_vec(uint16_t size, bool* __restrict bflags, const uint8_t*
__restrict null_map,
- const TArray* __restrict data_array, const TValue& value)
const {
+ TArray data_array, const TValue& value) const {
//uint8_t helps compiler to generate vectorized code
auto* flags = reinterpret_cast<uint8_t*>(bflags);
if constexpr (is_and) {
@@ -581,7 +575,7 @@ private:
template <bool is_nullable, bool is_and, typename TArray, typename TValue>
void _base_loop_bit(const uint16_t* sel, uint16_t size, bool* flags,
- const uint8_t* __restrict null_map, const TArray*
__restrict data_array,
+ const uint8_t* __restrict null_map, TArray data_array,
const TValue& value) const {
for (uint16_t i = 0; i < size; i++) {
if (is_and ^ flags[i]) {
@@ -615,11 +609,7 @@ private:
__builtin_unreachable();
}
} else {
- auto* data_array =
- assert_cast<const
PredicateColumnType<PredicateEvaluateType<Type>>*>(column)
- ->get_data()
- .data();
-
+ ColumnElementView<Type> data_array {*column};
_base_loop_bit<is_nullable, is_and>(sel, size, flags, null_map,
data_array, _value);
}
}
@@ -653,14 +643,11 @@ private:
return 0;
}
} else {
- auto& pred_col =
- assert_cast<const
PredicateColumnType<PredicateEvaluateType<Type>>*>(column)
- ->get_data();
- auto pred_col_data = pred_col.data();
uint16_t new_size = 0;
+ ColumnElementView<Type> pred_col {*column};
#define EVALUATE_WITH_NULL_IMPL(IDX) \
- _opposite ^ (!null_map[IDX] && _operator(pred_col_data[IDX], _value))
-#define EVALUATE_WITHOUT_NULL_IMPL(IDX) _opposite ^
_operator(pred_col_data[IDX], _value)
+ _opposite ^ (!null_map[IDX] && _operator(pred_col[IDX], _value))
+#define EVALUATE_WITHOUT_NULL_IMPL(IDX) _opposite ^ _operator(pred_col[IDX],
_value)
EVALUATE_BY_SELECTOR(EVALUATE_WITH_NULL_IMPL,
EVALUATE_WITHOUT_NULL_IMPL)
#undef EVALUATE_WITH_NULL_IMPL
#undef EVALUATE_WITHOUT_NULL_IMPL
diff --git a/be/src/storage/predicate/in_list_predicate.h
b/be/src/storage/predicate/in_list_predicate.h
index 53b1ebb1d64..180a1b04139 100644
--- a/be/src/storage/predicate/in_list_predicate.h
+++ b/be/src/storage/predicate/in_list_predicate.h
@@ -23,6 +23,8 @@
#include "common/compiler_util.h"
#include "common/exception.h"
#include "core/column/column_dictionary.h"
+#include "core/column/column_execute_util.h"
+#include "core/column/column_string.h"
#include "core/data_type/data_type.h"
#include "core/data_type/define_primitive_type.h"
#include "core/data_type/primitive_type.h"
@@ -534,17 +536,12 @@ private:
__builtin_unreachable();
}
} else {
- auto& pred_col =
- assert_cast<const
PredicateColumnType<PredicateEvaluateType<Type>>*>(column)
- ->get_data();
- auto pred_col_data = pred_col.data();
-
+ // ptr_at safe: HybridSet::find consumes the pointer synchronously.
+ ColumnElementView<Type> pred_col {*column};
#define EVALUATE_WITH_NULL_IMPL(IDX) \
- is_opposite ^ \
- (!(*null_map)[IDX] && \
- _operator(_values->find(reinterpret_cast<const
T*>(&pred_col_data[IDX])), false))
+ is_opposite ^ (!(*null_map)[IDX] &&
_operator(_values->find(pred_col.ptr_at(IDX)), false))
#define EVALUATE_WITHOUT_NULL_IMPL(IDX) \
- is_opposite ^ _operator(_values->find(reinterpret_cast<const
T*>(&pred_col_data[IDX])), false)
+ is_opposite ^ _operator(_values->find(pred_col.ptr_at(IDX)), false)
EVALUATE_BY_SELECTOR(EVALUATE_WITH_NULL_IMPL,
EVALUATE_WITHOUT_NULL_IMPL)
#undef EVALUATE_WITH_NULL_IMPL
#undef EVALUATE_WITHOUT_NULL_IMPL
@@ -595,15 +592,8 @@ private:
__builtin_unreachable();
}
} else {
- auto* nested_col_ptr =
-
check_and_get_column<PredicateColumnType<PredicateEvaluateType<Type>>>(column);
- if (nested_col_ptr == nullptr) {
- throw Exception(ErrorCode::INTERNAL_ERROR,
- "InListPredicateBase: _base_evaluate_bit get
invalid column type");
- }
-
- auto& data_array = nested_col_ptr->get_data();
-
+ // ptr_at safe: HybridSet::find consumes the pointer synchronously.
+ ColumnElementView<Type> view {*column};
for (uint16_t i = 0; i < size; i++) {
if (is_and ^ flags[i]) {
continue;
@@ -617,17 +607,13 @@ private:
continue;
}
}
-
+ bool hit = _operator(_values->find(view.ptr_at(idx)), false);
if constexpr (!is_opposite) {
- if (is_and ^
- _operator(_values->find(reinterpret_cast<const
T*>(&data_array[idx])),
- false)) {
+ if (is_and ^ hit) {
flags[i] = !is_and;
}
} else {
- if (is_and ^
- !_operator(_values->find(reinterpret_cast<const
T*>(&data_array[idx])),
- false)) {
+ if (is_and ^ !hit) {
flags[i] = !is_and;
}
}
diff --git a/be/src/storage/predicate/like_column_predicate.cpp
b/be/src/storage/predicate/like_column_predicate.cpp
index c1329bcc9ab..f5d468dd9bf 100644
--- a/be/src/storage/predicate/like_column_predicate.cpp
+++ b/be/src/storage/predicate/like_column_predicate.cpp
@@ -17,7 +17,7 @@
#include "storage/predicate/like_column_predicate.h"
-#include "core/column/predicate_column.h"
+#include "core/column/column_string.h"
#include "core/data_type/define_primitive_type.h"
#include "core/string_ref.h"
#include "exprs/function/like.h"
@@ -25,32 +25,25 @@
namespace doris {
-template <PrimitiveType T>
-LikeColumnPredicate<T>::LikeColumnPredicate(bool opposite, uint32_t column_id,
std::string col_name,
- doris::FunctionContext* fn_ctx,
doris::StringRef val)
- : ColumnPredicate(column_id, col_name, T, opposite), pattern(val) {
- static_assert(T == TYPE_VARCHAR || T == TYPE_CHAR || T == TYPE_STRING,
- "LikeColumnPredicate only supports the following types:
TYPE_VARCHAR, TYPE_CHAR, "
- "TYPE_STRING");
+LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id,
std::string col_name,
+ doris::FunctionContext* fn_ctx,
doris::StringRef val)
+ : ColumnPredicate(column_id, col_name, TYPE_STRING, opposite),
pattern(val) {
_state = reinterpret_cast<StateType*>(
fn_ctx->get_function_state(doris::FunctionContext::THREAD_LOCAL));
THROW_IF_ERROR(_state->search_state.clone(_like_state));
}
-template <PrimitiveType T>
-void LikeColumnPredicate<T>::evaluate_vec(const IColumn& column, uint16_t
size, bool* flags) const {
+void LikeColumnPredicate::evaluate_vec(const IColumn& column, uint16_t size,
bool* flags) const {
_evaluate_vec<false>(column, size, flags);
}
-template <PrimitiveType T>
-void LikeColumnPredicate<T>::evaluate_and_vec(const IColumn& column, uint16_t
size,
- bool* flags) const {
+void LikeColumnPredicate::evaluate_and_vec(const IColumn& column, uint16_t
size,
+ bool* flags) const {
_evaluate_vec<true>(column, size, flags);
}
-template <PrimitiveType T>
-uint16_t LikeColumnPredicate<T>::_evaluate_inner(const IColumn& column,
uint16_t* sel,
- uint16_t size) const {
+uint16_t LikeColumnPredicate::_evaluate_inner(const IColumn& column, uint16_t*
sel,
+ uint16_t size) const {
uint16_t new_size = 0;
if (is_column_nullable(column)) {
auto* nullable_col = assert_cast<const ColumnNullable*>(&column);
@@ -80,7 +73,7 @@ uint16_t LikeColumnPredicate<T>::_evaluate_inner(const
IColumn& column, uint16_t
}
}
} else {
- auto* str_col = assert_cast<const
PredicateColumnType<T>*>(&nested_col);
+ auto* str_col = assert_cast<const ColumnString*>(&nested_col);
if (!nullable_col->has_null()) {
ColumnUInt8::Container res(size, 0);
for (uint16_t i = 0; i != size; i++) {
@@ -121,7 +114,7 @@ uint16_t LikeColumnPredicate<T>::_evaluate_inner(const
IColumn& column, uint16_t
new_size += _opposite ^ flag;
}
} else {
- const auto* str_col = assert_cast<const
PredicateColumnType<T>*>(&column);
+ const auto* str_col = assert_cast<const ColumnString*>(&column);
ColumnUInt8::Container res(size, 0);
for (uint16_t i = 0; i != size; i++) {
@@ -137,7 +130,4 @@ uint16_t LikeColumnPredicate<T>::_evaluate_inner(const
IColumn& column, uint16_t
return new_size;
}
-template class LikeColumnPredicate<TYPE_CHAR>;
-template class LikeColumnPredicate<TYPE_STRING>;
-
} //namespace doris
diff --git a/be/src/storage/predicate/like_column_predicate.h
b/be/src/storage/predicate/like_column_predicate.h
index 69cfd6fbc19..9cd74f8a9ad 100644
--- a/be/src/storage/predicate/like_column_predicate.h
+++ b/be/src/storage/predicate/like_column_predicate.h
@@ -43,23 +43,22 @@ class Roaring;
namespace doris {
class FunctionContext;
-template <PrimitiveType T>
class LikeColumnPredicate final : public ColumnPredicate {
public:
ENABLE_FACTORY_CREATOR(LikeColumnPredicate);
LikeColumnPredicate(bool opposite, uint32_t column_id, std::string
col_name,
doris::FunctionContext* fn_ctx, doris::StringRef val);
~LikeColumnPredicate() override = default;
- LikeColumnPredicate(const LikeColumnPredicate<T>& other, uint32_t col_id)
+ LikeColumnPredicate(const LikeColumnPredicate& other, uint32_t col_id)
: ColumnPredicate(other, col_id) {
_origin = other._origin;
pattern = other.pattern;
_state = other._state;
_opposite = other._opposite;
}
- LikeColumnPredicate(const LikeColumnPredicate<T>& other) = delete;
+ LikeColumnPredicate(const LikeColumnPredicate& other) = delete;
std::shared_ptr<ColumnPredicate> clone(uint32_t col_id) const override {
- return LikeColumnPredicate<T>::create_shared(*this, col_id);
+ return LikeColumnPredicate::create_shared(*this, col_id);
}
std::string debug_string() const override {
fmt::memory_buffer debug_string_buffer;
diff --git a/be/src/storage/predicate/predicate_creator.h
b/be/src/storage/predicate/predicate_creator.h
index c219fd5b90c..c4662d75707 100644
--- a/be/src/storage/predicate/predicate_creator.h
+++ b/be/src/storage/predicate/predicate_creator.h
@@ -50,7 +50,7 @@ std::shared_ptr<HybridSetBase> build_set() {
return std::make_shared<std::conditional_t<
is_string_type(TYPE), StringSet<DynamicContainer<std::string>>,
HybridSet<TYPE, DynamicContainer<typename
PrimitiveTypeTraits<TYPE>::CppType>,
-
PredicateColumnType<PredicateEvaluateType<TYPE>>>>>(false);
+ typename PrimitiveTypeTraits<TYPE>::ColumnType>>>(false);
}
std::shared_ptr<ColumnPredicate> create_bloom_filter_predicate(
diff --git a/be/src/storage/schema.cpp b/be/src/storage/schema.cpp
index 99088f93878..27dd40e9d74 100644
--- a/be/src/storage/schema.cpp
+++ b/be/src/storage/schema.cpp
@@ -26,11 +26,13 @@
#include "common/config.h"
#include "core/column/column_array.h"
+#include "core/column/column_decimal.h"
#include "core/column/column_dictionary.h"
#include "core/column/column_map.h"
#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
#include "core/column/column_struct.h"
-#include "core/column/predicate_column.h"
+#include "core/column/column_vector.h"
#include "core/data_type/data_type.h"
#include "core/data_type/data_type_factory.hpp"
#include "core/data_type/define_primitive_type.h"
@@ -89,96 +91,20 @@ DataTypePtr Schema::get_data_type_ptr(const TabletColumn&
column) {
return DataTypeFactory::instance().create_data_type(column);
}
-IColumn::MutablePtr Schema::get_predicate_column_ptr(const FieldType& type,
bool is_nullable,
+IColumn::MutablePtr Schema::get_predicate_column_ptr(const DataTypePtr&
data_type,
const ReaderType
reader_type) {
- IColumn::MutablePtr ptr = nullptr;
- switch (type) {
- case FieldType::OLAP_FIELD_TYPE_BOOL:
- ptr = doris::PredicateColumnType<TYPE_BOOLEAN>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_TINYINT:
- ptr = doris::PredicateColumnType<TYPE_TINYINT>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_SMALLINT:
- ptr = doris::PredicateColumnType<TYPE_SMALLINT>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_INT:
- ptr = doris::PredicateColumnType<TYPE_INT>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_FLOAT:
- ptr = doris::PredicateColumnType<TYPE_FLOAT>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_DOUBLE:
- ptr = doris::PredicateColumnType<TYPE_DOUBLE>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_BIGINT:
- ptr = doris::PredicateColumnType<TYPE_BIGINT>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_LARGEINT:
- ptr = doris::PredicateColumnType<TYPE_LARGEINT>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_DATE:
- ptr = doris::PredicateColumnType<TYPE_DATE>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_DATEV2:
- ptr = doris::PredicateColumnType<TYPE_DATEV2>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
- ptr = doris::PredicateColumnType<TYPE_DATETIMEV2>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_DATETIME:
- ptr = doris::PredicateColumnType<TYPE_DATETIME>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
- ptr = doris::PredicateColumnType<TYPE_TIMESTAMPTZ>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_CHAR:
- if (config::enable_low_cardinality_optimize && reader_type ==
ReaderType::READER_QUERY) {
- ptr = doris::ColumnDictI32::create(type);
- } else {
- ptr = doris::PredicateColumnType<TYPE_CHAR>::create();
+ // Low-cardinality dictionary optimization substitutes a ColumnDictI32 for
the
+ // canonical string column during query reads. Every other case just
materializes
+ // the data type's own canonical column (which already wraps nullable for
us).
+ if (config::enable_low_cardinality_optimize && reader_type ==
ReaderType::READER_QUERY &&
+ is_string_type(data_type->get_primitive_type())) {
+ IColumn::MutablePtr ptr = doris::ColumnDictI32::create();
+ if (data_type->is_nullable()) {
+ return doris::ColumnNullable::create(std::move(ptr),
doris::ColumnUInt8::create());
}
- break;
- case FieldType::OLAP_FIELD_TYPE_VARCHAR:
- case FieldType::OLAP_FIELD_TYPE_STRING:
- case FieldType::OLAP_FIELD_TYPE_JSONB:
- if (config::enable_low_cardinality_optimize && reader_type ==
ReaderType::READER_QUERY) {
- ptr = doris::ColumnDictI32::create(type);
- } else {
- ptr = doris::PredicateColumnType<TYPE_STRING>::create();
- }
- break;
- case FieldType::OLAP_FIELD_TYPE_DECIMAL:
- ptr = doris::PredicateColumnType<TYPE_DECIMALV2>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
- ptr = doris::PredicateColumnType<TYPE_DECIMAL32>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
- ptr = doris::PredicateColumnType<TYPE_DECIMAL64>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
- ptr = doris::PredicateColumnType<TYPE_DECIMAL128I>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
- ptr = doris::PredicateColumnType<TYPE_DECIMAL256>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_IPV4:
- ptr = doris::PredicateColumnType<TYPE_IPV4>::create();
- break;
- case FieldType::OLAP_FIELD_TYPE_IPV6:
- ptr = doris::PredicateColumnType<TYPE_IPV6>::create();
- break;
- default:
- throw Exception(
- ErrorCode::SCHEMA_SCHEMA_FIELD_INVALID,
- fmt::format("Unexpected type when choosing predicate column,
type={}", int(type)));
- }
-
- if (is_nullable) {
- return doris::ColumnNullable::create(std::move(ptr),
doris::ColumnUInt8::create());
+ return ptr;
}
- return ptr;
+ return data_type->create_column();
}
} // namespace doris
diff --git a/be/src/storage/schema.h b/be/src/storage/schema.h
index fd442cc16cc..dcd27fe9c0b 100644
--- a/be/src/storage/schema.h
+++ b/be/src/storage/schema.h
@@ -83,7 +83,7 @@ public:
static DataTypePtr get_data_type_ptr(const TabletColumn& column);
- static IColumn::MutablePtr get_predicate_column_ptr(const FieldType& type,
bool is_nullable,
+ static IColumn::MutablePtr get_predicate_column_ptr(const DataTypePtr&
data_type,
const ReaderType
reader_type);
const std::vector<TabletColumnPtr>& columns() const { return _cols; }
diff --git a/be/src/storage/segment/column_reader.cpp
b/be/src/storage/segment/column_reader.cpp
index 1c6552b4175..7ab5d0e0422 100644
--- a/be/src/storage/segment/column_reader.cpp
+++ b/be/src/storage/segment/column_reader.cpp
@@ -322,8 +322,7 @@ void ColumnReader::check_data_by_zone_map_for_test(const
MutableColumnPtr& dst)
?
assert_cast<ColumnNullable*>(dst.get())->get_nested_column_ptr().get()
: dst.get();
- /// `PredicateColumnType<TYPE_INT>` does not support `void get(size_t n,
Field& res)`,
- /// So here only check `CoumnVector<TYPE_INT>`
+ /// Only verify when the destination column carries Field-accessible
TYPE_INT data.
if (check_and_get_column<ColumnVector<TYPE_INT>>(non_nullable_column) ==
nullptr) {
return;
}
diff --git a/be/src/storage/segment/segment_iterator.cpp
b/be/src/storage/segment/segment_iterator.cpp
index 47886b7e2e6..19a02ddcbc7 100644
--- a/be/src/storage/segment/segment_iterator.cpp
+++ b/be/src/storage/segment/segment_iterator.cpp
@@ -51,7 +51,6 @@
#include "core/column/column_string.h"
#include "core/column/column_variant.h"
#include "core/column/column_vector.h"
-#include "core/column/predicate_column.h"
#include "core/data_type/data_type.h"
#include "core/data_type/data_type_factory.hpp"
#include "core/data_type/data_type_number.h"
@@ -664,8 +663,7 @@ Status SegmentIterator::_lazy_init(Block* block) {
// Here, cid will not go out of bounds
// because the size of _current_return_columns equals
_schema->tablet_columns().size()
_current_return_columns[cid] =
Schema::get_predicate_column_ptr(
- storage_column_type->get_storage_field_type(),
- storage_column_type->is_nullable(),
_opts.io_ctx.reader_type));
+ storage_column_type, _opts.io_ctx.reader_type));
_current_return_columns[cid]->set_rowset_segment_id(
{_segment->rowset_id(), _segment->id()});
_current_return_columns[cid]->reserve(nrows_reserve_limit);
@@ -1309,8 +1307,7 @@ bool
SegmentIterator::_check_apply_by_inverted_index(std::shared_ptr<ColumnPredi
}
// Function filter no apply inverted index
- if (dynamic_cast<LikeColumnPredicate<TYPE_CHAR>*>(pred.get()) != nullptr ||
- dynamic_cast<LikeColumnPredicate<TYPE_STRING>*>(pred.get()) !=
nullptr) {
+ if (dynamic_cast<LikeColumnPredicate*>(pred.get()) != nullptr) {
return false;
}
@@ -2502,15 +2499,14 @@ void SegmentIterator::_update_lsn_col_if_needed(const
std::vector<ColumnId>& col
const Int64 commit_tso = _opts.commit_tso.end_tso() == -1 ? 0 :
_opts.commit_tso.end_tso();
if (_is_pred_column[lsn_col_idx]) {
- auto* lsn_column = assert_cast<PredicateColumnType<TYPE_LARGEINT>*>(
- _current_return_columns[lsn_col_idx].get());
+ auto* lsn_column =
assert_cast<ColumnInt128*>(_current_return_columns[lsn_col_idx].get());
std::vector<Int128> binlog_lsns;
binlog_lsns.reserve(num_rows);
for (size_t j = 0; j < num_rows; j++) {
const Int128 row_id = lsn_column->get_data()[j];
binlog_lsns.emplace_back(make_row_binlog_lsn(commit_tso, row_id));
}
- _current_return_columns[lsn_col_idx]->clear();
+ lsn_column->clear();
for (const auto& binlog_lsn : binlog_lsns) {
lsn_column->insert_data(reinterpret_cast<const
char*>(&binlog_lsn), 0);
}
@@ -2564,9 +2560,8 @@ void SegmentIterator::_update_tso_col_if_needed(const
std::vector<ColumnId>& col
return;
}
- auto* tso_column = assert_cast<PredicateColumnType<TYPE_BIGINT>*>(
- _current_return_columns[tso_col_idx].get());
- _current_return_columns[tso_col_idx]->clear();
+ auto* tso_column =
assert_cast<ColumnInt64*>(_current_return_columns[tso_col_idx].get());
+ tso_column->clear();
auto value = commit_tso;
for (size_t j = 0; j < num_rows; j++) {
tso_column->insert_data(reinterpret_cast<const char*>(&value), 0);
diff --git a/be/src/storage/tablet/tablet_reader.cpp
b/be/src/storage/tablet/tablet_reader.cpp
index 5b513d62a26..46fcfc8c5b4 100644
--- a/be/src/storage/tablet/tablet_reader.cpp
+++ b/be/src/storage/tablet/tablet_reader.cpp
@@ -449,8 +449,7 @@ Status TabletReader::_init_conditions_param(const
ReaderParams& read_params) {
std::inserter(predicates, predicates.begin()));
// Function filter push down to storage engine
auto is_like_predicate = [](std::shared_ptr<ColumnPredicate> _pred) {
- return dynamic_cast<LikeColumnPredicate<TYPE_CHAR>*>(_pred.get()) !=
nullptr ||
- dynamic_cast<LikeColumnPredicate<TYPE_STRING>*>(_pred.get()) !=
nullptr;
+ return dynamic_cast<LikeColumnPredicate*>(_pred.get()) != nullptr;
};
for (const auto& filter : read_params.function_filters) {
diff --git a/be/test/core/column/column_dictionary_test.cpp
b/be/test/core/column/column_dictionary_test.cpp
index 929da3a9d89..e802eae7541 100644
--- a/be/test/core/column/column_dictionary_test.cpp
+++ b/be/test/core/column/column_dictionary_test.cpp
@@ -54,10 +54,10 @@ protected:
column_dict_data = ColumnString::create();
column_dict_indices = ColumnInt32::create();
- column_dict_char =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_CHAR);
+ column_dict_char = ColumnDictI32::create();
EXPECT_TRUE(column_dict_char->is_dict_empty());
- column_dict_varchar =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_STRING);
- column_dict_str =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_STRING);
+ column_dict_varchar = ColumnDictI32::create();
+ column_dict_str = ColumnDictI32::create();
load_columns_data();
}
@@ -184,7 +184,7 @@ TEST_F(ColumnDictionaryTest, insert_default) {
}
*/
TEST_F(ColumnDictionaryTest, clear) {
- auto target_column =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_CHAR);
+ auto target_column = ColumnDictI32::create();
target_column->insert_many_dict_data(dict_array.data(), dict_array.size());
target_column->clear();
@@ -197,9 +197,7 @@ TEST_F(ColumnDictionaryTest, allocated_bytes) {
EXPECT_EQ(column_dict_char->allocated_bytes(), column_dict_char->size() *
4);
}
TEST_F(ColumnDictionaryTest, has_enough_capacity) {
- EXPECT_THROW(column_dict_char->has_enough_capacity(
- ColumnDictI32(FieldType::OLAP_FIELD_TYPE_VARCHAR)),
- Exception);
+
EXPECT_THROW(column_dict_char->has_enough_capacity(*ColumnDictI32::create()),
Exception);
}
TEST_F(ColumnDictionaryTest, pop_back) {
EXPECT_THROW(column_dict_char->pop_back(9), Exception);
@@ -281,8 +279,7 @@ TEST_F(ColumnDictionaryTest, filter_by_selector) {
test_func(column_dict_char);
}
TEST_F(ColumnDictionaryTest, insert_many_dict_data) {
- ColumnDictI32::MutablePtr tmp_column_dict =
- ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_CHAR);
+ ColumnDictI32::MutablePtr tmp_column_dict = ColumnDictI32::create();
tmp_column_dict->insert_many_dict_data(dict_array.data(),
dict_array.size());
for (size_t i = 0; i != dict_array.size(); ++i) {
EXPECT_EQ(tmp_column_dict->get_value(i), dict_array[i]);
@@ -290,8 +287,7 @@ TEST_F(ColumnDictionaryTest, insert_many_dict_data) {
}
TEST_F(ColumnDictionaryTest, convert_dict_codes_if_necessary) {
{
- ColumnDictI32::MutablePtr tmp_column_dict =
- ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_CHAR);
+ ColumnDictI32::MutablePtr tmp_column_dict = ColumnDictI32::create();
tmp_column_dict->convert_dict_codes_if_necessary();
EXPECT_FALSE(tmp_column_dict->is_dict_sorted());
EXPECT_FALSE(tmp_column_dict->is_dict_code_converted());
@@ -316,7 +312,7 @@ TEST_F(ColumnDictionaryTest, find_code) {
/*
TEST_F(ColumnDictionaryTest, initialize_hash_values_for_runtime_filter) {
ColumnDictI32::MutablePtr tmp_column_dict =
- ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_CHAR);
+ ColumnDictI32::create();
auto dict_data_row_count = column_dict_data->size();
auto dict_indices_row_count = column_dict_indices->size();
tmp_column_dict->reserve(dict_indices_row_count);
@@ -344,8 +340,7 @@ TEST_F(ColumnDictionaryTest, rowset_segment_id) {
EXPECT_EQ(ids.second, segment_id);
}
TEST_F(ColumnDictionaryTest, convert_to_predicate_column_if_dictionary) {
- ColumnDictI32::MutablePtr tmp_column_dict =
- ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_CHAR);
+ ColumnDictI32::MutablePtr tmp_column_dict = ColumnDictI32::create();
auto dict_data_row_count = column_dict_data->size();
auto dict_indices_row_count = column_dict_indices->size();
tmp_column_dict->reserve(dict_indices_row_count);
diff --git a/be/test/core/column/column_nullable_test.cpp
b/be/test/core/column/column_nullable_test.cpp
index 799cc6a9826..6e6a7d8b4f9 100644
--- a/be/test/core/column/column_nullable_test.cpp
+++ b/be/test/core/column/column_nullable_test.cpp
@@ -23,7 +23,7 @@
#include "common/status.h"
#include "core/column/column_nullable_test.h"
-#include "core/column/predicate_column.h"
+#include "core/column/column_vector.h"
#include "core/data_type/data_type.h"
#include "core/data_type/data_type_number.h"
#include "core/data_type/define_primitive_type.h"
@@ -91,8 +91,7 @@ TEST(ColumnNullableTest,
CreateRejectsMismatchedNestedAndNullMapSizes) {
}
TEST(ColumnNullableTest, PredicateTest) {
- auto nullable_pred =
- ColumnNullable::create(PredicateColumnType<TYPE_DATE>::create(),
ColumnUInt8::create());
+ auto nullable_pred = ColumnNullable::create(ColumnDate::create(),
ColumnUInt8::create());
nullable_pred->insert_many_defaults(3);
EXPECT_TRUE(nullable_pred->has_null());
nullable_pred->insert_many_defaults(10);
diff --git a/be/test/core/column/common_column_test.h
b/be/test/core/column/common_column_test.h
index 38aae5461b2..149776b238a 100644
--- a/be/test/core/column/common_column_test.h
+++ b/be/test/core/column/common_column_test.h
@@ -28,6 +28,7 @@
#include "core/column/column.h"
#include "core/column/column_array.h"
+#include "core/column/column_decimal.h"
#include "core/column/column_dictionary.h"
#include "core/column/column_map.h"
#include "core/cow.h"
diff --git a/be/test/core/column/predicate_column_test.cpp
b/be/test/core/column/predicate_column_test.cpp
deleted file mode 100644
index 9d1b4d36140..00000000000
--- a/be/test/core/column/predicate_column_test.cpp
+++ /dev/null
@@ -1,1082 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "core/column/predicate_column.h"
-
-#include <gtest/gtest-message.h>
-#include <gtest/gtest-test-part.h>
-#include <gtest/gtest.h>
-
-#include "common/status.h"
-#include "core/column/column_decimal.h"
-#include "core/column/column_nullable.h"
-#include "core/data_type/data_type.h"
-#include "core/data_type/data_type_number.h"
-#include "core/data_type/define_primitive_type.h"
-#include "core/decimal12.h"
-#include "core/field.h"
-#include "core/types.h"
-#include "core/uint24.h"
-#include "core/value/vdatetime_value.h"
-#include "testutil/column_helper.h"
-
-namespace doris {
-
-// ============================================================================
-// Test size() for all types
-// ============================================================================
-TEST(PredicateColumnTest, SizeBoolean) {
- auto col = PredicateColumnType<TYPE_BOOLEAN>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
- col->insert_default();
- EXPECT_EQ(col->size(), 2);
-}
-
-TEST(PredicateColumnTest, SizeTinyInt) {
- auto col = PredicateColumnType<TYPE_TINYINT>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeSmallInt) {
- auto col = PredicateColumnType<TYPE_SMALLINT>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeBigInt) {
- auto col = PredicateColumnType<TYPE_BIGINT>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeLargeInt) {
- auto col = PredicateColumnType<TYPE_LARGEINT>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeFloat) {
- auto col = PredicateColumnType<TYPE_FLOAT>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDouble) {
- auto col = PredicateColumnType<TYPE_DOUBLE>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDate) {
- auto col = PredicateColumnType<TYPE_DATE>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDateV2) {
- auto col = PredicateColumnType<TYPE_DATEV2>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDateTime) {
- auto col = PredicateColumnType<TYPE_DATETIME>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDateTimeV2) {
- auto col = PredicateColumnType<TYPE_DATETIMEV2>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeChar) {
- auto col = PredicateColumnType<TYPE_CHAR>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeVarchar) {
- auto col = PredicateColumnType<TYPE_VARCHAR>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDecimalV2) {
- auto col = PredicateColumnType<TYPE_DECIMALV2>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDecimal32) {
- auto col = PredicateColumnType<TYPE_DECIMAL32>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDecimal64) {
- auto col = PredicateColumnType<TYPE_DECIMAL64>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDecimal128I) {
- auto col = PredicateColumnType<TYPE_DECIMAL128I>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeDecimal256) {
- auto col = PredicateColumnType<TYPE_DECIMAL256>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeIPv4) {
- auto col = PredicateColumnType<TYPE_IPV4>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, SizeIPv6) {
- auto col = PredicateColumnType<TYPE_IPV6>::create();
- EXPECT_EQ(col->size(), 0);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
-}
-
-// ============================================================================
-// Test insert_data() for all types
-// ============================================================================
-TEST(PredicateColumnTest, InsertDataBoolean) {
- auto col = PredicateColumnType<TYPE_BOOLEAN>::create();
- col->reserve(3);
- UInt8 vals[] = {0, 1, 1};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], 0);
- EXPECT_EQ(col->get_data()[1], 1);
- EXPECT_EQ(col->get_data()[2], 1);
-}
-
-TEST(PredicateColumnTest, InsertDataTinyInt) {
- auto col = PredicateColumnType<TYPE_TINYINT>::create();
- col->reserve(3);
- Int8 vals[] = {-128, 0, 127};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], -128);
- EXPECT_EQ(col->get_data()[1], 0);
- EXPECT_EQ(col->get_data()[2], 127);
-}
-
-TEST(PredicateColumnTest, InsertDataSmallInt) {
- auto col = PredicateColumnType<TYPE_SMALLINT>::create();
- col->reserve(3);
- Int16 vals[] = {-32768, 0, 32767};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], -32768);
- EXPECT_EQ(col->get_data()[1], 0);
- EXPECT_EQ(col->get_data()[2], 32767);
-}
-
-TEST(PredicateColumnTest, InsertDataInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(3);
- Int32 vals[] = {INT32_MIN, 0, INT32_MAX};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], INT32_MIN);
- EXPECT_EQ(col->get_data()[1], 0);
- EXPECT_EQ(col->get_data()[2], INT32_MAX);
-}
-
-TEST(PredicateColumnTest, InsertDataBigInt) {
- auto col = PredicateColumnType<TYPE_BIGINT>::create();
- col->reserve(3);
- Int64 vals[] = {INT64_MIN, 0, INT64_MAX};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], INT64_MIN);
- EXPECT_EQ(col->get_data()[1], 0);
- EXPECT_EQ(col->get_data()[2], INT64_MAX);
-}
-
-TEST(PredicateColumnTest, InsertDataLargeInt) {
- auto col = PredicateColumnType<TYPE_LARGEINT>::create();
- col->reserve(3);
- Int128 vals[] = {-(Int128(1) << 100), Int128(0), Int128(1) << 100};
- for (auto& v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], vals[0]);
- EXPECT_EQ(col->get_data()[1], vals[1]);
- EXPECT_EQ(col->get_data()[2], vals[2]);
-}
-
-TEST(PredicateColumnTest, InsertDataFloat) {
- auto col = PredicateColumnType<TYPE_FLOAT>::create();
- col->reserve(3);
- Float32 vals[] = {-3.14f, 0.0f, 2.718f};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_FLOAT_EQ(col->get_data()[0], -3.14f);
- EXPECT_FLOAT_EQ(col->get_data()[1], 0.0f);
- EXPECT_FLOAT_EQ(col->get_data()[2], 2.718f);
-}
-
-TEST(PredicateColumnTest, InsertDataDouble) {
- auto col = PredicateColumnType<TYPE_DOUBLE>::create();
- col->reserve(3);
- Float64 vals[] = {-3.14159265358979, 0.0, 2.71828182845904};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_DOUBLE_EQ(col->get_data()[0], -3.14159265358979);
- EXPECT_DOUBLE_EQ(col->get_data()[1], 0.0);
- EXPECT_DOUBLE_EQ(col->get_data()[2], 2.71828182845904);
-}
-
-TEST(PredicateColumnTest, InsertDataDateV2) {
- auto col = PredicateColumnType<TYPE_DATEV2>::create();
- col->reserve(3);
- uint32_t vals[] = {20230115, 20230620, 20231231};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
-}
-
-TEST(PredicateColumnTest, InsertDataDateTimeV2) {
- auto col = PredicateColumnType<TYPE_DATETIMEV2>::create();
- col->reserve(3);
- uint64_t vals[] = {1234567890123ULL, 2345678901234ULL, 3456789012345ULL};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
-}
-
-TEST(PredicateColumnTest, InsertDataString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(3);
- std::string s1 = "hello";
- std::string s2 = "world";
- std::string s3 = "";
- col->insert_data(s1.data(), s1.size());
- col->insert_data(s2.data(), s2.size());
- col->insert_data(s3.data(), s3.size());
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(std::string(col->get_data()[0].data, col->get_data()[0].size),
"hello");
- EXPECT_EQ(std::string(col->get_data()[1].data, col->get_data()[1].size),
"world");
- EXPECT_EQ(col->get_data()[2].size, 0);
-}
-
-TEST(PredicateColumnTest, InsertDataDecimal32) {
- auto col = PredicateColumnType<TYPE_DECIMAL32>::create();
- col->reserve(3);
- Decimal32 vals[] = {Decimal32(12345), Decimal32(-67890), Decimal32(0)};
- for (auto& v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], Decimal32(12345));
- EXPECT_EQ(col->get_data()[1], Decimal32(-67890));
- EXPECT_EQ(col->get_data()[2], Decimal32(0));
-}
-
-TEST(PredicateColumnTest, InsertDataDecimal64) {
- auto col = PredicateColumnType<TYPE_DECIMAL64>::create();
- col->reserve(3);
- Decimal64 vals[] = {Decimal64(Int64(123456789012LL)),
Decimal64(Int64(-987654321098LL)),
- Decimal64(Int64(0))};
- for (auto& v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], Decimal64(Int64(123456789012LL)));
- EXPECT_EQ(col->get_data()[1], Decimal64(Int64(-987654321098LL)));
- EXPECT_EQ(col->get_data()[2], Decimal64(Int64(0)));
-}
-
-TEST(PredicateColumnTest, InsertDataIPv4) {
- auto col = PredicateColumnType<TYPE_IPV4>::create();
- col->reserve(3);
- IPv4 vals[] = {IPv4(0x7F000001), IPv4(0xC0A80001), IPv4(0x00000000)};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], IPv4(0x7F000001));
- EXPECT_EQ(col->get_data()[1], IPv4(0xC0A80001));
- EXPECT_EQ(col->get_data()[2], IPv4(0x00000000));
-}
-
-TEST(PredicateColumnTest, InsertDataIPv6) {
- auto col = PredicateColumnType<TYPE_IPV6>::create();
- col->reserve(2);
- IPv6 vals[] = {IPv6(1), IPv6(0)};
- for (auto& v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- EXPECT_EQ(col->size(), 2);
- EXPECT_EQ(col->get_data()[0], IPv6(1));
- EXPECT_EQ(col->get_data()[1], IPv6(0));
-}
-
-// ============================================================================
-// Test insert_many_fix_len_data() for numeric types
-// ============================================================================
-TEST(PredicateColumnTest, InsertManyFixLenDataInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(5);
- Int32 vals[] = {1, 2, 3, 4, 5};
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(vals), 5);
- EXPECT_EQ(col->size(), 5);
- for (int i = 0; i < 5; i++) {
- EXPECT_EQ(col->get_data()[i], i + 1);
- }
-}
-
-TEST(PredicateColumnTest, InsertManyFixLenDataBigInt) {
- auto col = PredicateColumnType<TYPE_BIGINT>::create();
- col->reserve(3);
- Int64 vals[] = {100000000000LL, 200000000000LL, 300000000000LL};
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(vals), 3);
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], 100000000000LL);
- EXPECT_EQ(col->get_data()[1], 200000000000LL);
- EXPECT_EQ(col->get_data()[2], 300000000000LL);
-}
-
-TEST(PredicateColumnTest, InsertManyFixLenDataFloat) {
- auto col = PredicateColumnType<TYPE_FLOAT>::create();
- col->reserve(3);
- Float32 vals[] = {1.1f, 2.2f, 3.3f};
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(vals), 3);
- EXPECT_EQ(col->size(), 3);
- EXPECT_FLOAT_EQ(col->get_data()[0], 1.1f);
- EXPECT_FLOAT_EQ(col->get_data()[1], 2.2f);
- EXPECT_FLOAT_EQ(col->get_data()[2], 3.3f);
-}
-
-TEST(PredicateColumnTest, InsertManyFixLenDataDouble) {
- auto col = PredicateColumnType<TYPE_DOUBLE>::create();
- col->reserve(3);
- Float64 vals[] = {1.11, 2.22, 3.33};
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(vals), 3);
- EXPECT_EQ(col->size(), 3);
- EXPECT_DOUBLE_EQ(col->get_data()[0], 1.11);
- EXPECT_DOUBLE_EQ(col->get_data()[1], 2.22);
- EXPECT_DOUBLE_EQ(col->get_data()[2], 3.33);
-}
-
-TEST(PredicateColumnTest, InsertManyFixLenDataDateV2) {
- auto col = PredicateColumnType<TYPE_DATEV2>::create();
- col->reserve(3);
- uint32_t vals[] = {20230115, 20230620, 20231231};
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(vals), 3);
- EXPECT_EQ(col->size(), 3);
-}
-
-TEST(PredicateColumnTest, InsertManyFixLenDataDateTimeV2) {
- auto col = PredicateColumnType<TYPE_DATETIMEV2>::create();
- col->reserve(3);
- uint64_t vals[] = {1234567890123ULL, 2345678901234ULL, 3456789012345ULL};
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(vals), 3);
- EXPECT_EQ(col->size(), 3);
-}
-
-TEST(PredicateColumnTest, InsertManyFixLenDataDecimal32) {
- auto col = PredicateColumnType<TYPE_DECIMAL32>::create();
- col->reserve(3);
- Decimal32 vals[] = {Decimal32(100), Decimal32(200), Decimal32(300)};
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(vals), 3);
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], Decimal32(100));
- EXPECT_EQ(col->get_data()[1], Decimal32(200));
- EXPECT_EQ(col->get_data()[2], Decimal32(300));
-}
-
-TEST(PredicateColumnTest, InsertManyFixLenDataDecimal64) {
- auto col = PredicateColumnType<TYPE_DECIMAL64>::create();
- col->reserve(3);
- Decimal64 vals[] = {Decimal64(Int64(1000)), Decimal64(Int64(2000)),
Decimal64(Int64(3000))};
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(vals), 3);
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], Decimal64(Int64(1000)));
- EXPECT_EQ(col->get_data()[1], Decimal64(Int64(2000)));
- EXPECT_EQ(col->get_data()[2], Decimal64(Int64(3000)));
-}
-
-TEST(PredicateColumnTest, InsertManyFixLenDataIPv4) {
- auto col = PredicateColumnType<TYPE_IPV4>::create();
- col->reserve(3);
- IPv4 vals[] = {IPv4(1), IPv4(2), IPv4(3)};
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(vals), 3);
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], IPv4(1));
- EXPECT_EQ(col->get_data()[1], IPv4(2));
- EXPECT_EQ(col->get_data()[2], IPv4(3));
-}
-
-// ============================================================================
-// Test insert_many_date() for TYPE_DATE
-// ============================================================================
-TEST(PredicateColumnTest, InsertManyDate) {
- auto col = PredicateColumnType<TYPE_DATE>::create();
- col->reserve(3);
- uint24_t date_vals[3];
- date_vals[0] = (2023 << 9) | (1 << 5) | 15;
- date_vals[1] = (2023 << 9) | (6 << 5) | 20;
- date_vals[2] = (2023 << 9) | (12 << 5) | 31;
- col->insert_many_date(reinterpret_cast<const char*>(date_vals), 3);
- EXPECT_EQ(col->size(), 3);
-}
-
-// ============================================================================
-// Test insert_many_datetime() for TYPE_DATETIME
-// ============================================================================
-TEST(PredicateColumnTest, InsertManyDateTime) {
- auto col = PredicateColumnType<TYPE_DATETIME>::create();
- col->reserve(3);
- uint64_t datetime_vals[] = {20230115120000ULL, 20230620153045ULL,
20231231235959ULL};
- col->insert_many_datetime(reinterpret_cast<const char*>(datetime_vals), 3);
- EXPECT_EQ(col->size(), 3);
-}
-
-// ============================================================================
-// Test insert_many_decimalv2() for TYPE_DECIMALV2
-// ============================================================================
-TEST(PredicateColumnTest, InsertManyDecimalV2) {
- auto col = PredicateColumnType<TYPE_DECIMALV2>::create();
- col->reserve(3);
- decimal12_t decimals[3];
- decimals[0].integer = 123;
- decimals[0].fraction = 456000000;
- decimals[1].integer = -789;
- decimals[1].fraction = 123000000;
- decimals[2].integer = 0;
- decimals[2].fraction = 999000000;
- col->insert_many_fix_len_data(reinterpret_cast<const char*>(decimals), 3);
- EXPECT_EQ(col->size(), 3);
-}
-
-// ============================================================================
-// Test insert_many_strings() for string types
-// ============================================================================
-TEST(PredicateColumnTest, InsertManyStringsString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(5);
- StringRef strings[] = {StringRef("one", 3), StringRef("two", 3),
StringRef("three", 5)};
- col->insert_many_strings(strings, 3);
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(std::string(col->get_data()[0].data, col->get_data()[0].size),
"one");
- EXPECT_EQ(std::string(col->get_data()[1].data, col->get_data()[1].size),
"two");
- EXPECT_EQ(std::string(col->get_data()[2].data, col->get_data()[2].size),
"three");
-}
-
-TEST(PredicateColumnTest, InsertManyStringsVarchar) {
- auto col = PredicateColumnType<TYPE_VARCHAR>::create();
- col->reserve(3);
- StringRef strings[] = {StringRef("abc", 3), StringRef("defgh", 5)};
- col->insert_many_strings(strings, 2);
- EXPECT_EQ(col->size(), 2);
- EXPECT_EQ(std::string(col->get_data()[0].data, col->get_data()[0].size),
"abc");
- EXPECT_EQ(std::string(col->get_data()[1].data, col->get_data()[1].size),
"defgh");
-}
-
-TEST(PredicateColumnTest, InsertManyStringsChar) {
- auto col = PredicateColumnType<TYPE_CHAR>::create();
- col->reserve(3);
- StringRef strings[] = {StringRef("ab", 2), StringRef("cd", 2)};
- col->insert_many_strings(strings, 2);
- EXPECT_EQ(col->size(), 2);
-}
-
-TEST(PredicateColumnTest, InsertManyStringsEmpty) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(3);
- col->insert_many_strings(nullptr, 0);
- EXPECT_EQ(col->size(), 0);
-}
-
-// ============================================================================
-// Test insert_many_continuous_binary_data() for string types
-// ============================================================================
-TEST(PredicateColumnTest, InsertManyContinuousBinaryDataString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(3);
- const char* data = "helloworld!";
- uint32_t offsets[] = {0, 5, 10, 11};
- col->insert_many_continuous_binary_data(data, offsets, 3);
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(std::string(col->get_data()[0].data, col->get_data()[0].size),
"hello");
- EXPECT_EQ(std::string(col->get_data()[1].data, col->get_data()[1].size),
"world");
- EXPECT_EQ(std::string(col->get_data()[2].data, col->get_data()[2].size),
"!");
-}
-
-TEST(PredicateColumnTest, InsertManyContinuousBinaryDataVarchar) {
- auto col = PredicateColumnType<TYPE_VARCHAR>::create();
- col->reserve(2);
- const char* data = "abcdef";
- uint32_t offsets[] = {0, 3, 6};
- col->insert_many_continuous_binary_data(data, offsets, 2);
- EXPECT_EQ(col->size(), 2);
- EXPECT_EQ(std::string(col->get_data()[0].data, col->get_data()[0].size),
"abc");
- EXPECT_EQ(std::string(col->get_data()[1].data, col->get_data()[1].size),
"def");
-}
-
-TEST(PredicateColumnTest, InsertManyContinuousBinaryDataEmpty) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(3);
- const char* data = "test";
- uint32_t offsets[] = {0};
- col->insert_many_continuous_binary_data(data, offsets, 0);
- EXPECT_EQ(col->size(), 0);
-}
-
-// ============================================================================
-// Test insert_many_dict_data() for string types
-// ============================================================================
-TEST(PredicateColumnTest, InsertManyDictDataString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(5);
- StringRef dict[] = {StringRef("apple", 5), StringRef("banana", 6),
StringRef("cherry", 6)};
- int32_t codewords[] = {0, 1, 2, 0, 1};
- col->insert_many_dict_data(codewords, 0, dict, 5, 3);
- EXPECT_EQ(col->size(), 5);
- EXPECT_EQ(std::string(col->get_data()[0].data, col->get_data()[0].size),
"apple");
- EXPECT_EQ(std::string(col->get_data()[1].data, col->get_data()[1].size),
"banana");
- EXPECT_EQ(std::string(col->get_data()[2].data, col->get_data()[2].size),
"cherry");
- EXPECT_EQ(std::string(col->get_data()[3].data, col->get_data()[3].size),
"apple");
- EXPECT_EQ(std::string(col->get_data()[4].data, col->get_data()[4].size),
"banana");
-}
-
-TEST(PredicateColumnTest, InsertManyDictDataVarchar) {
- auto col = PredicateColumnType<TYPE_VARCHAR>::create();
- col->reserve(3);
- StringRef dict[] = {StringRef("x", 1), StringRef("yy", 2)};
- int32_t codewords[] = {0, 1, 0};
- col->insert_many_dict_data(codewords, 0, dict, 3, 2);
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(std::string(col->get_data()[0].data, col->get_data()[0].size),
"x");
- EXPECT_EQ(std::string(col->get_data()[1].data, col->get_data()[1].size),
"yy");
- EXPECT_EQ(std::string(col->get_data()[2].data, col->get_data()[2].size),
"x");
-}
-
-// ============================================================================
-// Test insert_duplicate_fields() for all types
-// ============================================================================
-TEST(PredicateColumnTest, InsertDuplicateFieldsString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(10);
- std::string test_str = "hello";
- Field field = Field::create_field<TYPE_STRING>(test_str);
- col->insert_duplicate_fields(field, 5);
- EXPECT_EQ(col->get_data()[col->size() - 1].data,
- col->get_data()[0].data + (col->size() - 1) * test_str.size());
- EXPECT_EQ(col->size(), 5);
- for (size_t i = 0; i < 5; i++) {
- StringRef ref = col->get_data()[i];
- EXPECT_EQ(std::string(ref.data, ref.size), test_str);
- }
- // Insert another batch to verify memory doesn't overlap
- std::string str2 = "world";
- Field field2 = Field::create_field<TYPE_STRING>(str2);
- col->insert_duplicate_fields(field2, 3);
- EXPECT_EQ(col->size(), 8);
- for (size_t i = 0; i < 5; i++) {
- EXPECT_EQ(std::string(col->get_data()[i].data,
col->get_data()[i].size), test_str);
- }
- for (size_t i = 5; i < 8; i++) {
- EXPECT_EQ(std::string(col->get_data()[i].data,
col->get_data()[i].size), str2);
- }
-}
-
-TEST(PredicateColumnTest, InsertDuplicateFieldsInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(10);
- Int32 val = 42;
- Field field = Field::create_field<TYPE_INT>(val);
- col->insert_duplicate_fields(field, 5);
- EXPECT_EQ(col->size(), 5);
- for (size_t i = 0; i < 5; i++) {
- EXPECT_EQ(col->get_data()[i], 42);
- }
-}
-
-TEST(PredicateColumnTest, InsertDuplicateFieldsLargeInt) {
- auto col = PredicateColumnType<TYPE_LARGEINT>::create();
- col->reserve(10);
- Int128 val = Int128(123456789012345LL) * Int128(1000000000LL);
- Field field = Field::create_field<TYPE_LARGEINT>(val);
- col->insert_duplicate_fields(field, 3);
- EXPECT_EQ(col->size(), 3);
- for (size_t i = 0; i < 3; i++) {
- EXPECT_EQ(col->get_data()[i], val);
- }
-}
-
-TEST(PredicateColumnTest, InsertDuplicateFieldsZeroCount) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(5);
- Int32 val = 42;
- Field field = Field::create_field<TYPE_INT>(val);
- col->insert_duplicate_fields(field, 0);
- EXPECT_EQ(col->size(), 0);
-}
-
-// ============================================================================
-// Test insert_default() for all types
-// ============================================================================
-TEST(PredicateColumnTest, InsertDefaultInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(3);
- col->insert_default();
- col->insert_default();
- EXPECT_EQ(col->size(), 2);
- EXPECT_EQ(col->get_data()[0], 0);
- EXPECT_EQ(col->get_data()[1], 0);
-}
-
-TEST(PredicateColumnTest, InsertDefaultDouble) {
- auto col = PredicateColumnType<TYPE_DOUBLE>::create();
- col->reserve(2);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
- EXPECT_DOUBLE_EQ(col->get_data()[0], 0.0);
-}
-
-TEST(PredicateColumnTest, InsertDefaultString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(2);
- col->insert_default();
- EXPECT_EQ(col->size(), 1);
- EXPECT_EQ(col->get_data()[0].size, 0);
-}
-
-// ============================================================================
-// Test clear() for all types
-// ============================================================================
-TEST(PredicateColumnTest, ClearInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(10);
- Int32 val = 42;
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- EXPECT_EQ(col->size(), 2);
- col->clear();
- EXPECT_EQ(col->size(), 0);
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- EXPECT_EQ(col->size(), 1);
-}
-
-TEST(PredicateColumnTest, ClearString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(5);
- std::string s = "test";
- col->insert_data(s.data(), s.size());
- EXPECT_EQ(col->size(), 1);
- col->clear();
- EXPECT_EQ(col->size(), 0);
-}
-
-// ============================================================================
-// Test reserve() for all types
-// ============================================================================
-TEST(PredicateColumnTest, ReserveInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(100);
- EXPECT_EQ(col->size(), 0);
- for (int i = 0; i < 100; i++) {
- Int32 val = i;
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- }
- EXPECT_EQ(col->size(), 100);
-}
-
-TEST(PredicateColumnTest, ReserveString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(50);
- EXPECT_EQ(col->size(), 0);
-}
-
-// ============================================================================
-// Test byte_size() and allocated_bytes() for all types
-// ============================================================================
-TEST(PredicateColumnTest, ByteSizeInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(5);
- EXPECT_EQ(col->byte_size(), 0);
- Int32 val = 1;
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- EXPECT_EQ(col->byte_size(), sizeof(Int32));
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- EXPECT_EQ(col->byte_size(), 2 * sizeof(Int32));
- EXPECT_EQ(col->allocated_bytes(), col->byte_size());
-}
-
-TEST(PredicateColumnTest, ByteSizeBigInt) {
- auto col = PredicateColumnType<TYPE_BIGINT>::create();
- col->reserve(3);
- Int64 val = 12345;
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- EXPECT_EQ(col->byte_size(), 3 * sizeof(Int64));
- EXPECT_EQ(col->allocated_bytes(), 3 * sizeof(Int64));
-}
-
-TEST(PredicateColumnTest, ByteSizeString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(3);
- std::string s = "hello";
- col->insert_data(s.data(), s.size());
- EXPECT_EQ(col->byte_size(), sizeof(StringRef));
-}
-
-// ============================================================================
-// Test get_name() for all types
-// ============================================================================
-TEST(PredicateColumnTest, GetNameAllTypes) {
- EXPECT_EQ(PredicateColumnType<TYPE_BOOLEAN>::create()->get_name(), "BOOL");
- EXPECT_EQ(PredicateColumnType<TYPE_TINYINT>::create()->get_name(),
"TINYINT");
- EXPECT_EQ(PredicateColumnType<TYPE_SMALLINT>::create()->get_name(),
"SMALLINT");
- EXPECT_EQ(PredicateColumnType<TYPE_INT>::create()->get_name(), "INT");
- EXPECT_EQ(PredicateColumnType<TYPE_BIGINT>::create()->get_name(),
"BIGINT");
- EXPECT_EQ(PredicateColumnType<TYPE_LARGEINT>::create()->get_name(),
"LARGEINT");
- EXPECT_EQ(PredicateColumnType<TYPE_FLOAT>::create()->get_name(), "FLOAT");
- EXPECT_EQ(PredicateColumnType<TYPE_DOUBLE>::create()->get_name(),
"DOUBLE");
- EXPECT_EQ(PredicateColumnType<TYPE_DATE>::create()->get_name(), "DATE");
- EXPECT_EQ(PredicateColumnType<TYPE_DATEV2>::create()->get_name(),
"DATEV2");
- EXPECT_EQ(PredicateColumnType<TYPE_DATETIME>::create()->get_name(),
"DATETIME");
- EXPECT_EQ(PredicateColumnType<TYPE_DATETIMEV2>::create()->get_name(),
"DATETIMEV2");
- EXPECT_EQ(PredicateColumnType<TYPE_CHAR>::create()->get_name(), "CHAR");
- EXPECT_EQ(PredicateColumnType<TYPE_VARCHAR>::create()->get_name(),
"VARCHAR");
- EXPECT_EQ(PredicateColumnType<TYPE_STRING>::create()->get_name(),
"STRING");
- EXPECT_EQ(PredicateColumnType<TYPE_DECIMALV2>::create()->get_name(),
"DECIMALV2");
- EXPECT_EQ(PredicateColumnType<TYPE_DECIMAL32>::create()->get_name(),
"DECIMAL32");
- EXPECT_EQ(PredicateColumnType<TYPE_DECIMAL64>::create()->get_name(),
"DECIMAL64");
- EXPECT_EQ(PredicateColumnType<TYPE_DECIMAL128I>::create()->get_name(),
"DECIMAL128I");
- EXPECT_EQ(PredicateColumnType<TYPE_DECIMAL256>::create()->get_name(),
"DECIMAL256");
- EXPECT_EQ(PredicateColumnType<TYPE_IPV4>::create()->get_name(), "IPV4");
- EXPECT_EQ(PredicateColumnType<TYPE_IPV6>::create()->get_name(), "IPV6");
-}
-
-// ============================================================================
-// Test clone_resized() for all types
-// ============================================================================
-TEST(PredicateColumnTest, CloneResizedInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(5);
- Int32 val = 42;
- col->insert_data(reinterpret_cast<const char*>(&val), sizeof(val));
- auto cloned = col->clone_resized(0);
- EXPECT_EQ(cloned->size(), 0);
-}
-
-TEST(PredicateColumnTest, CloneResizedString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(5);
- std::string s = "test";
- col->insert_data(s.data(), s.size());
- auto cloned = col->clone_resized(0);
- EXPECT_EQ(cloned->size(), 0);
-}
-
-// ============================================================================
-// Test get_data_at() for string types
-// ============================================================================
-TEST(PredicateColumnTest, GetDataAtString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(3);
- std::string s1 = "abc";
- std::string s2 = "defgh";
- col->insert_data(s1.data(), s1.size());
- col->insert_data(s2.data(), s2.size());
- StringRef ref0 = col->get_data_at(0);
- StringRef ref1 = col->get_data_at(1);
- EXPECT_EQ(std::string(ref0.data, ref0.size), "abc");
- EXPECT_EQ(std::string(ref1.data, ref1.size), "defgh");
-}
-
-TEST(PredicateColumnTest, GetDataAtVarchar) {
- auto col = PredicateColumnType<TYPE_VARCHAR>::create();
- col->reserve(2);
- std::string s1 = "test";
- col->insert_data(s1.data(), s1.size());
- StringRef ref = col->get_data_at(0);
- EXPECT_EQ(std::string(ref.data, ref.size), "test");
-}
-
-// ============================================================================
-// Test get_data() for all types
-// ============================================================================
-TEST(PredicateColumnTest, GetDataInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(3);
- Int32 vals[] = {1, 2, 3};
- for (auto v : vals) {
- col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- auto& data = col->get_data();
- EXPECT_EQ(data.size(), 3);
- EXPECT_EQ(data[0], 1);
- EXPECT_EQ(data[1], 2);
- EXPECT_EQ(data[2], 3);
-}
-
-TEST(PredicateColumnTest, GetDataString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(2);
- std::string s1 = "a";
- std::string s2 = "bb";
- col->insert_data(s1.data(), s1.size());
- col->insert_data(s2.data(), s2.size());
- const auto& data = col->get_data();
- EXPECT_EQ(data.size(), 2);
- EXPECT_EQ(std::string(data[0].data, data[0].size), "a");
- EXPECT_EQ(std::string(data[1].data, data[1].size), "bb");
-}
-
-// ============================================================================
-// Test filter_by_selector() for all types
-// ============================================================================
-TEST(PredicateColumnTest, FilterBySelectorInt) {
- auto pred_col = PredicateColumnType<TYPE_INT>::create();
- pred_col->reserve(5);
- Int32 vals[] = {10, 20, 30, 40, 50};
- for (auto v : vals) {
- pred_col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- auto result_col = ColumnInt32::create();
- uint16_t selector[] = {0, 2, 4};
- EXPECT_EQ(pred_col->filter_by_selector(selector, 3, result_col.get()),
Status::OK());
- EXPECT_EQ(result_col->size(), 3);
- EXPECT_EQ(result_col->get_data()[0], 10);
- EXPECT_EQ(result_col->get_data()[1], 30);
- EXPECT_EQ(result_col->get_data()[2], 50);
-}
-
-TEST(PredicateColumnTest, FilterBySelectorBigInt) {
- auto pred_col = PredicateColumnType<TYPE_BIGINT>::create();
- pred_col->reserve(4);
- Int64 vals[] = {100LL, 200LL, 300LL, 400LL};
- for (auto v : vals) {
- pred_col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- auto result_col = ColumnInt64::create();
- uint16_t selector[] = {1, 3};
- EXPECT_EQ(pred_col->filter_by_selector(selector, 2, result_col.get()),
Status::OK());
- EXPECT_EQ(result_col->size(), 2);
- EXPECT_EQ(result_col->get_data()[0], 200LL);
- EXPECT_EQ(result_col->get_data()[1], 400LL);
-}
-
-TEST(PredicateColumnTest, FilterBySelectorFloat) {
- auto pred_col = PredicateColumnType<TYPE_FLOAT>::create();
- pred_col->reserve(3);
- Float32 vals[] = {1.1f, 2.2f, 3.3f};
- for (auto v : vals) {
- pred_col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- auto result_col = ColumnFloat32::create();
- uint16_t selector[] = {0, 2};
- EXPECT_EQ(pred_col->filter_by_selector(selector, 2, result_col.get()),
Status::OK());
- EXPECT_EQ(result_col->size(), 2);
- EXPECT_FLOAT_EQ(result_col->get_data()[0], 1.1f);
- EXPECT_FLOAT_EQ(result_col->get_data()[1], 3.3f);
-}
-
-TEST(PredicateColumnTest, FilterBySelectorDouble) {
- auto pred_col = PredicateColumnType<TYPE_DOUBLE>::create();
- pred_col->reserve(3);
- Float64 vals[] = {1.11, 2.22, 3.33};
- for (auto v : vals) {
- pred_col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- auto result_col = ColumnFloat64::create();
- uint16_t selector[] = {1};
- EXPECT_EQ(pred_col->filter_by_selector(selector, 1, result_col.get()),
Status::OK());
- EXPECT_EQ(result_col->size(), 1);
- EXPECT_DOUBLE_EQ(result_col->get_data()[0], 2.22);
-}
-
-TEST(PredicateColumnTest, FilterBySelectorBoolean) {
- auto pred_col = PredicateColumnType<TYPE_BOOLEAN>::create();
- pred_col->reserve(5);
- UInt8 vals[] = {0, 1, 0, 1, 1};
- for (auto v : vals) {
- pred_col->insert_data(reinterpret_cast<const char*>(&v), sizeof(v));
- }
- auto result_col = ColumnUInt8::create();
- uint16_t selector[] = {1, 3, 4};
- EXPECT_EQ(pred_col->filter_by_selector(selector, 3, result_col.get()),
Status::OK());
- EXPECT_EQ(result_col->size(), 3);
- EXPECT_EQ(result_col->get_data()[0], 1);
- EXPECT_EQ(result_col->get_data()[1], 1);
- EXPECT_EQ(result_col->get_data()[2], 1);
-}
-
-TEST(PredicateColumnTest, FilterBySelectorString) {
- auto pred_col = PredicateColumnType<TYPE_STRING>::create();
- pred_col->reserve(5);
- std::vector<std::string> strings = {"a", "bb", "ccc", "dddd", "eeeee"};
- for (const auto& s : strings) {
- pred_col->insert_data(s.data(), s.size());
- }
- auto result_col = ColumnString::create();
- uint16_t selector[] = {1, 3};
- EXPECT_EQ(pred_col->filter_by_selector(selector, 2, result_col.get()),
Status::OK());
- EXPECT_EQ(result_col->size(), 2);
- EXPECT_EQ(result_col->get_data_at(0).to_string(), "bb");
- EXPECT_EQ(result_col->get_data_at(1).to_string(), "dddd");
-}
-
-TEST(PredicateColumnTest, FilterBySelectorVarchar) {
- auto pred_col = PredicateColumnType<TYPE_VARCHAR>::create();
- pred_col->reserve(3);
- std::vector<std::string> strings = {"x", "yy", "zzz"};
- for (const auto& s : strings) {
- pred_col->insert_data(s.data(), s.size());
- }
- auto result_col = ColumnString::create();
- uint16_t selector[] = {0, 2};
- EXPECT_EQ(pred_col->filter_by_selector(selector, 2, result_col.get()),
Status::OK());
- EXPECT_EQ(result_col->size(), 2);
- EXPECT_EQ(result_col->get_data_at(0).to_string(), "x");
- EXPECT_EQ(result_col->get_data_at(1).to_string(), "zzz");
-}
-
-// ============================================================================
-// Test insert_string_value() for string types
-// ============================================================================
-TEST(PredicateColumnTest, InsertStringValueString) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(5);
- std::vector<std::string> strings = {"hello", "world", "", "test123", "long
string"};
- for (const auto& s : strings) {
- col->insert_string_value(s.data(), s.size());
- }
- EXPECT_EQ(col->size(), 5);
- for (size_t i = 0; i < strings.size(); i++) {
- StringRef ref = col->get_data()[i];
- EXPECT_EQ(std::string(ref.data, ref.size), strings[i]);
- }
-}
-
-// ============================================================================
-// Test insert_in_copy_way() for LARGEINT
-// ============================================================================
-TEST(PredicateColumnTest, InsertInCopyWayLargeInt) {
- auto col = PredicateColumnType<TYPE_LARGEINT>::create();
- col->reserve(3);
- Int128 val1 = Int128(1) << 100;
- Int128 val2 = -val1;
- Int128 val3 = 0;
- col->insert_in_copy_way(reinterpret_cast<const char*>(&val1),
sizeof(val1));
- col->insert_in_copy_way(reinterpret_cast<const char*>(&val2),
sizeof(val2));
- col->insert_in_copy_way(reinterpret_cast<const char*>(&val3),
sizeof(val3));
- EXPECT_EQ(col->size(), 3);
- EXPECT_EQ(col->get_data()[0], val1);
- EXPECT_EQ(col->get_data()[1], val2);
- EXPECT_EQ(col->get_data()[2], val3);
-}
-
-// ============================================================================
-// Test insert_default_type() for numeric types
-// ============================================================================
-TEST(PredicateColumnTest, InsertDefaultTypeInt) {
- auto col = PredicateColumnType<TYPE_INT>::create();
- col->reserve(3);
- Int32 val = 999;
- col->insert_default_type(reinterpret_cast<const char*>(&val), sizeof(val));
- EXPECT_EQ(col->size(), 1);
- EXPECT_EQ(col->get_data()[0], 999);
-}
-
-// ============================================================================
-// Test empty string handling
-// ============================================================================
-TEST(PredicateColumnTest, EmptyStringHandling) {
- auto col = PredicateColumnType<TYPE_STRING>::create();
- col->reserve(5);
- std::vector<std::string> strings = {"", "a", "", "bc", ""};
- for (const auto& s : strings) {
- col->insert_data(s.data(), s.size());
- }
- EXPECT_EQ(col->size(), 5);
- EXPECT_EQ(col->get_data()[0].size, 0);
- EXPECT_EQ(col->get_data()[1].size, 1);
- EXPECT_EQ(col->get_data()[2].size, 0);
- EXPECT_EQ(col->get_data()[3].size, 2);
- EXPECT_EQ(col->get_data()[4].size, 0);
-}
-
-} // namespace doris
\ No newline at end of file
diff --git a/be/test/exprs/bloom_filter_func_test.cpp
b/be/test/exprs/bloom_filter_func_test.cpp
index bcabdcc6a5a..bc0ce276a37 100644
--- a/be/test/exprs/bloom_filter_func_test.cpp
+++ b/be/test/exprs/bloom_filter_func_test.cpp
@@ -183,23 +183,17 @@ TEST_F(BloomFilterFuncTest, InsertFixedLen) {
PODArray<uint16_t> offsets(4);
std::iota(offsets.begin(), offsets.end(), 0);
- std::vector<StringRef> strings(4);
- strings[0] = StringRef("aa");
- strings[1] = StringRef("bb");
- strings[2] = StringRef("cc");
- strings[3] = StringRef("dd");
+ auto probe_column = ColumnHelper::create_column<DataTypeString>({"aa",
"bb", "cc", "dd"});
auto find_count = bloom_filter_func2.find_fixed_len_olap_engine(
- reinterpret_cast<const char*>(strings.data()),
nullmap_column->get_data().data(),
- offsets.data(), 4, false);
+ *probe_column, nullmap_column->get_data().data(), offsets.data(),
4, false);
ASSERT_EQ(find_count, 4);
nullmap_column->get_data()[1] = 0;
nullmap_column->get_data()[3] = 0;
find_count = bloom_filter_func2.find_fixed_len_olap_engine(
- reinterpret_cast<const char*>(strings.data()),
nullmap_column->get_data().data(),
- offsets.data(), 4, false);
+ *probe_column, nullmap_column->get_data().data(), offsets.data(),
4, false);
ASSERT_EQ(find_count, 2);
ASSERT_EQ(offsets[0], 0);
@@ -417,7 +411,7 @@ TEST_F(BloomFilterFuncTest, FindDictOlapEngine) {
std::vector<StringRef> dicts = {StringRef("aa"), StringRef("bb"),
StringRef("cc"),
StringRef("dd"), StringRef("aab"),
StringRef("bbc"),
StringRef("ccd"), StringRef("dde")};
- auto column = ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_VARCHAR);
+ auto column = ColumnDictI32::create();
column->reserve(count);
std::vector<int32_t> data(count);
for (size_t i = 0; i != count; ++i) {
@@ -483,9 +477,8 @@ TEST_F(BloomFilterFuncTest, FindFixedLenOlapEngine) {
PODArray<uint8_t> nullmap;
uint8_t flag = 0;
nullmap.assign(count, flag);
- auto find_count = bloom_filter_func.find_fixed_len_olap_engine(
- reinterpret_cast<const char*>(decimal_column2->get_data().data()),
nullmap.data(),
- offsets.data(), count, true);
+ auto find_count =
bloom_filter_func.find_fixed_len_olap_engine(*decimal_column2, nullmap.data(),
+
offsets.data(), count, true);
ASSERT_EQ(find_count, count);
BloomFilterFunc<PrimitiveType::TYPE_CHAR> bloom_filter_func2(true);
@@ -500,29 +493,28 @@ TEST_F(BloomFilterFuncTest, FindFixedLenOlapEngine) {
// CHAR padding is stripped at the page decoder now, so the runtime BF
// probe sees natural-length StringRefs; no trailing '\0' bytes here.
- StringRef strings[] = {StringRef("aa"), StringRef("bb"), StringRef("cc"),
StringRef("dd"),
- StringRef("ef")};
+ auto probe_column = ColumnHelper::create_column<DataTypeString>({"aa",
"bb", "cc", "dd", "ef"});
PODArray<uint16_t> offsets2(5);
std::iota(offsets2.begin(), offsets2.end(), 0);
- find_count = bloom_filter_func2.find_fixed_len_olap_engine(
- reinterpret_cast<const char*>(&strings[0]), nullmap.data(),
offsets2.data(), 5, false);
+ find_count = bloom_filter_func2.find_fixed_len_olap_engine(*probe_column,
nullmap.data(),
+
offsets2.data(), 5, false);
ASSERT_EQ(find_count, 4);
std::iota(offsets2.begin(), offsets2.end(), 0);
- find_count = bloom_filter_func2.find_fixed_len_olap_engine(
- reinterpret_cast<const char*>(&strings[0]), nullmap.data(),
offsets2.data(), 5, true);
+ find_count = bloom_filter_func2.find_fixed_len_olap_engine(*probe_column,
nullmap.data(),
+
offsets2.data(), 5, true);
ASSERT_EQ(find_count, 4);
std::iota(offsets2.begin(), offsets2.end(), 0);
- find_count = bloom_filter_func2.find_fixed_len_olap_engine(
- reinterpret_cast<const char*>(&strings[0]), nullptr,
offsets2.data(), 5, false);
+ find_count = bloom_filter_func2.find_fixed_len_olap_engine(*probe_column,
nullptr,
+
offsets2.data(), 5, false);
ASSERT_EQ(find_count, 4);
std::iota(offsets2.begin(), offsets2.end(), 0);
- find_count = bloom_filter_func2.find_fixed_len_olap_engine(
- reinterpret_cast<const char*>(&strings[0]), nullptr,
offsets2.data(), 5, true);
+ find_count = bloom_filter_func2.find_fixed_len_olap_engine(*probe_column,
nullptr,
+
offsets2.data(), 5, true);
ASSERT_EQ(find_count, 4);
PODArray<uint8_t> nullmap2;
@@ -531,8 +523,8 @@ TEST_F(BloomFilterFuncTest, FindFixedLenOlapEngine) {
nullmap2[2] = 1;
std::iota(offsets2.begin(), offsets2.end(), 0);
- find_count = bloom_filter_func2.find_fixed_len_olap_engine(
- reinterpret_cast<const char*>(&strings[0]), nullmap2.data(),
offsets2.data(), 5, false);
+ find_count = bloom_filter_func2.find_fixed_len_olap_engine(*probe_column,
nullmap2.data(),
+
offsets2.data(), 5, false);
ASSERT_EQ(find_count, 2);
ASSERT_EQ(offsets2[0], 0);
ASSERT_EQ(offsets2[1], 3);
diff --git a/be/test/format/parquet/byte_array_dict_decoder_test.cpp
b/be/test/format/parquet/byte_array_dict_decoder_test.cpp
index 29213246db3..2e3f398ef56 100644
--- a/be/test/format/parquet/byte_array_dict_decoder_test.cpp
+++ b/be/test/format/parquet/byte_array_dict_decoder_test.cpp
@@ -188,7 +188,7 @@ TEST_F(ByteArrayDictDecoderTest, test_empty_dict) {
// Test decoding with ColumnDictI32
TEST_F(ByteArrayDictDecoderTest, test_decode_with_column_dict_i32) {
// Create ColumnDictI32 column
- MutableColumnPtr column =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_VARCHAR);
+ MutableColumnPtr column = ColumnDictI32::create();
DataTypePtr data_type = std::make_shared<DataTypeInt32>();
// RLE encoded data: 4 zeros followed by 1, 2, 1, padded to 8 values, [0 0
0 0 1 2 1]
@@ -231,7 +231,7 @@ TEST_F(ByteArrayDictDecoderTest,
test_decode_with_column_dict_i32) {
// Test decoding with ColumnDictI32 and filter
TEST_F(ByteArrayDictDecoderTest, test_decode_with_column_dict_i32_with_filter)
{
// Create ColumnDictI32 column
- MutableColumnPtr column =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_VARCHAR);
+ MutableColumnPtr column = ColumnDictI32::create();
DataTypePtr data_type = std::make_shared<DataTypeInt32>();
// RLE encoded data: 4 zeros followed by 1, 2, 1, padded to 8 values, [0 0
0 0 1 2 1]
@@ -272,7 +272,7 @@ TEST_F(ByteArrayDictDecoderTest,
test_decode_with_column_dict_i32_with_filter) {
// Test decoding with ColumnDictI32 with filter and null
TEST_F(ByteArrayDictDecoderTest,
test_decode_with_column_dict_i32_with_filter_and_null) {
// Create ColumnDictI32 column
- MutableColumnPtr column =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_VARCHAR);
+ MutableColumnPtr column = ColumnDictI32::create();
DataTypePtr data_type = std::make_shared<DataTypeInt32>();
// RLE encoded data: 4 zeros followed by 2, padded to 8 values, [0 0 0 0 2]
diff --git a/be/test/format/parquet/fix_length_dict_decoder_test.cpp
b/be/test/format/parquet/fix_length_dict_decoder_test.cpp
index afd419c5469..5c8854b665b 100644
--- a/be/test/format/parquet/fix_length_dict_decoder_test.cpp
+++ b/be/test/format/parquet/fix_length_dict_decoder_test.cpp
@@ -287,7 +287,7 @@ TEST_F(FixLengthDictDecoderTest, test_empty_dict) {
// Test decoding with ColumnDictI32
TEST_F(FixLengthDictDecoderTest, test_decode_with_column_dict_i32) {
// Create ColumnDictI32 column
- MutableColumnPtr column =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_VARCHAR);
+ MutableColumnPtr column = ColumnDictI32::create();
DataTypePtr data_type = std::make_shared<DataTypeInt32>();
// RLE encoded data: 4 zeros followed by 1, 2, 1, padded to 8 values, [0 0
0 0 1 2 1]
@@ -330,7 +330,7 @@ TEST_F(FixLengthDictDecoderTest,
test_decode_with_column_dict_i32) {
// Test decoding with ColumnDictI32 and filter
TEST_F(FixLengthDictDecoderTest, test_decode_with_column_dict_i32_with_filter)
{
// Create ColumnDictI32 column
- MutableColumnPtr column =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_VARCHAR);
+ MutableColumnPtr column = ColumnDictI32::create();
DataTypePtr data_type = std::make_shared<DataTypeInt32>();
// RLE encoded data: 4 zeros followed by 1, 2, 1, padded to 8 values, [0 0
0 0 1 2 1]
@@ -371,7 +371,7 @@ TEST_F(FixLengthDictDecoderTest,
test_decode_with_column_dict_i32_with_filter) {
// Test decoding with ColumnDictI32 with filter and null
TEST_F(FixLengthDictDecoderTest,
test_decode_with_column_dict_i32_with_filter_and_null) {
// Create ColumnDictI32 column
- MutableColumnPtr column =
ColumnDictI32::create(FieldType::OLAP_FIELD_TYPE_VARCHAR);
+ MutableColumnPtr column = ColumnDictI32::create();
DataTypePtr data_type = std::make_shared<DataTypeInt32>();
// RLE encoded data: 4 zeros followed by 2, padded to 8 values, [0 0 0 0 2]
diff --git a/be/test/storage/predicate/block_column_predicate_test.cpp
b/be/test/storage/predicate/block_column_predicate_test.cpp
index 30d0bc94dd4..56fe46a4345 100644
--- a/be/test/storage/predicate/block_column_predicate_test.cpp
+++ b/be/test/storage/predicate/block_column_predicate_test.cpp
@@ -32,7 +32,7 @@
#include "common/status.h"
#include "core/column/column.h"
-#include "core/column/predicate_column.h"
+#include "core/column/column_vector.h"
#include "core/data_type/define_primitive_type.h"
#include "core/field.h"
#include "core/type_limit.h"
@@ -77,7 +77,7 @@ public:
TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN_VEC) {
MutableColumns block;
- block.push_back(PredicateColumnType<TYPE_INT>::create());
+ block.push_back(ColumnInt32::create());
auto value = Field::create_field<TYPE_INT>(5);
int rows = 10;
@@ -97,13 +97,13 @@ TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN_VEC) {
selected_size = single_column_block_pred.evaluate(block, sel_idx.data(),
selected_size);
EXPECT_EQ(selected_size, 1);
- auto* pred_col =
reinterpret_cast<PredicateColumnType<TYPE_INT>*>(block[col_idx].get());
+ auto* pred_col = reinterpret_cast<ColumnInt32*>(block[col_idx].get());
EXPECT_EQ(pred_col->get_data()[sel_idx[0]], value.template
get<TYPE_INT>());
}
TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN_VEC) {
MutableColumns block;
- block.push_back(PredicateColumnType<TYPE_INT>::create());
+ block.push_back(ColumnInt32::create());
auto less_value = Field::create_field<TYPE_INT>(5);
auto great_value = Field::create_field<TYPE_INT>(3);
@@ -131,13 +131,13 @@ TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN_VEC) {
selected_size = and_block_column_pred.evaluate(block, sel_idx.data(),
selected_size);
EXPECT_EQ(selected_size, 1);
- auto* pred_col =
reinterpret_cast<PredicateColumnType<TYPE_INT>*>(block[col_idx].get());
+ auto* pred_col = reinterpret_cast<ColumnInt32*>(block[col_idx].get());
EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 4);
}
TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN_VEC) {
MutableColumns block;
- block.push_back(PredicateColumnType<TYPE_INT>::create());
+ block.push_back(ColumnInt32::create());
auto less_value = Field::create_field<TYPE_INT>(5);
auto great_value = Field::create_field<TYPE_INT>(3);
@@ -165,13 +165,13 @@ TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN_VEC) {
selected_size = or_block_column_pred.evaluate(block, sel_idx.data(),
selected_size);
EXPECT_EQ(selected_size, 10);
- auto* pred_col =
reinterpret_cast<PredicateColumnType<TYPE_INT>*>(block[col_idx].get());
+ auto* pred_col = reinterpret_cast<ColumnInt32*>(block[col_idx].get());
EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 0);
}
TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) {
MutableColumns block;
- block.push_back(PredicateColumnType<TYPE_INT>::create());
+ block.push_back(ColumnInt32::create());
auto less_value = Field::create_field<TYPE_INT>(5);
auto great_value = Field::create_field<TYPE_INT>(3);
@@ -208,7 +208,7 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) {
selected_size = or_block_column_pred.evaluate(block, sel_idx.data(),
selected_size);
EXPECT_EQ(selected_size, 4);
- auto* pred_col =
reinterpret_cast<PredicateColumnType<TYPE_INT>*>(block[col_idx].get());
+ auto* pred_col = reinterpret_cast<ColumnInt32*>(block[col_idx].get());
EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 0);
EXPECT_EQ(pred_col->get_data()[sel_idx[1]], 1);
EXPECT_EQ(pred_col->get_data()[sel_idx[2]], 2);
@@ -237,7 +237,7 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) {
TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN_VEC) {
MutableColumns block;
- block.push_back(PredicateColumnType<TYPE_INT>::create());
+ block.push_back(ColumnInt32::create());
auto less_value = Field::create_field<TYPE_INT>(5);
auto great_value = Field::create_field<TYPE_INT>(3);
@@ -274,7 +274,7 @@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN_VEC) {
selected_size = and_block_column_pred.evaluate(block, sel_idx.data(),
selected_size);
- auto* pred_col =
reinterpret_cast<PredicateColumnType<TYPE_INT>*>(block[col_idx].get());
+ auto* pred_col = reinterpret_cast<ColumnInt32*>(block[col_idx].get());
EXPECT_EQ(selected_size, 1);
EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 4);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]