This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b30ed2dfbab [Chore](column) remove insert_from_not_nullable usage (#54871)
b30ed2dfbab is described below
commit b30ed2dfbab09ff774ca5e17ab1c53a9f812140a
Author: Pxl <[email protected]>
AuthorDate: Mon Aug 25 18:37:16 2025 +0800
[Chore](column) remove insert_from_not_nullable usage (#54871)
remove insert_from_not_nullable usage
---
be/src/exec/schema_scanner.cpp | 4 +-
.../exec/schema_scanner/schema_scanner_helper.cpp | 14 +-
be/src/pipeline/exec/repeat_operator.cpp | 6 +-
be/src/util/simd/bits.h | 3 +
be/src/vec/columns/column.h | 4 +-
be/src/vec/columns/column_array.cpp | 9 +-
be/src/vec/columns/column_nullable.cpp | 89 ++++------
be/src/vec/columns/column_nullable.h | 190 +++++++--------------
be/src/vec/core/sort_cursor.h | 5 +-
.../data_types/serde/data_type_nullable_serde.cpp | 2 +-
be/src/vec/exec/scan/scanner.cpp | 11 +-
be/src/vec/functions/function_rpc.cpp | 2 +-
be/src/vec/olap/block_reader.cpp | 2 +-
be/src/vec/olap/vertical_block_reader.cpp | 2 +-
be/src/vec/utils/util.hpp | 4 +-
be/test/vec/columns/column_nullable_test.cpp | 2 -
be/test/vec/columns/common_column_test.h | 2 +-
.../data_types/serde/data_type_serde_pb_test.cpp | 14 --
.../vec/data_types/serde/data_type_serde_test.cpp | 15 --
19 files changed, 127 insertions(+), 253 deletions(-)
diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp
index 6ba54e022fc..b1557eaf664 100644
--- a/be/src/exec/schema_scanner.cpp
+++ b/be/src/exec/schema_scanner.cpp
@@ -277,7 +277,7 @@ Status
SchemaScanner::fill_dest_column_for_range(vectorized::Block* block, size_
nullable_column->insert_data(nullptr, 0);
continue;
} else {
- nullable_column->get_null_map_data().emplace_back(0);
+ nullable_column->push_false_to_nullmap(1);
}
switch (col_desc.type) {
case TYPE_HLL: {
@@ -461,7 +461,7 @@ Status SchemaScanner::insert_block_column(TCell cell, int
col_index, vectorized:
return Status::InternalError(ss.str());
}
}
- nullable_column->get_null_map_data().emplace_back(0);
+ nullable_column->push_false_to_nullmap(1);
return Status::OK();
}
diff --git a/be/src/exec/schema_scanner/schema_scanner_helper.cpp
b/be/src/exec/schema_scanner/schema_scanner_helper.cpp
index bcffa4db99b..3e253aa7f95 100644
--- a/be/src/exec/schema_scanner/schema_scanner_helper.cpp
+++ b/be/src/exec/schema_scanner/schema_scanner_helper.cpp
@@ -36,7 +36,7 @@ void SchemaScannerHelper::insert_string_value(int col_index,
std::string str_val
auto* nullable_column =
assert_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();
assert_cast<vectorized::ColumnString*>(col_ptr)->insert_data(str_val.data(),
str_val.size());
- nullable_column->get_null_map_data().emplace_back(0);
+ nullable_column->push_false_to_nullmap(1);
}
void SchemaScannerHelper::insert_datetime_value(int col_index, const
std::vector<void*>& datas,
@@ -48,7 +48,7 @@ void SchemaScannerHelper::insert_datetime_value(int
col_index, const std::vector
auto data = datas[0];
assert_cast<vectorized::ColumnDateTime*>(col_ptr)->insert_data(reinterpret_cast<char*>(data),
0);
- nullable_column->get_null_map_data().emplace_back(0);
+ nullable_column->push_false_to_nullmap(1);
}
void SchemaScannerHelper::insert_datetime_value(int col_index, int64_t
timestamp,
@@ -66,7 +66,7 @@ void SchemaScannerHelper::insert_datetime_value(int
col_index, int64_t timestamp
auto data = datas[0];
assert_cast<vectorized::ColumnDateTime*>(col_ptr)->insert_data(reinterpret_cast<char*>(data),
0);
- nullable_column->get_null_map_data().emplace_back(0);
+ nullable_column->push_false_to_nullmap(1);
}
void SchemaScannerHelper::insert_bool_value(int col_index, bool bool_val,
@@ -76,7 +76,7 @@ void SchemaScannerHelper::insert_bool_value(int col_index,
bool bool_val,
auto* nullable_column =
assert_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();
assert_cast<vectorized::ColumnBool*>(col_ptr)->insert_value(bool_val);
- nullable_column->get_null_map_data().emplace_back(0);
+ nullable_column->push_false_to_nullmap(1);
}
void SchemaScannerHelper::insert_int32_value(int col_index, int32_t int_val,
@@ -86,7 +86,7 @@ void SchemaScannerHelper::insert_int32_value(int col_index,
int32_t int_val,
auto* nullable_column =
assert_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();
assert_cast<vectorized::ColumnInt32*>(col_ptr)->insert_value(int_val);
- nullable_column->get_null_map_data().emplace_back(0);
+ nullable_column->push_false_to_nullmap(1);
}
void SchemaScannerHelper::insert_int64_value(int col_index, int64_t int_val,
@@ -96,7 +96,7 @@ void SchemaScannerHelper::insert_int64_value(int col_index,
int64_t int_val,
auto* nullable_column =
assert_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();
assert_cast<vectorized::ColumnInt64*>(col_ptr)->insert_value(int_val);
- nullable_column->get_null_map_data().emplace_back(0);
+ nullable_column->push_false_to_nullmap(1);
}
void SchemaScannerHelper::insert_double_value(int col_index, double double_val,
@@ -106,6 +106,6 @@ void SchemaScannerHelper::insert_double_value(int
col_index, double double_val,
auto* nullable_column =
assert_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();
assert_cast<vectorized::ColumnFloat64*>(col_ptr)->insert_value(double_val);
- nullable_column->get_null_map_data().emplace_back(0);
+ nullable_column->push_false_to_nullmap(1);
}
} // namespace doris
diff --git a/be/src/pipeline/exec/repeat_operator.cpp
b/be/src/pipeline/exec/repeat_operator.cpp
index b3c35e5fe54..bbe98f41cca 100644
--- a/be/src/pipeline/exec/repeat_operator.cpp
+++ b/be/src/pipeline/exec/repeat_operator.cpp
@@ -130,6 +130,7 @@ Status
RepeatLocalState::get_repeated_block(vectorized::Block* input_block, int
const bool is_repeat_slot = p._all_slot_ids.contains(slot_id);
const bool is_set_null_slot =
!p._slot_id_set_list[repeat_id_idx].contains(slot_id);
const auto row_size = src_column.column->size();
+ vectorized::ColumnPtr src = src_column.column;
if (is_repeat_slot) {
DCHECK(p._output_slots[cur_col]->is_nullable());
auto* nullable_column =
@@ -139,8 +140,9 @@ Status
RepeatLocalState::get_repeated_block(vectorized::Block* input_block, int
nullable_column->insert_many_defaults(row_size);
} else {
if (!src_column.type->is_nullable()) {
-
nullable_column->insert_range_from_not_nullable(*src_column.column, 0,
- row_size);
+
nullable_column->get_nested_column().insert_range_from(*src_column.column, 0,
+
row_size);
+ nullable_column->push_false_to_nullmap(row_size);
} else {
nullable_column->insert_range_from(*src_column.column, 0,
row_size);
}
diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h
index 5953c651dc6..ad4f4ef1f9a 100644
--- a/be/src/util/simd/bits.h
+++ b/be/src/util/simd/bits.h
@@ -223,6 +223,9 @@ static size_t find_byte(const T* data, size_t start, size_t
end, T byte) {
template <typename T>
bool contain_byte(const T* __restrict data, const size_t length, const signed
char byte) {
+ if (length == 0) {
+ return false;
+ }
return nullptr != std::memchr(reinterpret_cast<const void*>(data), byte,
length);
}
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index b8b88217ea9..406c39460bd 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -571,8 +571,8 @@ public:
// true if column has null element
virtual bool has_null() const { return false; }
- // true if column has null element [0,size)
- virtual bool has_null(size_t size) const { return false; }
+ // true if column has null element [begin, end)
+ virtual bool has_null(size_t begin, size_t end) const { return false; }
virtual bool is_exclusive() const { return use_count() == 1; }
diff --git a/be/src/vec/columns/column_array.cpp
b/be/src/vec/columns/column_array.cpp
index 15fdb9ada25..746f647e36a 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -446,14 +446,11 @@ void ColumnArray::insert_from(const IColumn& src_, size_t
n) {
size_t size = src.size_at(n);
size_t offset = src.offset_at(n);
- if (!get_data().is_nullable() && src.get_data().is_nullable()) {
- // Note: we can't process the case of 'Array(Nullable(nest))'
+ if ((!get_data().is_nullable() && src.get_data().is_nullable()) ||
+ (get_data().is_nullable() && !src.get_data().is_nullable())) {
+ // Note: we can't process the case of 'Array(Nullable(nest))' or
'Array(NotNullable(nest))'
throw Exception(ErrorCode::INTERNAL_ERROR, "insert '{}' into '{}'",
src.get_name(),
get_name());
- } else if (get_data().is_nullable() && !src.get_data().is_nullable()) {
- // Note: here we should process the case of 'Array(NotNullable(nest))'
- reinterpret_cast<ColumnNullable*>(&get_data())
- ->insert_range_from_not_nullable(src.get_data(), offset, size);
} else {
get_data().insert_range_from(src.get_data(), offset, size);
}
diff --git a/be/src/vec/columns/column_nullable.cpp
b/be/src/vec/columns/column_nullable.cpp
index d180bf8dd84..b5537692307 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -26,22 +26,21 @@
#include "vec/common/sip_hash.h"
#include "vec/core/sort_block.h"
#include "vec/data_types/data_type.h"
-#include "vec/utils/util.hpp"
namespace doris::vectorized {
#include "common/compile_check_begin.h"
ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_,
MutableColumnPtr&& null_map_)
- : NullMapProvider(std::move(null_map_)),
nested_column(std::move(nested_column_)) {
+ : _nested_column(std::move(nested_column_)),
_null_map(std::move(null_map_)) {
/// ColumnNullable cannot have constant nested column. But constant
argument could be passed. Materialize it.
- nested_column = get_nested_column().convert_to_full_column_if_const();
+ _nested_column = get_nested_column().convert_to_full_column_if_const();
// after convert const column to full column, it may be a nullable column
- if (nested_column->is_nullable()) {
- assert_cast<ColumnNullable&>(*nested_column)
+ if (_nested_column->is_nullable()) {
+ assert_cast<ColumnNullable&>(*_nested_column)
.apply_null_map(static_cast<const
ColumnUInt8&>(get_null_map_column()));
-
reset_null_map(assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr());
- nested_column =
assert_cast<ColumnNullable&>(*nested_column).get_nested_column_ptr();
+ _null_map =
assert_cast<ColumnNullable&>(*_nested_column).get_null_map_column_ptr();
+ _nested_column =
assert_cast<ColumnNullable&>(*_nested_column).get_nested_column_ptr();
}
if (is_column_const(get_null_map_column())) [[unlikely]] {
@@ -49,7 +48,6 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&&
nested_column_, MutableColumnP
"ColumnNullable cannot have constant null map");
__builtin_unreachable();
}
- _need_update_has_null = true;
}
void ColumnNullable::shrink_padding_chars() {
@@ -58,8 +56,8 @@ void ColumnNullable::shrink_padding_chars() {
void ColumnNullable::update_xxHash_with_value(size_t start, size_t end,
uint64_t& hash,
const uint8_t* __restrict
null_data) const {
- if (!has_null()) {
- nested_column->update_xxHash_with_value(start, end, hash, nullptr);
+ if (!has_null(start, end)) {
+ _nested_column->update_xxHash_with_value(start, end, hash, nullptr);
} else {
const auto* __restrict real_null_data =
assert_cast<const
ColumnUInt8&>(get_null_map_column()).get_data().data();
@@ -68,14 +66,14 @@ void ColumnNullable::update_xxHash_with_value(size_t start,
size_t end, uint64_t
hash = HashUtil::xxHash64NullWithSeed(hash);
}
}
- nested_column->update_xxHash_with_value(start, end, hash,
real_null_data);
+ _nested_column->update_xxHash_with_value(start, end, hash,
real_null_data);
}
}
void ColumnNullable::update_crc_with_value(size_t start, size_t end, uint32_t&
hash,
const uint8_t* __restrict
null_data) const {
- if (!has_null()) {
- nested_column->update_crc_with_value(start, end, hash, nullptr);
+ if (!has_null(start, end)) {
+ _nested_column->update_crc_with_value(start, end, hash, nullptr);
} else {
const auto* __restrict real_null_data =
assert_cast<const
ColumnUInt8&>(get_null_map_column()).get_data().data();
@@ -84,7 +82,7 @@ void ColumnNullable::update_crc_with_value(size_t start,
size_t end, uint32_t& h
hash = HashUtil::zlib_crc_hash_null(hash);
}
}
- nested_column->update_crc_with_value(start, end, hash, real_null_data);
+ _nested_column->update_crc_with_value(start, end, hash,
real_null_data);
}
}
@@ -105,14 +103,14 @@ void ColumnNullable::update_crcs_with_value(uint32_t*
__restrict hashes, doris::
const auto* __restrict real_null_data =
assert_cast<const
ColumnUInt8&>(get_null_map_column()).get_data().data();
if (!has_null()) {
- nested_column->update_crcs_with_value(hashes, type, rows, offset,
nullptr);
+ _nested_column->update_crcs_with_value(hashes, type, rows, offset,
nullptr);
} else {
for (int i = 0; i < s; ++i) {
if (real_null_data[i] != 0) {
hashes[i] = HashUtil::zlib_crc_hash_null(hashes[i]);
}
}
- nested_column->update_crcs_with_value(hashes, type, rows, offset,
real_null_data);
+ _nested_column->update_crcs_with_value(hashes, type, rows, offset,
real_null_data);
}
}
@@ -123,14 +121,14 @@ void ColumnNullable::update_hashes_with_value(uint64_t*
__restrict hashes,
const auto* __restrict real_null_data =
assert_cast<const
ColumnUInt8&>(get_null_map_column()).get_data().data();
if (!has_null()) {
- nested_column->update_hashes_with_value(hashes, nullptr);
+ _nested_column->update_hashes_with_value(hashes, nullptr);
} else {
for (int i = 0; i < s; ++i) {
if (real_null_data[i] != 0) {
hashes[i] = HashUtil::xxHash64NullWithSeed(hashes[i]);
}
}
- nested_column->update_hashes_with_value(hashes, real_null_data);
+ _nested_column->update_hashes_with_value(hashes, real_null_data);
}
}
@@ -177,11 +175,9 @@ void ColumnNullable::insert_data(const char* pos, size_t
length) {
if (pos == nullptr) {
get_nested_column().insert_default();
get_null_map_data().push_back(1);
- _has_null = true;
- _need_update_has_null = false;
} else {
get_nested_column().insert_data(pos, length);
- _push_false_to_nullmap(1);
+ push_false_to_nullmap(1);
}
}
@@ -189,24 +185,23 @@ void ColumnNullable::insert_many_strings(const StringRef*
strings, size_t num) {
auto not_null_count = 0;
for (size_t i = 0; i != num; ++i) {
if (strings[i].data == nullptr) {
- _push_false_to_nullmap(not_null_count);
+ push_false_to_nullmap(not_null_count);
not_null_count = 0;
get_null_map_data().push_back(1);
- _has_null = true;
} else {
not_null_count++;
}
}
if (not_null_count) {
- _push_false_to_nullmap(not_null_count);
+ push_false_to_nullmap(not_null_count);
}
- nested_column->insert_many_strings(strings, num);
+ _nested_column->insert_many_strings(strings, num);
}
void ColumnNullable::insert_many_from(const IColumn& src, size_t position,
size_t length) {
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
get_null_map_column().insert_many_from(nullable_col.get_null_map_column(),
position, length);
- get_nested_column().insert_many_from(*nullable_col.nested_column,
position, length);
+ get_nested_column().insert_many_from(*nullable_col._nested_column,
position, length);
}
StringRef ColumnNullable::serialize_value_into_arena(size_t n, Arena& arena,
@@ -230,8 +225,6 @@ size_t ColumnNullable::deserialize_impl(const char* pos) {
sz += get_nested_column().deserialize_impl(pos + sz);
} else {
get_nested_column().insert_default();
- _has_null = true;
- _need_update_has_null = false;
}
return sz;
}
@@ -264,7 +257,7 @@ void ColumnNullable::serialize_vec(StringRef* keys, size_t
num_rows) const {
(char*)&arr[i]);
keys[i].size += sizeof(NullMap::value_type);
}
- nested_column->serialize_vec(keys, num_rows);
+ _nested_column->serialize_vec(keys, num_rows);
}
}
@@ -284,14 +277,14 @@ void
ColumnNullable::insert_range_from_ignore_overflow(const doris::vectorized::
size_t start, size_t
length) {
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(),
start, length);
-
get_nested_column().insert_range_from_ignore_overflow(*nullable_col.nested_column,
start,
+
get_nested_column().insert_range_from_ignore_overflow(*nullable_col._nested_column,
start,
length);
}
void ColumnNullable::insert_range_from(const IColumn& src, size_t start,
size_t length) {
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(),
start, length);
- get_nested_column().insert_range_from(*nullable_col.nested_column, start,
length);
+ get_nested_column().insert_range_from(*nullable_col._nested_column, start,
length);
}
void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t*
indices_begin,
@@ -309,18 +302,16 @@ void
ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_indices_from(src_concrete.get_nested_column(),
indices_begin,
indices_end);
- _push_false_to_nullmap(indices_end - indices_begin);
+ push_false_to_nullmap(indices_end - indices_begin);
}
void ColumnNullable::insert(const Field& x) {
if (x.is_null()) {
get_nested_column().insert_default();
get_null_map_data().push_back(1);
- _has_null = true;
- _need_update_has_null = false;
} else {
get_nested_column().insert(x);
- _push_false_to_nullmap(1);
+ push_false_to_nullmap(1);
}
}
@@ -341,15 +332,9 @@ void
ColumnNullable::append_data_by_selector(IColumn::MutablePtr& res,
size_t end) const {
auto& res_column = assert_cast<ColumnNullable&>(*res);
auto res_nested_column = res_column.get_nested_column_ptr();
- this->get_nested_column().append_data_by_selector(res_nested_column,
selector, begin, end);
+ get_nested_column().append_data_by_selector(res_nested_column, selector,
begin, end);
auto res_null_map = res_column.get_null_map_column_ptr();
- this->get_null_map_column().append_data_by_selector(res_null_map,
selector, begin, end);
-}
-
-void ColumnNullable::insert_range_from_not_nullable(const IColumn& src, size_t
start,
- size_t length) {
- get_nested_column().insert_range_from(src, start, length);
- _push_false_to_nullmap(length);
+ get_null_map_column().append_data_by_selector(res_null_map, selector,
begin, end);
}
void ColumnNullable::pop_back(size_t n) {
@@ -372,7 +357,7 @@ size_t ColumnNullable::filter(const Filter& filter) {
Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t
sel_size, IColumn* col_ptr) {
auto* nullable_col_ptr = assert_cast<ColumnNullable*>(col_ptr);
- ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column;
+ ColumnPtr nest_col_ptr = nullable_col_ptr->_nested_column;
/// `get_null_map_data` will set `_need_update_has_null` to true
auto& res_nullmap = nullable_col_ptr->get_null_map_data();
@@ -529,7 +514,7 @@ void ColumnNullable::get_permutation(bool reverse, size_t
limit, int null_direct
void ColumnNullable::reserve(size_t n) {
get_nested_column().reserve(n);
- get_null_map_data(false).reserve(n);
+ get_null_map_data().reserve(n);
}
void ColumnNullable::resize(size_t n) {
@@ -594,18 +579,12 @@ void ColumnNullable::sort_column(const ColumnSorter*
sorter, EqualFlags& flags,
last_column);
}
-void ColumnNullable::_update_has_null() {
- const UInt8* null_pos = get_null_map_data().data();
- _has_null = simd::contain_byte(null_pos, get_null_map_data().size(), 1);
- _need_update_has_null = false;
+bool ColumnNullable::has_null(size_t begin, size_t end) const {
+ return simd::contain_byte(get_null_map_data().data() + begin, end - begin,
1);
}
-bool ColumnNullable::has_null(size_t size) const {
- if (!_has_null && !_need_update_has_null) {
- return false;
- }
- const UInt8* null_pos = get_null_map_data().data();
- return simd::contain_byte(null_pos, size, 1);
+bool ColumnNullable::has_null() const {
+ return has_null(0, size());
}
ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable) {
diff --git a/be/src/vec/columns/column_nullable.h
b/be/src/vec/columns/column_nullable.h
index 3375d8f2d61..3de193490d2 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -20,12 +20,6 @@
#pragma once
-#include <functional>
-#include <string>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
#include "common/compiler_util.h" // IWYU pragma: keep
#include "common/status.h"
#include "olap/olap_common.h"
@@ -49,69 +43,6 @@ class ColumnSorter;
using NullMap = ColumnUInt8::Container;
using ConstNullMapPtr = const NullMap*;
-/// use this to avoid directly access null_map forgetting modify
_need_update_has_null. see more in inner comments
-class NullMapProvider {
-public:
- NullMapProvider() = default;
- NullMapProvider(MutableColumnPtr&& null_map) :
_null_map(std::move(null_map)) {}
- void reset_null_map(MutableColumnPtr&& null_map) { _null_map =
std::move(null_map); }
-
- // return the column that represents the byte map. if want use null_map,
just call this.
- const ColumnPtr& get_null_map_column_ptr() const { return _null_map; }
- // for functions getting nullmap, we assume it will modify it. so set
`_need_update_has_null` to true. if you know it wouldn't,
- // call with arg false. but for the ops which will set _has_null
themselves, call `update_has_null()`
- MutableColumnPtr get_null_map_column_ptr(bool may_change = true) {
- if (may_change) {
- _need_update_has_null = true;
- }
- return _null_map->assume_mutable();
- }
- ColumnUInt8::WrappedPtr& get_null_map(bool may_change = true) {
- if (may_change) {
- _need_update_has_null = true;
- }
- return _null_map;
- }
-
- ColumnUInt8& get_null_map_column(bool may_change = true) {
- if (may_change) {
- _need_update_has_null = true;
- }
- return assert_cast<ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*_null_map);
- }
- const ColumnUInt8& get_null_map_column() const {
- return assert_cast<const ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*_null_map);
- }
-
- NullMap& get_null_map_data(bool may_change = true) {
- return get_null_map_column(may_change).get_data();
- }
- const NullMap& get_null_map_data() const { return
get_null_map_column().get_data(); }
-
- void clear_null_map() {
assert_cast<ColumnUInt8*>(_null_map.get())->clear(); }
-
- void update_has_null(bool new_value) {
- _has_null = new_value;
- _need_update_has_null = false;
- }
-
-protected:
- /**
- * Here we have three variables which serve for `has_null()` judgement. If
we have known the nullity of object, no need
- * to check through the `null_map` to get the answer until the next time
we modify it. Here `_has_null` is just the answer
- * we cached. `_need_update_has_null` indicates there's modification or
not since we got `_has_null()` last time. So in
- * `_has_null()` we can check the two vars to know if there's need to
update `has_null` or not.
- * If you just want QUERY BUT NOT MODIFY, make sure the caller is const.
There will be no perf overhead for const overload.
- * Otherwise, this class, as the base class, will make it no possible to
directly visit `null_map` forgetting to change the
- * protected flags. Just call the interface is ok.
- */
- bool _need_update_has_null = true;
- bool _has_null = true;
-
-private:
- IColumn::WrappedPtr _null_map;
-};
-
/// Class that specifies nullable columns. A nullable column represents
/// a column, which may have any type, provided with the possibility of
/// storing NULL values. For this purpose, a ColumnNullable object stores
@@ -121,7 +52,7 @@ private:
/// over a bitmap because columns are usually stored on disk as compressed
/// files. In this regard, using a bitmap instead of a byte map would
/// greatly complicate the implementation with little to no benefits.
-class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>, public
NullMapProvider {
+class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
private:
friend class COWHelper<IColumn, ColumnNullable>;
@@ -138,26 +69,28 @@ public:
null_map_->assume_mutable());
}
- template <typename... Args, typename =
std::enable_if_t<IsMutableColumns<Args...>::value>>
- static MutablePtr create(Args&&... args) {
+ template <typename... Args>
+ static MutablePtr create(Args&&... args)
+ requires IsMutableColumns<Args...>::value
+ {
return Base::create(std::forward<Args>(args)...);
}
void sanity_check() const override {
- if (nested_column->size() != get_null_map_data().size()) {
+ if (_nested_column->size() != get_null_map_data().size()) {
throw doris::Exception(
ErrorCode::INTERNAL_ERROR,
"Size of nested column {} with size {} is not equal to
size of null map {}",
- nested_column->get_name(), nested_column->size(),
get_null_map_data().size());
+ _nested_column->get_name(), _nested_column->size(),
get_null_map_data().size());
}
- nested_column->sanity_check();
+ _nested_column->sanity_check();
}
void shrink_padding_chars() override;
- bool is_variable_length() const override { return
nested_column->is_variable_length(); }
+ bool is_variable_length() const override { return
_nested_column->is_variable_length(); }
- std::string get_name() const override { return "Nullable(" +
nested_column->get_name() + ")"; }
+ std::string get_name() const override { return "Nullable(" +
_nested_column->get_name() + ")"; }
MutableColumnPtr clone_resized(size_t size) const override;
size_t size() const override {
return assert_cast<const ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(get_null_map_column())
@@ -170,13 +103,13 @@ public:
Field operator[](size_t n) const override;
void get(size_t n, Field& res) const override;
bool get_bool(size_t n) const override {
- return is_null_at(n) ? false : nested_column->get_bool(n);
+ return is_null_at(n) ? false : _nested_column->get_bool(n);
}
// column must be nullable(uint8)
bool get_bool_inline(size_t n) const {
return is_null_at(n) ? false
: assert_cast<const ColumnUInt8*,
TypeCheckOnRelease::DISABLE>(
- nested_column.get())
+ _nested_column.get())
->get_bool(n);
}
StringRef get_data_at(size_t n) const override;
@@ -219,34 +152,25 @@ public:
void insert_from_with_type(const IColumn& src, size_t n) {
const auto& src_concrete =
assert_cast<const ColumnNullable&,
TypeCheckOnRelease::DISABLE>(src);
- assert_cast<ColumnType*,
TypeCheckOnRelease::DISABLE>(nested_column.get())
+ assert_cast<ColumnType*,
TypeCheckOnRelease::DISABLE>(_nested_column.get())
->insert_from(src_concrete.get_nested_column(), n);
- auto is_null = src_concrete.get_null_map_data()[n];
- if (is_null) {
- get_null_map_data().push_back(1);
- _has_null = true;
- _need_update_has_null = false;
- } else {
- _push_false_to_nullmap(1);
- }
+ _null_map->insert_from(src_concrete.get_null_map_column(), n);
}
- void insert_range_from_not_nullable(const IColumn& src, size_t start,
size_t length);
-
void insert_many_fix_len_data(const char* pos, size_t num) override {
- _push_false_to_nullmap(num);
+ push_false_to_nullmap(num);
get_nested_column().insert_many_fix_len_data(pos, num);
}
void insert_many_raw_data(const char* pos, size_t num) override {
DCHECK(pos);
- _push_false_to_nullmap(num);
+ push_false_to_nullmap(num);
get_nested_column().insert_many_raw_data(pos, num);
}
void insert_many_dict_data(const int32_t* data_array, size_t start_index,
const StringRef* dict,
size_t data_num, uint32_t dict_num) override {
- _push_false_to_nullmap(data_num);
+ push_false_to_nullmap(data_num);
get_nested_column().insert_many_dict_data(data_array, start_index,
dict, data_num,
dict_num);
}
@@ -256,7 +180,7 @@ public:
if (UNLIKELY(num == 0)) {
return;
}
- _push_false_to_nullmap(num);
+ push_false_to_nullmap(num);
get_nested_column().insert_many_continuous_binary_data(data, offsets,
num);
}
@@ -264,20 +188,16 @@ public:
void insert_default() override {
get_nested_column().insert_default();
get_null_map_data().push_back(1);
- _has_null = true;
- _need_update_has_null = false;
}
void insert_many_defaults(size_t length) override {
get_nested_column().insert_many_defaults(length);
get_null_map_data().resize_fill(get_null_map_data().size() + length,
1);
- _has_null = true;
- _need_update_has_null = false;
}
void insert_not_null_elements(size_t num) {
get_nested_column().insert_many_defaults(num);
- _push_false_to_nullmap(num);
+ push_false_to_nullmap(num);
}
void pop_back(size_t n) override;
@@ -313,18 +233,18 @@ public:
const uint8_t* __restrict null_data) const
override;
ColumnPtr convert_column_if_overflow() override {
- nested_column = nested_column->convert_column_if_overflow();
+ _nested_column = _nested_column->convert_column_if_overflow();
return get_ptr();
}
void for_each_subcolumn(ColumnCallback callback) override {
- callback(nested_column);
- callback(get_null_map());
+ callback(_nested_column);
+ callback(_null_map);
}
bool structure_equals(const IColumn& rhs) const override {
if (const auto* rhs_nullable = typeid_cast<const
ColumnNullable*>(&rhs)) {
- return
nested_column->structure_equals(*rhs_nullable->nested_column);
+ return
_nested_column->structure_equals(*rhs_nullable->_nested_column);
}
return false;
}
@@ -334,27 +254,26 @@ public:
bool is_column_string() const override { return
get_nested_column().is_column_string(); }
bool is_exclusive() const override {
- return IColumn::is_exclusive() && nested_column->is_exclusive() &&
+ return IColumn::is_exclusive() && _nested_column->is_exclusive() &&
get_null_map_column().is_exclusive();
}
bool only_null() const override { return size() == 1 && is_null_at(0); }
// used in schema change
- void change_nested_column(ColumnPtr& other) { ((ColumnPtr&)nested_column)
= other; }
+ void change_nested_column(ColumnPtr& other) { ((ColumnPtr&)_nested_column)
= other; }
/// Return the column that represents values.
- IColumn& get_nested_column() { return *nested_column; }
- const IColumn& get_nested_column() const { return *nested_column; }
+ IColumn& get_nested_column() { return *_nested_column; }
+ const IColumn& get_nested_column() const { return *_nested_column; }
- const ColumnPtr& get_nested_column_ptr() const { return nested_column; }
+ const ColumnPtr& get_nested_column_ptr() const { return _nested_column; }
- MutableColumnPtr get_nested_column_ptr() { return
nested_column->assume_mutable(); }
+ MutableColumnPtr get_nested_column_ptr() { return
_nested_column->assume_mutable(); }
void clear() override {
- clear_null_map();
- nested_column->clear();
- _has_null = false;
+ _null_map->clear();
+ _nested_column->clear();
}
/// Apply the null byte map of a specified nullable column onto the
@@ -369,14 +288,9 @@ public:
/// Check that size of null map equals to size of nested column.
void check_consistency() const;
- bool has_null() const override {
- if (UNLIKELY(_need_update_has_null)) {
- const_cast<ColumnNullable*>(this)->_update_has_null();
- }
- return _has_null;
- }
+ bool has_null(size_t begin, size_t end) const override;
- bool has_null(size_t size) const override;
+ bool has_null() const override;
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row =
0) override {
DCHECK(size() > self_row);
@@ -386,12 +300,12 @@ public:
self_row);
if (!nullable_rhs.is_null_at(row)) {
- nested_column->replace_column_data(*nullable_rhs.nested_column,
row, self_row);
+ _nested_column->replace_column_data(*nullable_rhs._nested_column,
row, self_row);
}
}
MutableColumnPtr convert_to_predicate_column_if_dictionary() override {
- nested_column =
get_nested_column().convert_to_predicate_column_if_dictionary();
+ _nested_column =
get_nested_column().convert_to_predicate_column_if_dictionary();
return get_ptr();
}
@@ -437,11 +351,11 @@ public:
EqualRange& range, bool last_column) const override;
void set_rowset_segment_id(std::pair<RowsetId, uint32_t>
rowset_segment_id) override {
- nested_column->set_rowset_segment_id(rowset_segment_id);
+ _nested_column->set_rowset_segment_id(rowset_segment_id);
}
std::pair<RowsetId, uint32_t> get_rowset_segment_id() const override {
- return nested_column->get_rowset_segment_id();
+ return _nested_column->get_rowset_segment_id();
}
void finalize() override { get_nested_column().finalize(); }
@@ -455,19 +369,35 @@ public:
size_t deserialize_impl(const char* pos) override;
size_t serialize_size_at(size_t row) const override {
return sizeof(NullMap::value_type) +
- (is_null_at(row) ? 0 : nested_column->serialize_size_at(row));
+ (is_null_at(row) ? 0 : _nested_column->serialize_size_at(row));
}
-private:
- void _update_has_null();
+ // return the column that represents the byte map. if want use null_map,
just call this.
+ const ColumnPtr& get_null_map_column_ptr() const { return _null_map; }
+ const ColumnUInt8& get_null_map_column() const {
+ return assert_cast<const ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*_null_map);
+ }
+ const NullMap& get_null_map_data() const { return
get_null_map_column().get_data(); }
- template <bool negative>
- void apply_null_map_impl(const ColumnUInt8& map);
+ MutableColumnPtr get_null_map_column_ptr() { return
_null_map->assume_mutable(); }
+ ColumnUInt8& get_null_map_column() {
+ return assert_cast<ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*_null_map);
+ }
+ NullMap& get_null_map_data() { return get_null_map_column().get_data(); }
// push not null value wouldn't change the nullity. no need to update
_has_null
- void _push_false_to_nullmap(size_t num) {
get_null_map_column(false).insert_many_vals(0, num); }
+ void push_false_to_nullmap(size_t num) {
get_null_map_column().insert_many_vals(0, num); }
+ void fill_false_to_nullmap(size_t num) {
+ _null_map->clear();
+ get_null_map_column().insert_many_vals(0, num);
+ }
+
+private:
+ template <bool negative>
+ void apply_null_map_impl(const ColumnUInt8& map);
- WrappedPtr nested_column;
+ WrappedPtr _nested_column;
+ WrappedPtr _null_map;
};
ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable = false);
diff --git a/be/src/vec/core/sort_cursor.h b/be/src/vec/core/sort_cursor.h
index 9327932270b..faced68cd6b 100644
--- a/be/src/vec/core/sort_cursor.h
+++ b/be/src/vec/core/sort_cursor.h
@@ -86,10 +86,7 @@ struct MergeSortCursorImpl {
columns.push_back(col.get());
}
for (auto& column_desc : desc) {
- size_t column_number = !column_desc.column_name.empty()
- ?
block->get_position_by_name(column_desc.column_name)
- : column_desc.column_number;
- sort_columns.push_back(columns[column_number]);
+ sort_columns.push_back(columns[column_desc.column_number]);
}
pos = 0;
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index 470361d1e47..3c749cb7906 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -267,7 +267,7 @@ Status DataTypeNullableSerDe::write_column_to_pb(const
IColumn& column, PValues&
auto row_count = cast_set<int>(end - start);
const auto& nullable_col = assert_cast<const ColumnNullable&>(column);
const auto& null_col = nullable_col.get_null_map_column();
- if (nullable_col.has_null(row_count)) {
+ if (nullable_col.has_null(start, end)) {
result.set_has_null(true);
auto* null_map = result.mutable_null_map();
null_map->Reserve(row_count);
diff --git a/be/src/vec/exec/scan/scanner.cpp b/be/src/vec/exec/scan/scanner.cpp
index 06c67673df7..dc7133927b1 100644
--- a/be/src/vec/exec/scan/scanner.cpp
+++ b/be/src/vec/exec/scan/scanner.cpp
@@ -20,6 +20,7 @@
#include <glog/logging.h>
#include "common/config.h"
+#include "common/status.h"
#include "pipeline/exec/scan_operator.h"
#include "runtime/descriptors.h"
#include "util/defer_op.h"
@@ -190,14 +191,10 @@ Status Scanner::_do_projections(vectorized::Block*
origin_block, vectorized::Blo
RETURN_IF_ERROR(_projections[i]->execute(&input_block,
&result_column_id));
auto column_ptr = input_block.get_by_position(result_column_id)
.column->convert_to_full_column_if_const();
- //TODO: this is a quick fix, we need a new function like
"change_to_nullable" to do it
- if (mutable_columns[i]->is_nullable() xor column_ptr->is_nullable()) {
- DCHECK(mutable_columns[i]->is_nullable() &&
!column_ptr->is_nullable());
- reinterpret_cast<ColumnNullable*>(mutable_columns[i].get())
- ->insert_range_from_not_nullable(*column_ptr, 0, rows);
- } else {
- mutable_columns[i]->insert_range_from(*column_ptr, 0, rows);
+ if (mutable_columns[i]->is_nullable() != column_ptr->is_nullable()) {
+ throw Exception(ErrorCode::INTERNAL_ERROR, "Nullable mismatch");
}
+ mutable_columns[i]->insert_range_from(*column_ptr, 0, rows);
}
DCHECK(mutable_block.rows() == rows);
output_block->set_columns(std::move(mutable_columns));
diff --git a/be/src/vec/functions/function_rpc.cpp
b/be/src/vec/functions/function_rpc.cpp
index 75e0bed743e..1d932323721 100644
--- a/be/src/vec/functions/function_rpc.cpp
+++ b/be/src/vec/functions/function_rpc.cpp
@@ -77,7 +77,7 @@ Status RPCFnImpl::_convert_block_to_proto(Block& block, const
ColumnNumbers& arg
for (size_t col_idx : arguments) {
PValues* arg = request->add_args();
ColumnWithTypeAndName& column = block.get_by_position(col_idx);
- arg->set_has_null(column.column->has_null(row_count));
+ arg->set_has_null(column.column->has_null(0, row_count));
auto col = column.column->convert_to_full_column_if_const();
RETURN_IF_ERROR(column.type->get_serde()->write_column_to_pb(*col,
*arg, 0, row_count));
}
diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp
index f40cb45652c..8417f0e1363 100644
--- a/be/src/vec/olap/block_reader.cpp
+++ b/be/src/vec/olap/block_reader.cpp
@@ -450,7 +450,7 @@ void BlockReader::_update_agg_data(MutableColumns& columns)
{
// calculate has_null_tag
for (auto idx : _agg_columns_idx) {
- _stored_has_null_tag[idx] =
_stored_data_columns[idx]->has_null(copy_size);
+ _stored_has_null_tag[idx] = _stored_data_columns[idx]->has_null(0,
copy_size);
}
// calculate aggregate and insert
diff --git a/be/src/vec/olap/vertical_block_reader.cpp
b/be/src/vec/olap/vertical_block_reader.cpp
index 3bc9ab1be43..5f6e376367d 100644
--- a/be/src/vec/olap/vertical_block_reader.cpp
+++ b/be/src/vec/olap/vertical_block_reader.cpp
@@ -294,7 +294,7 @@ void VerticalBlockReader::_update_agg_data(MutableColumns&
columns) {
// calculate has_null_tag
for (size_t idx = 0; idx < _return_columns.size(); ++idx) {
- _stored_has_null_tag[idx] =
_stored_data_columns[idx]->has_null(copy_size);
+ _stored_has_null_tag[idx] = _stored_data_columns[idx]->has_null(0,
copy_size);
}
// calculate aggregate and insert
diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp
index 2297c411832..13532fe16cf 100644
--- a/be/src/vec/utils/util.hpp
+++ b/be/src/vec/utils/util.hpp
@@ -247,11 +247,11 @@ inline void change_null_to_true(ColumnPtr column,
ColumnPtr argument = nullptr)
auto* __restrict data =
assert_cast<ColumnUInt8*>(nullable->get_nested_column_ptr().get())
->get_data()
.data();
- auto* __restrict null_map =
const_cast<uint8_t*>(nullable->get_null_map_data().data());
+ const NullMap& null_map = nullable->get_null_map_data();
for (size_t i = 0; i < rows; ++i) {
data[i] |= null_map[i];
}
- memset(null_map, 0, rows);
+ nullable->fill_false_to_nullmap(rows);
} else if (argument && argument->has_null()) {
const auto* __restrict null_map =
assert_cast<const
ColumnNullable*>(argument.get())->get_null_map_data().data();
diff --git a/be/test/vec/columns/column_nullable_test.cpp
b/be/test/vec/columns/column_nullable_test.cpp
index 6559aef78da..624a8b2d979 100644
--- a/be/test/vec/columns/column_nullable_test.cpp
+++ b/be/test/vec/columns/column_nullable_test.cpp
@@ -48,8 +48,6 @@ TEST(ColumnNullableTest, NullTest) {
EXPECT_TRUE(dst_col->has_null());
dst_col->clear();
EXPECT_FALSE(dst_col->has_null());
- dst_col->insert_range_from_not_nullable(*source_col, 5, 5);
- EXPECT_FALSE(dst_col->has_null());
dst_col->insert_range_from(
*ColumnNullable::create(std::move(source_col),
ColumnUInt8::create(10)), 5, 5);
EXPECT_FALSE(dst_col->has_null());
diff --git a/be/test/vec/columns/common_column_test.h
b/be/test/vec/columns/common_column_test.h
index 5d462baf122..536fa59c3e8 100644
--- a/be/test/vec/columns/common_column_test.h
+++ b/be/test/vec/columns/common_column_test.h
@@ -2357,7 +2357,7 @@ public:
if (source_column->size() == 1 &&
source_column->is_null_at(0)) {
EXPECT_EQ(source_column->only_null(), true);
EXPECT_EQ(source_column->has_null(), true);
- EXPECT_EQ(source_column->has_null(0), true);
+ EXPECT_EQ(source_column->has_null(0, 0), true);
} else {
EXPECT_EQ(source_column->only_null(), false);
}
diff --git a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
index 993693c0df2..0dc5bf80165 100644
--- a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
@@ -416,20 +416,6 @@ inline void serialize_and_deserialize_pb_test() {
}
// int with 1024 batch size
std::cout << "==== int with 1024 batch size === " << std::endl;
- {
- auto vec = vectorized::ColumnInt32::create();
- auto& data = vec->get_data();
- for (int i = 0; i < 1024; ++i) {
- data.push_back(i);
- }
- vectorized::DataTypePtr
data_type(std::make_shared<vectorized::DataTypeInt32>());
- vectorized::DataTypePtr nullable_data_type(
- std::make_shared<vectorized::DataTypeNullable>(data_type));
- auto nullable_column = nullable_data_type->create_column();
- ((vectorized::ColumnNullable*)nullable_column.get())
- ->insert_range_from_not_nullable(*vec, 0, 1024);
- check_pb_col(nullable_data_type, *nullable_column.get());
- }
// ipv4
std::cout << "==== ipv4 === " << std::endl;
{
diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_test.cpp
index 9ea8e14d462..f4e36d3ab47 100644
--- a/be/test/vec/data_types/serde/data_type_serde_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp
@@ -179,21 +179,6 @@ inline void serialize_and_deserialize_pb_test() {
check_pb_col(nullable_data_type, *nullable_column.get());
}
// int with 1024 batch size
- {
- auto vec = vectorized::ColumnInt32::create();
- auto& data = vec->get_data();
- for (int i = 0; i < 1024; ++i) {
- data.push_back(i);
- }
- std::cout << vec->size() << std::endl;
- vectorized::DataTypePtr
data_type(std::make_shared<vectorized::DataTypeInt32>());
- vectorized::DataTypePtr nullable_data_type(
- std::make_shared<vectorized::DataTypeNullable>(data_type));
- auto nullable_column = nullable_data_type->create_column();
- ((vectorized::ColumnNullable*)nullable_column.get())
- ->insert_range_from_not_nullable(*vec, 0, 1024);
- check_pb_col(nullable_data_type, *nullable_column.get());
- }
// ipv4
{
auto vec = vectorized::ColumnIPv4 ::create();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]