github-actions[bot] commented on code in PR #16934:
URL: https://github.com/apache/doris/pull/16934#discussion_r1111603806
##########
be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:
##########
@@ -513,42 +532,102 @@
}
}
} else {
- const IColumn::Filter& filter =
- assert_cast<const
doris::vectorized::ColumnVector<UInt8>&>(*filter_column)
- .get_data();
+ MutableColumnPtr mutable_holder =
+ filter_column->use_count() == 1
+ ? filter_column->assume_mutable()
+ : filter_column->clone_resized(filter_column->size());
+ ColumnUInt8* mutable_filter_column =
typeid_cast<ColumnUInt8*>(mutable_holder.get());
+ if (!mutable_filter_column) {
+ return Status::InvalidArgument(
+ "Illegal type {} of column for filter. Must be UInt8 or
Nullable(UInt8).",
+ filter_column->get_name());
+ }
- auto* __restrict filter_data = filter.data();
- const size_t size = filter.size();
- for (size_t i = 0; i < size; ++i) {
- (*_filter_ptr)[i] &= filter_data[i];
+ IColumn::Filter& filter = mutable_filter_column->get_data();
+
+ std::vector<FilterContext> filters;
+ if (_position_delete_ctx.has_filter) {
+ filters.emplace_back(_pos_delete_filter_ptr.get(), false);
+ }
+ if (_merge_filter(filter, filters)) {
+ RETURN_IF_ERROR(_filter_block_internal(block, columns_to_filter,
filter));
+ } else {
+ for (auto& col : columns_to_filter) {
+
std::move(*block->get_by_position(col).column).assume_mutable()->clear();
Review Comment:
warning: std::move of the const expression has no effect; remove std::move()
[performance-move-const-arg]
```suggestion
*block->get_by_position(col).column.assume_mutable()->clear();
```
##########
be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:
##########
@@ -496,15 +508,22 @@ Status RowGroupReader::_filter_block(Block* block, const
ColumnPtr filter_column
"Illegal type {} of column for filter. Must be UInt8 or
Nullable(UInt8).",
filter_column->get_name());
}
- auto* __restrict null_map =
nullable_column->get_null_map_data().data();
IColumn::Filter& filter = concrete_column->get_data();
- auto* __restrict filter_data = filter.data();
- const size_t size = filter.size();
- for (size_t i = 0; i < size; ++i) {
- (*_filter_ptr)[i] &= (!null_map[i]) & filter_data[i];
+ const NullMap& null_map = nullable_column->get_null_map_data();
+
+ std::vector<FilterContext> filters;
+ filters.emplace_back(&null_map, true);
+ if (_position_delete_ctx.has_filter) {
+ filters.emplace_back(_pos_delete_filter_ptr.get(), false);
+ }
+ if (_merge_filter(filter, filters)) {
+ RETURN_IF_ERROR(_filter_block_internal(block, columns_to_filter,
filter));
+ } else {
+ for (auto& col : columns_to_filter) {
+
std::move(*block->get_by_position(col).column).assume_mutable()->clear();
Review Comment:
warning: std::move of the const expression has no effect; remove std::move()
[performance-move-const-arg]
```suggestion
*block->get_by_position(col).column.assume_mutable()->clear();
```
##########
be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:
##########
@@ -513,42 +532,102 @@
}
}
} else {
- const IColumn::Filter& filter =
- assert_cast<const
doris::vectorized::ColumnVector<UInt8>&>(*filter_column)
- .get_data();
+ MutableColumnPtr mutable_holder =
+ filter_column->use_count() == 1
+ ? filter_column->assume_mutable()
+ : filter_column->clone_resized(filter_column->size());
+ ColumnUInt8* mutable_filter_column =
typeid_cast<ColumnUInt8*>(mutable_holder.get());
+ if (!mutable_filter_column) {
+ return Status::InvalidArgument(
+ "Illegal type {} of column for filter. Must be UInt8 or
Nullable(UInt8).",
+ filter_column->get_name());
+ }
- auto* __restrict filter_data = filter.data();
- const size_t size = filter.size();
- for (size_t i = 0; i < size; ++i) {
- (*_filter_ptr)[i] &= filter_data[i];
+ IColumn::Filter& filter = mutable_filter_column->get_data();
+
+ std::vector<FilterContext> filters;
+ if (_position_delete_ctx.has_filter) {
+ filters.emplace_back(_pos_delete_filter_ptr.get(), false);
+ }
+ if (_merge_filter(filter, filters)) {
+ RETURN_IF_ERROR(_filter_block_internal(block, columns_to_filter,
filter));
+ } else {
+ for (auto& col : columns_to_filter) {
+
std::move(*block->get_by_position(col).column).assume_mutable()->clear();
+ }
}
- RETURN_IF_ERROR(_filter_block_internal(block, columns_to_filter));
}
Block::erase_useless_column(block, column_to_keep);
return Status::OK();
}
+bool RowGroupReader::_merge_filter(IColumn::Filter& to_filter,
+ std::vector<FilterContext>& from_filters) {
+ auto* __restrict to_filter_data = to_filter.data();
+ for (auto& filter_ctx : from_filters) {
+ auto* filter = filter_ctx.filter;
+ auto* __restrict filter_data = filter->data();
+ size_t count = filter->size() -
simd::count_zero_num((int8_t*)filter_data, filter->size());
+ if (!filter_ctx.reverse) {
+ if (count == 0) { // all zero
+ return false;
+ } else if (count == filter->size()) { // all one
+ continue;
+ } else {
+ const size_t size = filter->size();
+ for (size_t i = 0; i < size; ++i) {
+ to_filter_data[i] &= filter_data[i];
+ }
+ }
+ } else {
+ if (count == filter->size()) { // all zero
+ return false;
+ } else if (count == 0) { // all one
+ continue;
+ } else {
+ const size_t size = filter->size();
+ for (size_t i = 0; i < size; ++i) {
+ to_filter_data[i] &= (!filter_data[i]);
+ }
+ }
+ }
+ }
+ return true;
+}
+
Status RowGroupReader::_filter_block(Block* block, int column_to_keep,
const std::vector<uint32_t>&
columns_to_filter) {
- RETURN_IF_ERROR(_filter_block_internal(block, columns_to_filter));
+ if (_pos_delete_filter_ptr) {
+ RETURN_IF_ERROR(
+ _filter_block_internal(block, columns_to_filter,
(*_pos_delete_filter_ptr)));
+ }
Block::erase_useless_column(block, column_to_keep);
return Status::OK();
}
Status RowGroupReader::_filter_block_internal(Block* block,
- const std::vector<uint32_t>&
columns_to_filter) {
- size_t count = _filter_ptr->size() -
- simd::count_zero_num((int8_t*)_filter_ptr->data(),
_filter_ptr->size());
+ const std::vector<uint32_t>&
columns_to_filter,
+ const IColumn::Filter& filter) {
+ size_t filter_size = filter.size();
+ size_t count = filter_size - simd::count_zero_num((int8_t*)filter.data(),
filter_size);
if (count == 0) {
for (auto& col : columns_to_filter) {
std::move(*block->get_by_position(col).column).assume_mutable()->clear();
}
} else {
for (auto& col : columns_to_filter) {
- if (block->get_by_position(col).column->size() != count) {
- block->get_by_position(col).column =
-
block->get_by_position(col).column->filter(*_filter_ptr, count);
+ size_t size = block->get_by_position(col).column->size();
+ if (size != count) {
+ auto& column = block->get_by_position(col).column;
+ if (column->size() != count) {
+ if (column->use_count() == 1) {
+ const auto result_size =
column->assume_mutable()->filter(filter);
Review Comment:
warning: too few arguments to function call, expected 2, have 1
[clang-diagnostic-error]
```cpp
const auto result_size =
column->assume_mutable()->filter(filter);
^
```
**be/src/vec/columns/column.h:386:** 'filter' declared here
```cpp
virtual Ptr filter(const Filter& filt, ssize_t result_size_hint) const =
0;
^
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]