This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d36e9bd523 [chore](scan) Disable low cardinality optimization for compaction (#18424)
d36e9bd523 is described below
commit d36e9bd5232fa3df72e90dbb79416bf5ef4c45e3
Author: Jerry Hu <[email protected]>
AuthorDate: Fri Apr 7 14:19:11 2023 +0800
[chore](scan) Disable low cardinality optimization for compaction (#18424)
---
be/src/olap/rowset/segment_v2/column_reader.cpp | 1 +
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +++-
be/src/olap/schema.cpp | 14 +++++++-------
be/src/olap/schema.h | 2 +-
4 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 3dc04a1587..5ddf1d5ec9 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -861,6 +861,7 @@ Status FileColumnIterator::init(const ColumnIteratorOptions& opts) {
}
RETURN_IF_ERROR(get_block_compression_codec(_reader->get_compression(),
&_compress_codec));
if (config::enable_low_cardinality_optimize &&
+ opts.io_ctx.reader_type == ReaderType::READER_QUERY &&
_reader->encoding_info()->encoding() == DICT_ENCODING) {
auto dict_encoding_type = _reader->get_dict_encoding_type();
if (dict_encoding_type == ColumnReader::UNKNOWN_DICT_ENCODING) {
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e020178667..a73bc5c808 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1325,6 +1325,7 @@ bool SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) {
if (field_type == OLAP_FIELD_TYPE_VARCHAR || field_type ==
OLAP_FIELD_TYPE_CHAR ||
field_type == OLAP_FIELD_TYPE_STRING) {
return config::enable_low_cardinality_optimize &&
+ _opts.io_ctx.reader_type == ReaderType::READER_QUERY &&
_column_iterators[_schema.unique_id(cid)]->is_all_dict_encoding();
} else if (field_type == OLAP_FIELD_TYPE_DECIMAL) {
return false;
@@ -1625,7 +1626,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
auto cid = _schema.column_id(i);
auto column_desc = _schema.column(cid);
if (_is_pred_column[cid]) {
- _current_return_columns[cid] =
Schema::get_predicate_column_ptr(*column_desc);
+ _current_return_columns[cid] =
+ Schema::get_predicate_column_ptr(*column_desc,
_opts.io_ctx.reader_type);
_current_return_columns[cid]->set_rowset_segment_id(
{_segment->rowset_id(), _segment->id()});
_current_return_columns[cid]->reserve(_opts.block_row_max);
diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp
index 5283713437..aa3f07e232 100644
--- a/be/src/olap/schema.cpp
+++ b/be/src/olap/schema.cpp
@@ -118,7 +118,7 @@ vectorized::IColumn::MutablePtr Schema::get_column_by_field(const Field& field)
}
vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field&
field,
- bool
is_nullable) {
+ const
ReaderType reader_type) {
vectorized::IColumn::MutablePtr ptr = nullptr;
switch (field.type()) {
case OLAP_FIELD_TYPE_BOOL:
@@ -160,7 +160,7 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi
case OLAP_FIELD_TYPE_CHAR:
case OLAP_FIELD_TYPE_VARCHAR:
case OLAP_FIELD_TYPE_STRING:
- if (config::enable_low_cardinality_optimize) {
+ if (config::enable_low_cardinality_optimize && reader_type ==
ReaderType::READER_QUERY) {
ptr =
doris::vectorized::ColumnDictionary<doris::vectorized::Int32>::create(
field.type());
} else {
@@ -181,29 +181,29 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi
break;
case OLAP_FIELD_TYPE_ARRAY:
ptr = doris::vectorized::ColumnArray::create(
- get_predicate_column_ptr(*field.get_sub_field(0)),
+ get_predicate_column_ptr(*field.get_sub_field(0), reader_type),
doris::vectorized::ColumnArray::ColumnOffsets::create());
break;
case OLAP_FIELD_TYPE_STRUCT: {
size_t field_size = field.get_sub_field_count();
doris::vectorized::MutableColumns columns(field_size);
for (size_t i = 0; i < field_size; i++) {
- columns[i] = get_predicate_column_ptr(*field.get_sub_field(i));
+ columns[i] = get_predicate_column_ptr(*field.get_sub_field(i),
reader_type);
}
ptr = doris::vectorized::ColumnStruct::create(std::move(columns));
break;
}
case OLAP_FIELD_TYPE_MAP:
ptr = doris::vectorized::ColumnMap::create(
- get_predicate_column_ptr(*field.get_sub_field(0)),
- get_predicate_column_ptr(*field.get_sub_field(1)),
+ get_predicate_column_ptr(*field.get_sub_field(0), reader_type),
+ get_predicate_column_ptr(*field.get_sub_field(1), reader_type),
doris::vectorized::ColumnArray::ColumnOffsets::create());
break;
default:
LOG(FATAL) << "Unexpected type when choosing predicate column, type="
<< field.type();
}
- if (field.is_nullable() || is_nullable) {
+ if (field.is_nullable()) {
return doris::vectorized::ColumnNullable::create(std::move(ptr),
doris::vectorized::ColumnUInt8::create());
}
diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h
index cc3a02fb7b..4d9594b344 100644
--- a/be/src/olap/schema.h
+++ b/be/src/olap/schema.h
@@ -132,7 +132,7 @@ public:
static vectorized::IColumn::MutablePtr get_column_by_field(const Field&
field);
static vectorized::IColumn::MutablePtr get_predicate_column_ptr(const
Field& field,
- bool
is_nullable = false);
+ const
ReaderType reader_type);
const std::vector<Field*>& columns() const { return _cols; }
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]