This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d36e9bd523 [chore](scan) Disable low cardinality optimization for compaction (#18424)
d36e9bd523 is described below

commit d36e9bd5232fa3df72e90dbb79416bf5ef4c45e3
Author: Jerry Hu <[email protected]>
AuthorDate: Fri Apr 7 14:19:11 2023 +0800

    [chore](scan) Disable low cardinality optimization for compaction (#18424)
---
 be/src/olap/rowset/segment_v2/column_reader.cpp    |  1 +
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  4 +++-
 be/src/olap/schema.cpp                             | 14 +++++++-------
 be/src/olap/schema.h                               |  2 +-
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 3dc04a1587..5ddf1d5ec9 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -861,6 +861,7 @@ Status FileColumnIterator::init(const 
ColumnIteratorOptions& opts) {
     }
     RETURN_IF_ERROR(get_block_compression_codec(_reader->get_compression(), 
&_compress_codec));
     if (config::enable_low_cardinality_optimize &&
+        opts.io_ctx.reader_type == ReaderType::READER_QUERY &&
         _reader->encoding_info()->encoding() == DICT_ENCODING) {
         auto dict_encoding_type = _reader->get_dict_encoding_type();
         if (dict_encoding_type == ColumnReader::UNKNOWN_DICT_ENCODING) {
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e020178667..a73bc5c808 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1325,6 +1325,7 @@ bool 
SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) {
         if (field_type == OLAP_FIELD_TYPE_VARCHAR || field_type == 
OLAP_FIELD_TYPE_CHAR ||
             field_type == OLAP_FIELD_TYPE_STRING) {
             return config::enable_low_cardinality_optimize &&
+                   _opts.io_ctx.reader_type == ReaderType::READER_QUERY &&
                    
_column_iterators[_schema.unique_id(cid)]->is_all_dict_encoding();
         } else if (field_type == OLAP_FIELD_TYPE_DECIMAL) {
             return false;
@@ -1625,7 +1626,8 @@ Status 
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
             auto cid = _schema.column_id(i);
             auto column_desc = _schema.column(cid);
             if (_is_pred_column[cid]) {
-                _current_return_columns[cid] = 
Schema::get_predicate_column_ptr(*column_desc);
+                _current_return_columns[cid] =
+                        Schema::get_predicate_column_ptr(*column_desc, 
_opts.io_ctx.reader_type);
                 _current_return_columns[cid]->set_rowset_segment_id(
                         {_segment->rowset_id(), _segment->id()});
                 _current_return_columns[cid]->reserve(_opts.block_row_max);
diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp
index 5283713437..aa3f07e232 100644
--- a/be/src/olap/schema.cpp
+++ b/be/src/olap/schema.cpp
@@ -118,7 +118,7 @@ vectorized::IColumn::MutablePtr 
Schema::get_column_by_field(const Field& field)
 }
 
 vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& 
field,
-                                                                 bool 
is_nullable) {
+                                                                 const 
ReaderType reader_type) {
     vectorized::IColumn::MutablePtr ptr = nullptr;
     switch (field.type()) {
     case OLAP_FIELD_TYPE_BOOL:
@@ -160,7 +160,7 @@ vectorized::IColumn::MutablePtr 
Schema::get_predicate_column_ptr(const Field& fi
     case OLAP_FIELD_TYPE_CHAR:
     case OLAP_FIELD_TYPE_VARCHAR:
     case OLAP_FIELD_TYPE_STRING:
-        if (config::enable_low_cardinality_optimize) {
+        if (config::enable_low_cardinality_optimize && reader_type == 
ReaderType::READER_QUERY) {
             ptr = 
doris::vectorized::ColumnDictionary<doris::vectorized::Int32>::create(
                     field.type());
         } else {
@@ -181,29 +181,29 @@ vectorized::IColumn::MutablePtr 
Schema::get_predicate_column_ptr(const Field& fi
         break;
     case OLAP_FIELD_TYPE_ARRAY:
         ptr = doris::vectorized::ColumnArray::create(
-                get_predicate_column_ptr(*field.get_sub_field(0)),
+                get_predicate_column_ptr(*field.get_sub_field(0), reader_type),
                 doris::vectorized::ColumnArray::ColumnOffsets::create());
         break;
     case OLAP_FIELD_TYPE_STRUCT: {
         size_t field_size = field.get_sub_field_count();
         doris::vectorized::MutableColumns columns(field_size);
         for (size_t i = 0; i < field_size; i++) {
-            columns[i] = get_predicate_column_ptr(*field.get_sub_field(i));
+            columns[i] = get_predicate_column_ptr(*field.get_sub_field(i), 
reader_type);
         }
         ptr = doris::vectorized::ColumnStruct::create(std::move(columns));
         break;
     }
     case OLAP_FIELD_TYPE_MAP:
         ptr = doris::vectorized::ColumnMap::create(
-                get_predicate_column_ptr(*field.get_sub_field(0)),
-                get_predicate_column_ptr(*field.get_sub_field(1)),
+                get_predicate_column_ptr(*field.get_sub_field(0), reader_type),
+                get_predicate_column_ptr(*field.get_sub_field(1), reader_type),
                 doris::vectorized::ColumnArray::ColumnOffsets::create());
         break;
     default:
         LOG(FATAL) << "Unexpected type when choosing predicate column, type=" 
<< field.type();
     }
 
-    if (field.is_nullable() || is_nullable) {
+    if (field.is_nullable()) {
         return doris::vectorized::ColumnNullable::create(std::move(ptr),
                                                          
doris::vectorized::ColumnUInt8::create());
     }
diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h
index cc3a02fb7b..4d9594b344 100644
--- a/be/src/olap/schema.h
+++ b/be/src/olap/schema.h
@@ -132,7 +132,7 @@ public:
     static vectorized::IColumn::MutablePtr get_column_by_field(const Field& 
field);
 
     static vectorized::IColumn::MutablePtr get_predicate_column_ptr(const 
Field& field,
-                                                                    bool 
is_nullable = false);
+                                                                    const 
ReaderType reader_type);
 
     const std::vector<Field*>& columns() const { return _cols; }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to