(doris) branch branch-3.0 updated: [enhancement](err-msg) Add detailed column and schema info when failed to create a column iterator (#38689)

dataroaring Fri, 23 Aug 2024 06:41:07 -0700

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 5cd41464bc3 [enhancement](err-msg) Add detailed column and schema info 
when failed to create a column iterator (#38689)
5cd41464bc3 is described below

commit 5cd41464bc346c24a611579cf6cb72867953018e
Author: Siyang Tang <82279870+tangsiyang2...@users.noreply.github.com>
AuthorDate: Wed Aug 21 12:03:37 2024 +0800

    [enhancement](err-msg) Add detailed column and schema info when failed to 
create a column iterator (#38689)
    
    ## Proposed changes
    
    As title.
---
 be/src/cloud/cloud_schema_change_job.cpp  | 10 ++++--
 be/src/olap/rowset/segment_v2/segment.cpp | 51 +++++++++++++++----------------
 2 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/be/src/cloud/cloud_schema_change_job.cpp 
b/be/src/cloud/cloud_schema_change_job.cpp
index ed3e5f9433f..254a0d8d966 100644
--- a/be/src/cloud/cloud_schema_change_job.cpp
+++ b/be/src/cloud/cloud_schema_change_job.cpp
@@ -283,9 +283,13 @@ Status 
CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam
             }
         }
 
-        RETURN_IF_ERROR(sc_procedure->process(rs_reader, rowset_writer.get(), 
_new_tablet,
-                                              _base_tablet, 
_base_tablet_schema,
-                                              _new_tablet_schema));
+        st = sc_procedure->process(rs_reader, rowset_writer.get(), 
_new_tablet, _base_tablet,
+                                   _base_tablet_schema, _new_tablet_schema);
+        if (!st.ok()) {
+            return Status::InternalError(
+                    "failed to process schema change on rowset, 
version=[{}-{}], status={}",
+                    rs_reader->version().first, rs_reader->version().second, 
st.to_string());
+        }
 
         RowsetSharedPtr new_rowset;
         st = rowset_writer->build(new_rowset);
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 54c77c8afc4..2666fc8b633 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -20,9 +20,8 @@
 #include <gen_cpp/PlanNodes_types.h>
 #include <gen_cpp/olap_file.pb.h>
 #include <gen_cpp/segment_v2.pb.h>
-#include <string.h>
 
-#include <algorithm>
+#include <cstring>
 #include <memory>
 #include <utility>
 
@@ -51,17 +50,13 @@
 #include "olap/rowset/segment_v2/segment_writer.h" // k_segment_magic_length
 #include "olap/schema.h"
 #include "olap/short_key_index.h"
-#include "olap/storage_engine.h"
 #include "olap/tablet_schema.h"
 #include "olap/types.h"
 #include "olap/utils.h"
-#include "runtime/define_primitive_type.h"
 #include "runtime/exec_env.h"
-#include "runtime/memory/mem_tracker.h"
 #include "runtime/query_context.h"
 #include "runtime/runtime_predicate.h"
 #include "runtime/runtime_state.h"
-#include "util/bvar_helper.h"
 #include "util/coding.h"
 #include "util/crc32c.h"
 #include "util/slice.h" // Slice
@@ -403,7 +398,7 @@ Status Segment::load_index() {
 Status Segment::_load_index_impl() {
     return _load_index_once.call([this] {
         if (_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != 
nullptr) {
-            _pk_index_reader.reset(new PrimaryKeyIndexReader());
+            _pk_index_reader = std::make_unique<PrimaryKeyIndexReader>();
             RETURN_IF_ERROR(_pk_index_reader->parse_index(_file_reader, 
*_pk_index_meta));
             // _meta_mem_usage += _pk_index_reader->get_memory_size();
             return Status::OK();
@@ -428,7 +423,7 @@ Status Segment::_load_index_impl() {
             DCHECK(footer.has_short_key_page_footer());
 
             // _meta_mem_usage += body.get_size();
-            _sk_index_decoder.reset(new ShortKeyIndexDecoder);
+            _sk_index_decoder = std::make_unique<ShortKeyIndexDecoder>();
             return _sk_index_decoder->parse(body, 
footer.short_key_page_footer());
         }
     });
@@ -440,8 +435,8 @@ vectorized::DataTypePtr 
Segment::get_data_type_of(vectorized::PathInDataPtr path
                                                   bool ignore_children) const {
     // Path has higher priority
     if (path != nullptr && !path->empty()) {
-        auto node = _sub_column_tree.find_leaf(*path);
-        auto sparse_node = _sparse_column_tree.find_exact(*path);
+        const auto* node = _sub_column_tree.find_leaf(*path);
+        const auto* sparse_node = _sparse_column_tree.find_exact(*path);
         if (node) {
             if (ignore_children || (node->children.empty() && sparse_node == 
nullptr)) {
                 return node->data.file_column_type;
@@ -469,7 +464,7 @@ Status Segment::_create_column_readers(const 
SegmentFooterPB& footer) {
     std::unordered_map<vectorized::PathInData, uint32_t, 
vectorized::PathInData::Hash>
             column_path_to_footer_ordinal;
     for (uint32_t ordinal = 0; ordinal < footer.columns().size(); ++ordinal) {
-        auto& column_pb = footer.columns(ordinal);
+        const auto& column_pb = footer.columns(ordinal);
         // column path for accessing subcolumns of variant
         if (column_pb.has_column_path_info()) {
             vectorized::PathInData path;
@@ -484,7 +479,7 @@ Status Segment::_create_column_readers(const 
SegmentFooterPB& footer) {
     }
     // init by unique_id
     for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); 
++ordinal) {
-        auto& column = _tablet_schema->column(ordinal);
+        const auto& column = _tablet_schema->column(ordinal);
         auto iter = column_id_to_footer_ordinal.find(column.unique_id());
         if (iter == column_id_to_footer_ordinal.end()) {
             continue;
@@ -501,7 +496,7 @@ Status Segment::_create_column_readers(const 
SegmentFooterPB& footer) {
 
     // init by column path
     for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); 
++ordinal) {
-        auto& column = _tablet_schema->column(ordinal);
+        const auto& column = _tablet_schema->column(ordinal);
         if (!column.has_path_info()) {
             continue;
         }
@@ -524,7 +519,7 @@ Status Segment::_create_column_readers(const 
SegmentFooterPB& footer) {
                         
vectorized::DataTypeFactory::instance().create_data_type(column_pb)});
         // init sparse columns paths and type info
         for (uint32_t ordinal = 0; ordinal < 
column_pb.sparse_columns().size(); ++ordinal) {
-            auto& spase_column_pb = column_pb.sparse_columns(ordinal);
+            const auto& spase_column_pb = column_pb.sparse_columns(ordinal);
             if (spase_column_pb.has_column_path_info()) {
                 vectorized::PathInData path;
                 path.from_protobuf(spase_column_pb.column_path_info());
@@ -544,7 +539,10 @@ Status Segment::_create_column_readers(const 
SegmentFooterPB& footer) {
 static Status new_default_iterator(const TabletColumn& tablet_column,
                                    std::unique_ptr<ColumnIterator>* iter) {
     if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) {
-        return Status::InternalError("invalid nonexistent column without 
default value.");
+        return Status::InternalError(
+                "invalid nonexistent column without default value. 
column_uid={}, column_name={}, "
+                "column_type={}",
+                tablet_column.unique_id(), tablet_column.name(), 
tablet_column.type());
     }
     auto type_info = get_type_info(&tablet_column);
     std::unique_ptr<DefaultValueColumnIterator> default_value_iter(new 
DefaultValueColumnIterator(
@@ -564,7 +562,7 @@ Status Segment::_new_iterator_with_variant_root(const 
TabletColumn& tablet_colum
                                                 vectorized::DataTypePtr 
target_type_hint) {
     ColumnIterator* it;
     RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
-    auto stream_iter = new ExtractReader(
+    auto* stream_iter = new ExtractReader(
             tablet_column,
             
std::make_unique<StreamReader>(root->data.file_column_type->create_column(),
                                            std::unique_ptr<ColumnIterator>(it),
@@ -584,13 +582,14 @@ Status Segment::new_column_iterator_with_path(const 
TabletColumn& tablet_column,
     } else {
         root_path = 
vectorized::PathInData({tablet_column.path_info_ptr()->get_parts()[0]});
     }
-    auto root = _sub_column_tree.find_leaf(root_path);
-    auto node = tablet_column.has_path_info()
-                        ? 
_sub_column_tree.find_exact(*tablet_column.path_info_ptr())
-                        : nullptr;
-    auto sparse_node = tablet_column.has_path_info()
-                               ? 
_sparse_column_tree.find_exact(*tablet_column.path_info_ptr())
+    const auto* root = _sub_column_tree.find_leaf(root_path);
+    const auto* node = tablet_column.has_path_info()
+                               ? 
_sub_column_tree.find_exact(*tablet_column.path_info_ptr())
                                : nullptr;
+    const auto* sparse_node =
+            tablet_column.has_path_info()
+                    ? 
_sparse_column_tree.find_exact(*tablet_column.path_info_ptr())
+                    : nullptr;
 
     // Currently only compaction and checksum need to read flat leaves
     // They both use tablet_schema_with_merged_max_schema_version as read 
schema
@@ -669,7 +668,7 @@ Status Segment::new_column_iterator(const TabletColumn& 
tablet_column,
         return new_column_iterator_with_path(tablet_column, iter, opt);
     }
     // init default iterator
-    if (_column_readers.count(tablet_column.unique_id()) < 1) {
+    if (!_column_readers.contains(tablet_column.unique_id())) {
         RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
         return Status::OK();
     }
@@ -701,7 +700,7 @@ Status Segment::new_column_iterator(int32_t unique_id, 
std::unique_ptr<ColumnIte
 ColumnReader* Segment::_get_column_reader(const TabletColumn& col) {
     // init column iterator by path info
     if (col.has_path_info() || col.is_variant_type()) {
-        auto node =
+        const auto* node =
                 col.has_path_info() ? 
_sub_column_tree.find_exact(*col.path_info_ptr()) : nullptr;
         if (node != nullptr) {
             return node->data.reader.get();
@@ -709,7 +708,7 @@ ColumnReader* Segment::_get_column_reader(const 
TabletColumn& col) {
         return nullptr;
     }
     auto col_unique_id = col.unique_id();
-    if (_column_readers.count(col_unique_id) > 0) {
+    if (_column_readers.contains(col_unique_id)) {
         return _column_readers[col_unique_id].get();
     }
     return nullptr;
@@ -826,7 +825,7 @@ Status Segment::lookup_row_key(const Slice& key, bool 
with_seq_col, bool with_ro
                 sought_key.get_data() + sought_key_without_seq.get_size() + 
seq_col_length + 1,
                 rowid_length - 1);
         const auto* type_info = 
get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT>();
-        auto rowid_coder = get_key_coder(type_info->type());
+        const auto* rowid_coder = get_key_coder(type_info->type());
         RETURN_IF_ERROR(rowid_coder->decode_ascending(&rowid_slice, 
rowid_length,
                                                       
(uint8_t*)&row_location->row_id));
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

(doris) branch branch-3.0 updated: [enhancement](err-msg) Add detailed column and schema info when failed to create a column iterator (#38689)

Reply via email to