xiaokang commented on code in PR #43003: URL: https://github.com/apache/doris/pull/43003#discussion_r1827260223
########## be/src/olap/compaction.cpp: ########## @@ -644,7 +644,7 @@ Status Compaction::do_inverted_index_compaction() { Status status = Status::OK(); for (auto&& column_uniq_id : ctx.columns_to_do_index_compaction) { auto col = _cur_tablet_schema->column_by_uid(column_uniq_id); - const auto* index_meta = _cur_tablet_schema->get_inverted_index(col); + const auto* index_meta = _cur_tablet_schema->inverted_index(col); Review Comment: Why change this name? ########## be/src/olap/rowset/segment_v2/column_writer.h: ########## @@ -63,7 +63,7 @@ struct ColumnWriterOptions { bool need_inverted_index = false; uint8_t gram_size; uint16_t gram_bf_size; - std::vector<const TabletIndex*> indexes; + std::vector<const TabletIndex*> indexes; // unused Review Comment: Why not delete it? ########## be/src/olap/tablet_schema.h: ########## @@ -376,7 +382,15 @@ class TabletSchema : public MetadataAdder<TabletSchema> { void set_row_store_page_size(long page_size) { _row_store_page_size = page_size; } long row_store_page_size() const { return _row_store_page_size; } - const std::vector<TabletIndex>& indexes() const { return _indexes; } + const std::vector<const TabletIndex*> inverted_indexes() const { + std::vector<const TabletIndex*> inverted_indexes; Review Comment: It will cause allocate and free small vector memory. ########## be/src/vec/common/schema_util.cpp: ########## @@ -591,4 +592,21 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst) return Status::OK(); } +bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema, + int32_t new_col_idx, int32_t old_col_idx) { + const auto& column_new = new_schema->column(new_col_idx); + const auto& column_old = old_schema->column(old_col_idx); + + if (column_new.is_bf_column() != column_old.is_bf_column() || + column_new.has_bitmap_index() != column_old.has_bitmap_index()) { + return true; + } + + bool new_schema_has_inverted_index = new_schema->inverted_index(column_new); + bool old_schema_has_inverted_index = old_schema->inverted_index(column_old); + + // TODO(scy): ngram index Review Comment: ngram bf index is included in bf index. ########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -1057,16 +1057,17 @@ Status SegmentIterator::_init_inverted_index_iterators() { return Status::OK(); } for (auto cid : _schema->column_ids()) { + // Use segment’s own index_meta, for compatibility with future indexing needs to default to lowercase. if (_inverted_index_iterators[cid] == nullptr) { - // Not check type valid, since we need to get inverted index for related variant type when reading the segment. - // If check type valid, we can not get inverted index for variant type, and result nullptr.The result for calling - // get_inverted_index with variant suffix should return corresponding inverted index meta. - bool check_inverted_index_by_type = false; - // Use segment’s own index_meta, for compatibility with future indexing needs to default to lowercase. + // In the _opts.tablet_schema, the sub-column type information for the variant is FieldType::OLAP_FIELD_TYPE_VARIANT. + // This is because the sub-column is created in create_materialized_variant_column. + // We use this column to locate the metadata for the inverted index, which requires a unique_id and path. + const auto& column = _opts.tablet_schema->column(cid); + int32_t col_unique_id = + column.is_extracted_column() ? column.parent_unique_id() : column.unique_id(); RETURN_IF_ERROR(_segment->new_inverted_index_iterator( - _opts.tablet_schema->column(cid), - _segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid), - check_inverted_index_by_type), + column, + _segment->_tablet_schema->inverted_index(col_unique_id, column.suffix_path()), Review Comment: Why change it? ########## be/src/olap/tablet_schema.cpp: ########## @@ -902,14 +902,13 @@ void TabletColumn::append_sparse_column(TabletColumn column) { _num_sparse_columns++; } -void TabletSchema::append_index(TabletIndex index) { +void TabletSchema::append_index(TabletIndex&& index) { _indexes.push_back(std::move(index)); } void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) { - int32_t col_unique_id = col.unique_id(); - const std::string& suffix_path = - col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; + int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); Review Comment: Do you mean fixing a old bug here? @eldenmoon can you have a look? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org