This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit e2b94586fd8531ed79399c595ffcd2d860559fc0
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Fri Aug 25 21:34:11 2023 +0800

    [fix](invert index) fix the keyword type index length limit (#23503)
---
 be/src/clucene                                          |  2 +-
 be/src/olap/rowset/segment_v2/column_writer.cpp         |  9 +++++----
 be/src/olap/rowset/segment_v2/inverted_index_writer.cpp | 17 ++++++++++++++---
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index fa33b52263..9e60ec666b 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit fa33b522639948f7845b6500910172f55eeae13a
+Subproject commit 9e60ec666b3ccf7dd8b7c3e331ac03ccf87d5845
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp
index dcb0f89858..2f27df779b 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -510,7 +510,7 @@ Status ScalarColumnWriter::append_nulls(size_t num_rows) {
         _bitmap_index_builder->add_nulls(num_rows);
     }
     if (_opts.inverted_index) {
-        _inverted_index_builder->add_nulls(num_rows);
+        RETURN_IF_ERROR(_inverted_index_builder->add_nulls(num_rows));
     }
     if (_opts.need_bloom_filter) {
         _bloom_filter_index_builder->add_nulls(num_rows);
@@ -545,7 +545,8 @@ Status ScalarColumnWriter::append_data_in_current_page(const uint8_t* data, size
         _bitmap_index_builder->add_values(data, *num_written);
     }
     if (_opts.inverted_index) {
-        _inverted_index_builder->add_values(get_field()->name(), data, *num_written);
+        RETURN_IF_ERROR(
+                _inverted_index_builder->add_values(get_field()->name(), data, *num_written));
     }
     if (_opts.need_bloom_filter) {
         _bloom_filter_index_builder->add_values(data, *num_written);
@@ -934,8 +935,8 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
                 auto writer = dynamic_cast<ScalarColumnWriter*>(_item_writer.get());
                 if (writer != nullptr) {
                     //NOTE: use array field name as index field, but item_writer size should be used when moving item_data_ptr
-                    _inverted_index_builder->add_array_values(_item_writer->get_field()->size(),
-                                                              col_cursor, 1);
+                    RETURN_IF_ERROR(_inverted_index_builder->add_array_values(
+                            _item_writer->get_field()->size(), col_cursor, 1));
                 }
             }
         }
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 1762a60189..e779170d03 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -203,6 +203,17 @@ public:
         return Status::OK();
     }
 
+    Status add_document() {
+        try {
+            _index_writer->addDocument(_doc.get());
+        } catch (const CLuceneError& e) {
+            _dir->deleteDirectory();
+            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                    "CLuceneError add_document: {}", e.what());
+        }
+        return Status::OK();
+    }
+
     Status add_nulls(uint32_t count) override {
         _null_bitmap.addRange(_rid, _rid + count);
         _rid += count;
@@ -215,7 +226,7 @@ public:
 
             for (int i = 0; i < count; ++i) {
                 new_fulltext_field(empty_value.c_str(), 0);
-                _index_writer->addDocument(_doc.get());
+                RETURN_IF_ERROR(add_document());
             }
         }
         return Status::OK();
@@ -261,7 +272,7 @@ public:
             auto* v = (Slice*)values;
             for (int i = 0; i < count; ++i) {
                 new_fulltext_field(v->get_data(), v->get_size());
-                _index_writer->addDocument(_doc.get());
+                RETURN_IF_ERROR(add_document());
                 ++v;
                 _rid++;
             }
@@ -294,7 +305,7 @@ public:
                 auto value = join(strings, " ");
                 new_fulltext_field(value.c_str(), value.length());
                 _rid++;
-                _index_writer->addDocument(_doc.get());
+                RETURN_IF_ERROR(add_document());
                 values++;
             }
         } else if constexpr (field_is_numeric_type(field_type)) {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org