airborne12 commented on code in PR #32436: URL: https://github.com/apache/doris/pull/32436#discussion_r1529987882
########## be/src/olap/rowset/segment_v2/inverted_index_writer.cpp: ########## @@ -369,39 +388,45 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { } const auto* offsets = reinterpret_cast<const uint64_t*>(offsets_ptr); if constexpr (field_is_slice_type(field_type)) { - if (_field == nullptr || _index_writer == nullptr) { - LOG(ERROR) << "field or index writer is null in inverted index writer."; - return Status::InternalError( - "field or index writer is null in inverted index writer"); + if (_index_writer == nullptr) { + LOG(ERROR) << "index writer is null in inverted index writer."; + return Status::InternalError("index writer is null in inverted index writer"); } auto ignore_above_value = get_parser_ignore_above_value_from_properties(_index_meta->properties()); auto ignore_above = std::stoi(ignore_above_value); for (int i = 0; i < count; ++i) { // offsets[i+1] is now row element count - std::vector<std::string> strings; // [0, 3, 6] // [10,20,30] [20,30,40], [30,40,50] auto start_off = offsets[i]; auto end_off = offsets[i + 1]; + // TODO(Amory).later we use object pool to avoid field creation + lucene::document::Field* new_field = nullptr; for (auto j = start_off; j < end_off; ++j) { if (null_map[j] == 1) { continue; } + // now we temp create field . 
later make a pool + if (Status st = create_field(&new_field); st != Status::OK()) { + LOG(ERROR) + << "create field " << string(_field_name.begin(), _field_name.end()) + << " error:" << st; + return st; + } auto* v = (Slice*)((const uint8_t*)value_ptr + j * field_size); - strings.emplace_back(v->get_data(), v->get_size()); - } - - auto value = join(strings, " "); - // only ignore_above UNTOKENIZED strings and empty strings not tokenized - if ((_parser_type == InvertedIndexParserType::PARSER_NONE && - value.length() > ignore_above) || - (_parser_type != InvertedIndexParserType::PARSER_NONE && value.empty())) { - RETURN_IF_ERROR(add_null_document()); - } else { - new_fulltext_field(value.c_str(), value.length()); - RETURN_IF_ERROR(add_document()); + if ((_parser_type == InvertedIndexParserType::PARSER_NONE && + v->get_size() > ignore_above) || + (_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) { + // is here a null value? + RETURN_IF_ERROR(add_null_document()); + } else { + new_fulltext_field(v->get_data(), v->get_size(), new_field); + _doc->add(*new_field); + } } + RETURN_IF_ERROR(add_document()); Review Comment: If add_null_document() has already been called inside the loop, the add_document() call that follows it will cause unexpected problems. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org