This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d259770b86 [Fix] avoid core dump cause by malformed bitmap type data (#10458) d259770b86 is described below commit d259770b86955a4cb34102d4cc95ae1a731326af Author: Zhengguo Yang <yangz...@gmail.com> AuthorDate: Thu Jun 30 11:27:22 2022 +0800 [Fix] avoid core dump cause by malformed bitmap type data (#10458) --- be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 8 ++++---- be/src/olap/rowset/segment_v2/binary_dict_page.h | 4 ++-- be/src/olap/rowset/segment_v2/binary_plain_page.h | 20 +++++++++++++++++++- be/src/olap/rowset/segment_v2/column_reader.cpp | 6 ++++-- be/src/olap/rowset/segment_v2/encoding_info.cpp | 4 ++-- .../olap/rowset/segment_v2/indexed_column_reader.cpp | 4 +++- .../olap/rowset/segment_v2/indexed_column_writer.cpp | 6 ++++-- be/src/olap/rowset/segment_v2/options.h | 8 +++++--- be/src/olap/rowset/segment_v2/parsed_page.h | 4 ++-- be/src/tools/meta_tool.cpp | 1 - be/src/util/bitmap_value.h | 14 +++++++++++++- .../olap/rowset/segment_v2/binary_dict_page_test.cpp | 10 ++++++---- .../rowset/segment_v2/binary_plain_page_test.cpp | 3 ++- 13 files changed, 66 insertions(+), 26 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index b53ff52ade..ea9fe66dfc 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -45,7 +45,7 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options) _data_page_builder.reset(new BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>(options)); PageBuilderOptions dict_builder_options; dict_builder_options.data_page_size = _options.dict_page_size; - _dict_builder.reset(new BinaryPlainPageBuilder(dict_builder_options)); + _dict_builder.reset(new BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options)); reset(); } @@ -135,7 +135,7 @@ void BinaryDictPageBuilder::reset() { _buffer.resize(BINARY_DICT_PAGE_HEADER_SIZE); if (_encoding_type == DICT_ENCODING && _dict_builder->is_page_full()) { - _data_page_builder.reset(new BinaryPlainPageBuilder(_options)); + _data_page_builder.reset(new BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(_options)); _encoding_type = PLAIN_ENCODING; } else { _data_page_builder->reset(); @@ -206,7 +206,7 @@ Status BinaryDictPageDecoder::init() { _bit_shuffle_ptr = new BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options)); } else if (_encoding_type == PLAIN_ENCODING) { DCHECK_EQ(_encoding_type, PLAIN_ENCODING); - _data_page_decoder.reset(new BinaryPlainPageDecoder(_data, _options)); + _data_page_decoder.reset(new BinaryPlainPageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options)); } else { LOG(WARNING) << "invalid encoding type:" << _encoding_type; return Status::Corruption(strings::Substitute("invalid encoding type:$0", _encoding_type)); @@ -228,7 +228,7 @@ bool BinaryDictPageDecoder::is_dict_encoding() const { } void BinaryDictPageDecoder::set_dict_decoder(PageDecoder* dict_decoder, StringRef* dict_word_info) { - _dict_decoder = (BinaryPlainPageDecoder*)dict_decoder; + _dict_decoder = (BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>*)dict_decoder; _dict_word_info = dict_word_info; }; diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.h b/be/src/olap/rowset/segment_v2/binary_dict_page.h index e1dcdba49b..d57f7cf974 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.h +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.h @@ -78,7 +78,7 @@ private: std::unique_ptr<PageBuilder> _data_page_builder; - std::unique_ptr<BinaryPlainPageBuilder> _dict_builder; + std::unique_ptr<BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>> _dict_builder; EncodingTypePB _encoding_type; struct HashOfSlice { @@ -122,7 +122,7 @@ private: Slice _data; PageDecoderOptions _options; std::unique_ptr<PageDecoder> _data_page_decoder; - BinaryPlainPageDecoder* _dict_decoder = nullptr; + BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>* _dict_decoder = nullptr; BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>* _bit_shuffle_ptr = nullptr; bool _parsed; EncodingTypePB _encoding_type; diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h index cac9da3709..1919efcba8 100644 --- a/be/src/olap/rowset/segment_v2/binary_plain_page.h +++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h @@ -44,6 +44,7 @@ namespace doris { namespace segment_v2 { +template <FieldType Type> class BinaryPlainPageBuilder : public PageBuilder { public: BinaryPlainPageBuilder(const PageBuilderOptions& options) @@ -64,6 +65,11 @@ public: // If the page is full, should stop adding more items. while (!is_page_full() && i < *count) { auto src = reinterpret_cast<const Slice*>(vals); + if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) { + if (_options.need_check_bitmap) { + RETURN_IF_ERROR(BitmapTypeCode::validate(*(src->data))); + } + } size_t offset = _buffer.size(); _offsets.push_back(offset); _buffer.append(src->data, src->size); @@ -154,6 +160,7 @@ private: faststring _last_value; }; +template <FieldType Type> class BinaryPlainPageDecoder : public PageDecoder { public: BinaryPlainPageDecoder(Slice data) : BinaryPlainPageDecoder(data, PageDecoderOptions()) {} @@ -204,6 +211,11 @@ public: size_t mem_len[max_fetch]; for (size_t i = 0; i < max_fetch; i++, out++, _cur_idx++) { *out = string_at_index(_cur_idx); + if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) { + if (_options.need_check_bitmap) { + RETURN_IF_ERROR(BitmapTypeCode::validate(*(out->data))); + } + } mem_len[i] = out->size; } @@ -246,6 +258,11 @@ public: uint32_t len = offset(_cur_idx + 1) - start_offset; len_array[i] = len; start_offset_array[i] = start_offset; + if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) { + if (_options.need_check_bitmap) { + RETURN_IF_ERROR(BitmapTypeCode::validate(*(_data.data + start_offset))); + } + } } dst->insert_many_binary_data(_data.mutable_data(), len_array, start_offset_array, max_fetch); @@ -271,8 +288,9 @@ public: } void get_dict_word_info(StringRef* dict_word_info) { - if (_num_elems <= 0) [[unlikely]] + if (UNLIKELY(_num_elems <= 0)) { return; + } char* data_begin = (char*)&_data[0]; char* offset_ptr = (char*)&_data[_offsets_pos]; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 7243736281..a7d8c99899 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -747,10 +747,12 @@ Status FileColumnIterator::_read_data_page(const OrdinalPageIndexIterator& iter) _compress_codec.get())); // ignore dict_footer.dict_page_footer().encoding() due to only // PLAIN_ENCODING is supported for dict page right now - _dict_decoder = std::make_unique<BinaryPlainPageDecoder>(dict_data); + _dict_decoder = std::make_unique<BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>>( + dict_data); RETURN_IF_ERROR(_dict_decoder->init()); - auto* pd_decoder = (BinaryPlainPageDecoder*)_dict_decoder.get(); + auto* pd_decoder = + (BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>*)_dict_decoder.get(); _dict_word_info.reset(new StringRef[pd_decoder->_num_elems]); pd_decoder->get_dict_word_info(_dict_word_info.get()); } diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp index ac3a1a624f..4f9e2d5d14 100644 --- a/be/src/olap/rowset/segment_v2/encoding_info.cpp +++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp @@ -58,12 +58,12 @@ struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> { template <FieldType type> struct TypeEncodingTraits<type, PLAIN_ENCODING, Slice> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new BinaryPlainPageBuilder(opts); + *builder = new BinaryPlainPageBuilder<type>(opts); return Status::OK(); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { - *decoder = new BinaryPlainPageDecoder(data, opts); + *decoder = new BinaryPlainPageDecoder<type>(data, opts); return Status::OK(); } }; diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp index 6a0b2ca81e..682f620ea8 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp @@ -110,8 +110,10 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) { _compress_codec.get())); // parse data page // note that page_index is not used in IndexedColumnIterator, so we pass 0 + PageDecoderOptions opts; + opts.need_check_bitmap = false; return ParsedPage::create(std::move(handle), body, footer.data_page_footer(), - _reader->encoding_info(), pp, 0, &_data_page); + _reader->encoding_info(), pp, 0, &_data_page, opts); } Status IndexedColumnIterator::seek_to_ordinal(ordinal_t idx) { diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp index 4c7259c90c..979481adbc 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp @@ -59,8 +59,10 @@ Status IndexedColumnWriter::init() { // because the default encoding of a data type can be changed in the future DCHECK_NE(_options.encoding, DEFAULT_ENCODING); - PageBuilder* data_page_builder; - RETURN_IF_ERROR(encoding_info->create_page_builder(PageBuilderOptions(), &data_page_builder)); + PageBuilder* data_page_builder = nullptr; + PageBuilderOptions builder_option; + builder_option.need_check_bitmap = false; + RETURN_IF_ERROR(encoding_info->create_page_builder(builder_option, &data_page_builder)); _data_page_builder.reset(data_page_builder); if (_options.write_ordinal_index) { diff --git a/be/src/olap/rowset/segment_v2/options.h b/be/src/olap/rowset/segment_v2/options.h index 0cb2dafa39..9405eb19cf 100644 --- a/be/src/olap/rowset/segment_v2/options.h +++ b/be/src/olap/rowset/segment_v2/options.h @@ -22,17 +22,19 @@ namespace doris { namespace segment_v2 { -class BinaryPlainPageDecoder; - static constexpr size_t DEFAULT_PAGE_SIZE = 1024 * 1024; // default size: 1M struct PageBuilderOptions { size_t data_page_size = DEFAULT_PAGE_SIZE; size_t dict_page_size = DEFAULT_PAGE_SIZE; + + bool need_check_bitmap = true; }; -struct PageDecoderOptions {}; +struct PageDecoderOptions { + bool need_check_bitmap = true; +}; } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/parsed_page.h b/be/src/olap/rowset/segment_v2/parsed_page.h index c70ebd62a6..232c347c38 100644 --- a/be/src/olap/rowset/segment_v2/parsed_page.h +++ b/be/src/olap/rowset/segment_v2/parsed_page.h @@ -38,7 +38,8 @@ namespace segment_v2 { struct ParsedPage { static Status create(PageHandle handle, const Slice& body, const DataPageFooterPB& footer, const EncodingInfo* encoding, const PagePointer& page_pointer, - uint32_t page_index, ParsedPage* result) { + uint32_t page_index, ParsedPage* result, + PageDecoderOptions opts = PageDecoderOptions()) { result->~ParsedPage(); ParsedPage* page = new (result)(ParsedPage); page->page_handle = std::move(handle); @@ -53,7 +54,6 @@ struct ParsedPage { } Slice data_slice(body.data, body.size - null_size); - PageDecoderOptions opts; RETURN_IF_ERROR(encoding->create_page_decoder(data_slice, opts, &page->data_decoder)); RETURN_IF_ERROR(page->data_decoder->init()); diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 60b56b35cb..f4e5e6087c 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -56,7 +56,6 @@ using doris::RandomAccessFile; using strings::Substitute; using doris::segment_v2::SegmentFooterPB; using doris::segment_v2::ColumnReader; -using doris::segment_v2::BinaryPlainPageDecoder; using doris::segment_v2::PageHandle; using doris::segment_v2::PagePointer; using doris::segment_v2::ColumnReaderOptions; diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 4b1445ba59..7eaeb11e79 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -71,6 +71,16 @@ struct BitmapTypeCode { // added in 0.12 BITMAP64 = 4 }; + Status static inline validate(int bitmap_type) { + if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type > type::BITMAP64)) { + std::string err_msg = + fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}", + BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type); + LOG(ERROR) << err_msg; + return Status::IOError(err_msg); + } + return Status::OK(); + } }; namespace detail { @@ -1536,7 +1546,6 @@ public: // Deserialize a bitmap value from `src`. // Return false if `src` begins with unknown type code, true otherwise. bool deserialize(const char* src) { - DCHECK(*src >= BitmapTypeCode::EMPTY && *src <= BitmapTypeCode::BITMAP64); switch (*src) { case BitmapTypeCode::EMPTY: _type = EMPTY; @@ -1555,6 +1564,9 @@ public: _bitmap = detail::Roaring64Map::read(src); break; default: + LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY + << " and " << BitmapTypeCode::BITMAP64 << " actrual is " + << static_cast<int>(*src); return false; } return true; diff --git a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp index 8c59da6491..bb3493efd6 100644 --- a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp @@ -68,8 +68,9 @@ public: Status status = page_builder.get_dictionary_page(&dict_slice); EXPECT_TRUE(status.ok()); PageDecoderOptions dict_decoder_options; - std::unique_ptr<BinaryPlainPageDecoder> dict_page_decoder( - new BinaryPlainPageDecoder(dict_slice.slice(), dict_decoder_options)); + std::unique_ptr<BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>> dict_page_decoder( + new BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>(dict_slice.slice(), + dict_decoder_options)); status = dict_page_decoder->init(); EXPECT_TRUE(status.ok()); // because every slice is unique @@ -175,8 +176,9 @@ public: int slice_index = random() % results.size(); //int slice_index = 1; PageDecoderOptions dict_decoder_options; - std::unique_ptr<BinaryPlainPageDecoder> dict_page_decoder( - new BinaryPlainPageDecoder(dict_slice.slice(), dict_decoder_options)); + std::unique_ptr<BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>> dict_page_decoder( + new BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>(dict_slice.slice(), + dict_decoder_options)); status = dict_page_decoder->init(); EXPECT_TRUE(status.ok()); diff --git a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp index 0e0d34606d..1da1db55e1 100644 --- a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp @@ -108,7 +108,8 @@ public: }; TEST_F(BinaryPlainPageTest, TestBinaryPlainPageBuilderSeekByValueSmallPage) { - TestBinarySeekByValueSmallPage<BinaryPlainPageBuilder, BinaryPlainPageDecoder>(); + TestBinarySeekByValueSmallPage<BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>, + BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>>(); } } // namespace segment_v2 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org