This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d2fe24308b6 [Fix](parquet-reader) Fix definition level rle decode dead loop in parquet-reader. (#39523) d2fe24308b6 is described below commit d2fe24308b64ddf592d72362aab110e346e985c1 Author: Qi Chen <kaka11.c...@gmail.com> AuthorDate: Mon Aug 26 23:26:53 2024 +0800 [Fix](parquet-reader) Fix definition level rle decode dead loop in parquet-reader. (#39523) --- be/src/util/bit_stream_utils.h | 4 ++++ be/src/util/rle_encoding.h | 2 ++ be/src/vec/exec/format/parquet/level_decoder.h | 4 +++- .../exec/format/parquet/vparquet_column_reader.cpp | 20 ++++++++++++++++++-- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/be/src/util/bit_stream_utils.h b/be/src/util/bit_stream_utils.h index 550919440a8..b9b3621cf8b 100644 --- a/be/src/util/bit_stream_utils.h +++ b/be/src/util/bit_stream_utils.h @@ -145,6 +145,10 @@ public: bool is_initialized() const { return buffer_ != nullptr; } + const uint8_t* buffer() const { return buffer_; } + + int max_bytes() const { return max_bytes_; } + private: // Used by SeekToBit() and GetValue() to fetch the // the next word into buffer_. diff --git a/be/src/util/rle_encoding.h b/be/src/util/rle_encoding.h index be4df12916b..206349b4728 100644 --- a/be/src/util/rle_encoding.h +++ b/be/src/util/rle_encoding.h @@ -120,6 +120,8 @@ public: // Get current repeated value, make sure that count equals repeated_count() T get_repeated_value(size_t count); + const BitReader& bit_reader() const { return bit_reader_; } + private: bool ReadHeader(); diff --git a/be/src/vec/exec/format/parquet/level_decoder.h b/be/src/vec/exec/format/parquet/level_decoder.h index 4f76ac06837..de2f80d7f12 100644 --- a/be/src/vec/exec/format/parquet/level_decoder.h +++ b/be/src/vec/exec/format/parquet/level_decoder.h @@ -56,6 +56,8 @@ public: inline void rewind_one() { _rle_decoder.RewindOne(); } + const RleDecoder<level_t>& rle_decoder() const { return _rle_decoder; } + private: tparquet::Encoding::type _encoding; level_t _bit_width = 0; @@ -65,4 +67,4 @@ private: BitReader _bit_packed_decoder; }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index c51a51bac3c..c31c63ee87c 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -209,7 +209,15 @@ Status ScalarColumnReader::_skip_values(size_t num_values) { level_t def_level = -1; size_t loop_skip = def_decoder.get_next_run(&def_level, num_values - skipped); if (loop_skip == 0) { - continue; + std::stringstream ss; + auto& bit_reader = def_decoder.rle_decoder().bit_reader(); + ss << "def_decoder buffer (hex): "; + for (size_t i = 0; i < bit_reader.max_bytes(); ++i) { + ss << std::hex << std::setw(2) << std::setfill('0') + << static_cast<int>(bit_reader.buffer()[i]) << " "; + } + LOG(WARNING) << ss.str(); + return Status::InternalError("Failed to decode definition level."); } if (def_level == 0) { null_size += loop_skip; @@ -254,7 +262,15 @@ Status ScalarColumnReader::_read_values(size_t num_values, ColumnPtr& doris_colu level_t def_level; size_t loop_read = def_decoder.get_next_run(&def_level, num_values - has_read); if (loop_read == 0) { - continue; + std::stringstream ss; + auto& bit_reader = def_decoder.rle_decoder().bit_reader(); + ss << "def_decoder buffer (hex): "; + for (size_t i = 0; i < bit_reader.max_bytes(); ++i) { + ss << std::hex << std::setw(2) << std::setfill('0') + << static_cast<int>(bit_reader.buffer()[i]) << " "; + } + LOG(WARNING) << ss.str(); + return Status::InternalError("Failed to decode definition level."); } bool is_null = def_level == 0; if (!(prev_is_null ^ is_null)) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org