This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d2fe24308b6 [Fix](parquet-reader) Fix definition level rle decode dead loop in parquet-reader. (#39523)
d2fe24308b6 is described below

commit d2fe24308b64ddf592d72362aab110e346e985c1
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Mon Aug 26 23:26:53 2024 +0800

    [Fix](parquet-reader) Fix definition level rle decode dead loop in parquet-reader. (#39523)
---
 be/src/util/bit_stream_utils.h                       |  4 ++++
 be/src/util/rle_encoding.h                           |  2 ++
 be/src/vec/exec/format/parquet/level_decoder.h       |  4 +++-
 .../exec/format/parquet/vparquet_column_reader.cpp   | 20 ++++++++++++++++++--
 4 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/be/src/util/bit_stream_utils.h b/be/src/util/bit_stream_utils.h
index 550919440a8..b9b3621cf8b 100644
--- a/be/src/util/bit_stream_utils.h
+++ b/be/src/util/bit_stream_utils.h
@@ -145,6 +145,10 @@ public:
 
     bool is_initialized() const { return buffer_ != nullptr; }
 
+    const uint8_t* buffer() const { return buffer_; }
+
+    int max_bytes() const { return max_bytes_; }
+
 private:
     // Used by SeekToBit() and GetValue() to fetch the
     // the next word into buffer_.
diff --git a/be/src/util/rle_encoding.h b/be/src/util/rle_encoding.h
index be4df12916b..206349b4728 100644
--- a/be/src/util/rle_encoding.h
+++ b/be/src/util/rle_encoding.h
@@ -120,6 +120,8 @@ public:
     // Get current repeated value, make sure that count equals repeated_count()
     T get_repeated_value(size_t count);
 
+    const BitReader& bit_reader() const { return bit_reader_; }
+
 private:
     bool ReadHeader();
 
diff --git a/be/src/vec/exec/format/parquet/level_decoder.h b/be/src/vec/exec/format/parquet/level_decoder.h
index 4f76ac06837..de2f80d7f12 100644
--- a/be/src/vec/exec/format/parquet/level_decoder.h
+++ b/be/src/vec/exec/format/parquet/level_decoder.h
@@ -56,6 +56,8 @@ public:
 
     inline void rewind_one() { _rle_decoder.RewindOne(); }
 
+    const RleDecoder<level_t>& rle_decoder() const { return _rle_decoder; }
+
 private:
     tparquet::Encoding::type _encoding;
     level_t _bit_width = 0;
@@ -65,4 +67,4 @@ private:
     BitReader _bit_packed_decoder;
 };
 
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index c51a51bac3c..c31c63ee87c 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -209,7 +209,15 @@ Status ScalarColumnReader::_skip_values(size_t num_values) {
             level_t def_level = -1;
             size_t loop_skip = def_decoder.get_next_run(&def_level, num_values - skipped);
             if (loop_skip == 0) {
-                continue;
+                std::stringstream ss;
+                auto& bit_reader = def_decoder.rle_decoder().bit_reader();
+                ss << "def_decoder buffer (hex): ";
+                for (size_t i = 0; i < bit_reader.max_bytes(); ++i) {
+                    ss << std::hex << std::setw(2) << std::setfill('0')
+                       << static_cast<int>(bit_reader.buffer()[i]) << " ";
+                }
+                LOG(WARNING) << ss.str();
+                return Status::InternalError("Failed to decode definition level.");
             }
             if (def_level == 0) {
                 null_size += loop_skip;
@@ -254,7 +262,15 @@ Status ScalarColumnReader::_read_values(size_t num_values, ColumnPtr& doris_colu
                 level_t def_level;
                 size_t loop_read = def_decoder.get_next_run(&def_level, num_values - has_read);
                 if (loop_read == 0) {
-                    continue;
+                    std::stringstream ss;
+                    auto& bit_reader = def_decoder.rle_decoder().bit_reader();
+                    ss << "def_decoder buffer (hex): ";
+                    for (size_t i = 0; i < bit_reader.max_bytes(); ++i) {
+                        ss << std::hex << std::setw(2) << std::setfill('0')
+                           << static_cast<int>(bit_reader.buffer()[i]) << " ";
+                    }
+                    LOG(WARNING) << ss.str();
+                    return Status::InternalError("Failed to decode definition level.");
                 }
                 bool is_null = def_level == 0;
                 if (!(prev_is_null ^ is_null)) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to