gavinchou commented on code in PR #30466: URL: https://github.com/apache/doris/pull/30466#discussion_r1470515451
##########
be/src/util/rle_encoding.h:
##########
@@ -493,7 +493,17 @@ template <typename T>
 void RleEncoder<T>::FlushRepeatedRun() {
     DCHECK_GT(repeat_count_, 0);
     // The lsb of 0 indicates this is a repeated run
-    int32_t indicator_value = repeat_count_ << 1 | 0;
+
+    // Avoid attempting to manage run lengths exceeding the capacity of an int32_t.
+    // Note that the Parquet standard prohibits longer runs - refer to PARQUET-1290 for details.
+    if (repeat_count_ > std::numeric_limits<int32_t>::max()) [[unlikely]] {
+        num_buffered_values_ = 0;
+        repeat_count_ = 0;
+        LOG(WARNING) << "Run length exceeds int32_t";

Review Comment:
   also print repeat_count_

##########
be/src/util/rle_encoding.h:
##########
@@ -493,7 +493,17 @@ template <typename T>
 void RleEncoder<T>::FlushRepeatedRun() {
     DCHECK_GT(repeat_count_, 0);
     // The lsb of 0 indicates this is a repeated run
-    int32_t indicator_value = repeat_count_ << 1 | 0;
+
+    // Avoid attempting to manage run lengths exceeding the capacity of an int32_t.
+    // Note that the Parquet standard prohibits longer runs - refer to PARQUET-1290 for details.
+    if (repeat_count_ > std::numeric_limits<int32_t>::max()) [[unlikely]] {
+        num_buffered_values_ = 0;
+        repeat_count_ = 0;
+        LOG(WARNING) << "Run length exceeds int32_t";
+        return;
+    }
+
+    uint32_t indicator_value = repeat_count_ << 1 | 0;

Review Comment:
   why "| 0" ?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org