diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index 6380ce6..469784f 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@@ -3978,10 +3978,10 @@ void precompute_partition_info_sums_(
 
 	/* first do max_partition_order */
 	{
+		unsigned threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
 		unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order);
-		/* WATCHOUT: "+ bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum
-		 * assumed size of the average residual magnitude */
-		if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
+		/* WATCHOUT: "bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum assumed size of the average residual magnitude */
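+		if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS <= threshold) { /* NOTE(review): the overflow-safe bound (FLAC__bitmath_ilog2(default_partition_samples) + 1) + (bps + FLAC__MAX_EXTRA_RESIDUAL_BPS) <= 32 is equivalent to "< threshold" (what the replaced test checked), not "<="; "<=" only suffices when default_partition_samples is an exact power of two -- confirm */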
 			FLAC__uint32 abs_residual_partition_sum;
 
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
@@ -3992,15 +3992,32 @@ void precompute_partition_info_sums_(
 				abs_residual_partition_sums[partition] = abs_residual_partition_sum;
 			}
 		}
-		else { /* have to pessimistically use 64 bits for accumulator */
-			FLAC__uint64 abs_residual_partition_sum;
+		else { /* still try to use 32-bit math */
+			FLAC__uint32 abs_residual_partition_sum;
+			FLAC__uint32 r_bits, r_abs;
+			unsigned r_sample_init;
 
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
 				end += default_partition_samples;
-				abs_residual_partition_sum = 0;
-				for( ; residual_sample < end; residual_sample++)
-					abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
-				abs_residual_partition_sums[partition] = abs_residual_partition_sum;
+				abs_residual_partition_sum = 0; r_bits = 0;
+				r_sample_init = residual_sample; /* save initial position */
+				for( ; residual_sample < end; residual_sample++) {
+					r_abs = abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
+					abs_residual_partition_sum += r_abs;
+					r_bits |= r_abs;
+				}
+
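+				if(FLAC__bitmath_ilog2(r_bits|1) < threshold) { /* i.e. (r_bits==0 ? 0 : FLAC__bitmath_ilog2(r_bits)+1) <= threshold; NOTE(review): this only rules out 32-bit wraparound when default_partition_samples is an exact power of two, otherwise one more bit of headroom is needed -- confirm */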
+					abs_residual_partition_sums[partition] = abs_residual_partition_sum; /* no overflow */
+				}
+				else { /* have to pessimistically use 64 bits for accumulator */
+					FLAC__uint64 abs_residual_partition_sum64 = 0;
+
+					residual_sample = r_sample_init; /* rewind and repeat summation */
+					for( ; residual_sample < end; residual_sample++)
+						abs_residual_partition_sum64 += (FLAC__uint32)abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
+					abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
+				}
 			}
 		}
 	}
