diff --git a/src/libFLAC/lpc_intrin_sse2.c b/src/libFLAC/lpc_intrin_sse2.c
index 2902374..b4b794e 100644
--- a/src/libFLAC/lpc_intrin_sse2.c
+++ b/src/libFLAC/lpc_intrin_sse2.c
@@ -1289,6 +1289,7 @@ void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsig
 {
 	int i;
 	FLAC__int32 sum;
+	if (order < 8) { FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data); return; }
 
 	FLAC__ASSERT(order > 0);
 	FLAC__ASSERT(order <= 32);
diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c
index cf06398..cd41b5e 100644
--- a/src/libFLAC/stream_decoder.c
+++ b/src/libFLAC/stream_decoder.c
@@ -417,24 +417,17 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
 		}
 #endif
 #ifdef FLAC__HAS_X86INTRIN
-# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* not faster than asm MMX code */
+# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT: not faster than ASM/MMX code */
 		if(decoder->private_->cpuinfo.ia32.sse2) {
 			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
 			decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2;
 		}
 # endif
-# if defined FLAC__SSE4_1_SUPPORTED && 1 /* faster than asm */
+# if defined FLAC__SSE4_1_SUPPORTED && 1 /* OPT: faster than asm; TODO: more tests */
 		if(decoder->private_->cpuinfo.ia32.sse41)
 			decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
 # endif
 #endif
-#elif defined FLAC__CPU_X86_64
-#ifdef FLAC__HAS_X86INTRIN
-# if defined FLAC__SSE2_SUPPORTED
-		decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
-		decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2;
-# endif
-#endif
 #elif defined FLAC__CPU_PPC
 		FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC);
 		if(decoder->private_->cpuinfo.ppc.altivec) {
diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index 343da4d..d6b1084 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@@ -957,7 +957,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
 #    endif
 #    ifdef FLAC__SSE2_SUPPORTED
-		encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
+		/* OPT: SSE2 version disabled (not faster than C); TODO: more tests on different CPUs -- encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; */
 		encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
 #    endif
 #   endif /* FLAC__HAS_X86INTRIN */
