https://gcc.gnu.org/g:ce5a7af14ef0b83150666cc98e10305cba32e694
commit ce5a7af14ef0b83150666cc98e10305cba32e694 Author: Robin Dapp <[email protected]> Date: Thu Oct 30 07:48:07 2025 -0600 [PATCH v2] RISC-V: avlprop: Scale AVL by subreg ratio [PR122445]. Hi, Since r16-4391-g85ab3a22ed11c9 we can use a punned type/mode for grouped loads and stores. Vineet reported an x264 wrong-code bug since that commit. The crux of the issue is that in avlprop we back-propagate the AVL from consumers (like stores) to producers. When e.g. a V4QI vector is type-punned by a V1SI vector (subreg:V1SI (reg:V4QI ...)), the AVL of that instruction refers to the outer subreg mode, i.e. for an AVL of 1 in a store we store one SImode element. The producer of the store data is not type-punned and still uses V4QI, so it produces 4 QImode elements. Because of this mismatch, back-propagating the consumer AVL of 1 to the producers causes wrong code. This patch checks whether the use is inside a subreg and scales the immediate AVL by the ratio of inner and outer mode. Changes from v1: - Move NULL check into loop. - Add REG_P check. Regtested on rv64gcv_zvl512b. Regards Robin PR target/122445 gcc/ChangeLog: * config/riscv/riscv-avlprop.cc (pass_avlprop::get_vlmax_ta_preferred_avl): Scale AVL of subreg uses. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr122445.c: New test. (cherry picked from commit bce897412c2dbf63bb6fd509cc8191cd8fbb8b56) Diff: --- gcc/config/riscv/riscv-avlprop.cc | 41 ++++++++++++++++++++++ .../gcc.target/riscv/rvv/autovec/pr122445.c | 26 ++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc index b8547a722c5e..a42764ec9ca0 100644 --- a/gcc/config/riscv/riscv-avlprop.cc +++ b/gcc/config/riscv/riscv-avlprop.cc @@ -77,6 +77,7 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-pass.h" #include "df.h" #include "rtl-ssa.h" +#include "rtl-iter.h" #include "cfgcleanup.h" #include "insn-attr.h" #include "tm-constrs.h" @@ -412,6 +413,46 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const && def1->insn ()->compare_with (insn) >= 0) return NULL_RTX; } + else + { + /* If the use is in a subreg e.g. in a store it is possible that + we punned the vector mode with a larger mode like + (subreg:V1SI (reg:V4QI 123)). + For an AVL of 1 that means we actually store one SImode + element and not 1 QImode elements. But the latter is what we + would propagate if we took the AVL operand literally. + Instead we scale it by the ratio of inner and outer mode + (4 in the example above). */ + int factor = 1; + if (use->includes_subregs ()) + { + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, use_insn->rtl (), NONCONST) + { + const_rtx x = *iter; + if (x + && SUBREG_P (x) + && REG_P (SUBREG_REG (x)) + && REGNO (SUBREG_REG (x)) == use->regno () + && known_eq (GET_MODE_SIZE (use->mode ()), + GET_MODE_SIZE (GET_MODE (x)))) + { + if (can_div_trunc_p (GET_MODE_NUNITS (use->mode ()), + GET_MODE_NUNITS (GET_MODE (x)), + &factor)) + { + gcc_assert (factor > 0); + break; + } + else + return NULL_RTX; + } + } + } + + if (factor > 1) + new_use_avl = GEN_INT (INTVAL (new_use_avl) * factor); + } if (!use_avl) use_avl = new_use_avl; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c new file mode 100644 index 000000000000..47368684faa1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcbv_zvl256b -mabi=lp64d -O3 -mrvv-vector-bits=zvl --param=riscv-autovec-mode=V4QI -mtune=generic-ooo -fdump-rtl-avlprop-all" } */ + +typedef unsigned char uint8_t; +typedef short int16_t; + +#define FDEC_STRIDE 32 + +static inline uint8_t x264_clip_uint8( int x ) +{ + 
return x; +} + +void +x264_add4x4_idct (uint8_t *p_dst, int16_t d[16]) +{ + for( int y = 0; y < 4; y++ ) + { + for( int x = 0; x < 4; x++ ) + p_dst[x] = x264_clip_uint8( p_dst[x] + d[y*4+x] ); + p_dst += FDEC_STRIDE; + } +} + +/* { dg-final { scan-rtl-dump "Propagating AVL: \\(const_int 4" "avlprop" } } */ +/* { dg-final { scan-rtl-dump-not "Propagating AVL: \\(const_int 1" "avlprop" } } */
