LGTM
> From: "Robin Dapp"<[email protected]>
> Date:  Tue, Oct 28, 2025, 20:55
> Subject:  [PATCH v2] RISC-V: avlprop: Scale AVL by subreg ratio [PR122445].
> To: "gcc-patches"<[email protected]>
> Cc: <[email protected]>, <[email protected]>, <[email protected]>, 
> <[email protected]>, <[email protected]>
> Hi,

> 
> Since r16-4391-g85ab3a22ed11c9 we can use a punned type/mode for grouped

> loads and stores.  Vineet reported an x264 wrong-code bug since that

> commit.  The crux of the issue is that in avlprop we back-propagate

> the AVL from consumers (like stores) to producers.

> When e.g. a V4QI vector is type-punned by a V1SI vector

>   (subreg:V1SI (reg:V4QI ...)

> the AVL of that instruction refers to the outer subreg mode, i.e. for an

> AVL of 1 in a store we store one SImode element.  The producer of the

> store data is not type punned and still uses V4QI and we produce 4

> QImode elements.  Due to this mismatch we back-propagate the consumer

> AVL of 1 to the producers, causing wrong code.

> 
> This patch looks if the use is inside a subreg and scales the immediate

> AVL by the ratio of inner and outer mode.

> 
> Changes from v1:

>  - Move NULL check into loop.

>  - Add REG_P check.

> 
> Regtested on rv64gcv_zvl512b.

> 
> Regards

>  Robin

> 
>         PR target/122445

> 
> gcc/ChangeLog:

> 
>         * config/riscv/riscv-avlprop.cc 
> (pass_avlprop::get_vlmax_ta_preferred_avl):

>         Scale AVL of subreg uses.

> 
> gcc/testsuite/ChangeLog:

> 
>         * gcc.target/riscv/rvv/autovec/pr122445.c: New test.

> ---

>  gcc/config/riscv/riscv-avlprop.cc             | 41 +++++++++++++++++++

>  .../gcc.target/riscv/rvv/autovec/pr122445.c   | 26 ++++++++++++

>  2 files changed, 67 insertions(+)

>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c

> 
> diff --git a/gcc/config/riscv/riscv-avlprop.cc 
> b/gcc/config/riscv/riscv-avlprop.cc

> index b8547a722c5..a42764ec9ca 100644

> --- a/gcc/config/riscv/riscv-avlprop.cc

> +++ b/gcc/config/riscv/riscv-avlprop.cc

> @@ -77,6 +77,7 @@ along with GCC; see the file COPYING3.  If not see

>  #include "tree-pass.h"

>  #include "df.h"

>  #include "rtl-ssa.h"

> +#include "rtl-iter.h"

>  #include "cfgcleanup.h"

>  #include "insn-attr.h"

>  #include "tm-constrs.h"

> @@ -412,6 +413,46 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info 
> *insn) const

>                    && def1->insn ()->compare_with (insn) >= 0)

>                  return NULL_RTX;

>              }

> +          else

> +            {

> +              /* If the use is in a subreg e.g. in a store it is possible 
> that

> +                 we punned the vector mode with a larger mode like

> +                   (subreg:V1SI (reg:V4QI 123)).

> +                 For an AVL of 1 that means we actually store one SImode

> +                 element and not 1 QImode elements.  But the latter is what 
> we

> +                 would propagate if we took the AVL operand literally.

> +                 Instead we scale it by the ratio of inner and outer mode

> +                 (4 in the example above).  */

> +              int factor = 1;

> +              if (use->includes_subregs ())

> +                {

> +                  subrtx_iterator::array_type array;

> +                  FOR_EACH_SUBRTX (iter, array, use_insn->rtl (), NONCONST)

> +                    {

> +                      const_rtx x = *iter;

> +                      if (x

> +                          && SUBREG_P (x)

> +                          && REG_P (SUBREG_REG (x))

> +                          && REGNO (SUBREG_REG (x)) == use->regno ()

> +                          && known_eq (GET_MODE_SIZE (use->mode ()),

> +                                       GET_MODE_SIZE (GET_MODE (x))))

> +                        {

> +                          if (can_div_trunc_p (GET_MODE_NUNITS (use->mode 
> ()),

> +                                               GET_MODE_NUNITS (GET_MODE 
> (x)),

> +                                               &factor))

> +                            {

> +                              gcc_assert (factor > 0);

> +                              break;

> +                            }

> +                          else

> +                            return NULL_RTX;

> +                        }

> +                    }

> +                }

> +

> +              if (factor > 1)

> +                new_use_avl = GEN_INT (INTVAL (new_use_avl) * factor);

> +            }

>  

>            if (!use_avl)

>              use_avl = new_use_avl;

> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c

> new file mode 100644

> index 00000000000..47368684faa

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c

> @@ -0,0 +1,26 @@

> +/* { dg-do compile } */

> +/* { dg-options "-march=rv64gcbv_zvl256b -mabi=lp64d -O3 
> -mrvv-vector-bits=zvl --param=riscv-autovec-mode=V4QI -mtune=generic-ooo 
> -fdump-rtl-avlprop-all" } */

> +

> +typedef unsigned char uint8_t;

> +typedef short int16_t;

> +

> +#define FDEC_STRIDE 32

> +

> +static inline uint8_t x264_clip_uint8( int x )

> +{

> +  return x;

> +}

> +

> +void

> +x264_add4x4_idct (uint8_t *p_dst, int16_t d[16])

> +{

> +  for( int y = 0; y < 4; y++ )

> +    {

> +      for( int x = 0; x < 4; x++ )

> +        p_dst[x] = x264_clip_uint8( p_dst[x] + d[y*4+x] );

> +      p_dst += FDEC_STRIDE;

> +    }

> +}

> +

> +/* { dg-final { scan-rtl-dump "Propagating AVL: \\(const_int 4" "avlprop" } 
> } */

> +/* { dg-final { scan-rtl-dump-not "Propagating AVL: \\(const_int 1" 
> "avlprop" } } */

> -- 

> 2.51.0
> 

Reply via email to