https://gcc.gnu.org/g:ce5a7af14ef0b83150666cc98e10305cba32e694

commit ce5a7af14ef0b83150666cc98e10305cba32e694
Author: Robin Dapp <[email protected]>
Date:   Thu Oct 30 07:48:07 2025 -0600

    [PATCH v2] RISC-V: avlprop: Scale AVL by subreg ratio [PR122445].
    
    Hi,
    
    Since r16-4391-g85ab3a22ed11c9 we can use a punned type/mode for grouped
    loads and stores.  Vineet reported an x264 wrong-code bug since that
    commit.  The crux of the issue is that in avlprop we back-propagate
    the AVL from consumers (like stores) to producers.
    When e.g. a V4QI vector is type-punned as a V1SI vector
      (subreg:V1SI (reg:V4QI ...))
    the AVL of that instruction refers to the outer subreg mode, i.e. for an
    AVL of 1 in a store we store one SImode element.  The producer of the
    store data is not type punned and still uses V4QI, so it produces 4
    QImode elements.  Because of this mismatch, back-propagating the consumer
    AVL of 1 to the producers causes wrong code.
    
    This patch checks whether the use is inside a subreg and, if so, scales
    the immediate AVL by the ratio of the inner and outer modes' element counts.
    
    Changes from v1:
     - Move NULL check into loop.
     - Add REG_P check.
    
    Regtested on rv64gcv_zvl512b.
    
    Regards
     Robin
    
            PR target/122445
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-avlprop.cc (pass_avlprop::get_vlmax_ta_preferred_avl):
            Scale AVL of subreg uses.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/pr122445.c: New test.
    
    (cherry picked from commit bce897412c2dbf63bb6fd509cc8191cd8fbb8b56)

Diff:
---
 gcc/config/riscv/riscv-avlprop.cc                  | 41 ++++++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr122445.c        | 26 ++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc
index b8547a722c5e..a42764ec9ca0 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -77,6 +77,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "df.h"
 #include "rtl-ssa.h"
+#include "rtl-iter.h"
 #include "cfgcleanup.h"
 #include "insn-attr.h"
 #include "tm-constrs.h"
@@ -412,6 +413,46 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const
                  && def1->insn ()->compare_with (insn) >= 0)
                return NULL_RTX;
            }
+         else
+           {
+             /* If the use is in a subreg e.g. in a store it is possible that
+                we punned the vector mode with a larger mode like
+                  (subreg:V1SI (reg:V4QI 123)).
+                For an AVL of 1 that means we actually store one SImode
+                element and not one QImode element.  But the latter is what we
+                would propagate if we took the AVL operand literally.
+                Instead we scale it by the ratio of inner and outer mode
+                (4 in the example above).  */
+             int factor = 1;
+             if (use->includes_subregs ())
+               {
+                 subrtx_iterator::array_type array;
+                 FOR_EACH_SUBRTX (iter, array, use_insn->rtl (), NONCONST)
+                   {
+                     const_rtx x = *iter;
+                     if (x
+                         && SUBREG_P (x)
+                         && REG_P (SUBREG_REG (x))
+                         && REGNO (SUBREG_REG (x)) == use->regno ()
+                         && known_eq (GET_MODE_SIZE (use->mode ()),
+                                      GET_MODE_SIZE (GET_MODE (x))))
+                       {
+                         if (can_div_trunc_p (GET_MODE_NUNITS (use->mode ()),
+                                              GET_MODE_NUNITS (GET_MODE (x)),
+                                              &factor))
+                           {
+                             gcc_assert (factor > 0);
+                             break;
+                           }
+                         else
+                           return NULL_RTX;
+                       }
+                   }
+               }
+
+             if (factor > 1)
+               new_use_avl = GEN_INT (INTVAL (new_use_avl) * factor);
+           }
 
          if (!use_avl)
            use_avl = new_use_avl;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c
new file mode 100644
index 000000000000..47368684faa1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcbv_zvl256b -mabi=lp64d -O3 -mrvv-vector-bits=zvl --param=riscv-autovec-mode=V4QI -mtune=generic-ooo -fdump-rtl-avlprop-all" } */
+
+typedef unsigned char uint8_t;
+typedef short int16_t;
+
+#define FDEC_STRIDE 32
+
+static inline uint8_t x264_clip_uint8( int x )
+{
+  return x;  /* No clamping here: x is implicitly converted from int to uint8_t.  */
+}
+
+void
+x264_add4x4_idct (uint8_t *p_dst, int16_t d[16])
+{  /* Add the 16 values in d, read as a 4x4 row-major block, onto four rows of p_dst.  */
+  for( int y = 0; y < 4; y++ )
+    {
+      for( int x = 0; x < 4; x++ )  /* Four uint8_t elements per row.  */
+        p_dst[x] = x264_clip_uint8( p_dst[x] + d[y*4+x] );
+      p_dst += FDEC_STRIDE;  /* Step to the next destination row.  */
+    }
+}
+
+/* { dg-final { scan-rtl-dump "Propagating AVL: \\(const_int 4" "avlprop" } } */
+/* { dg-final { scan-rtl-dump-not "Propagating AVL: \\(const_int 1" "avlprop" } } */

Reply via email to