On 22/05/25 15:56, Richard Sandiford wrote:


<dhr...@nvidia.com> writes:
From: Dhruv Chawla <dhr...@nvidia.com>

This patch modifies the shift expander to lower constant shifts
immediately, without wrapping them in an UNSPEC_PRED_X. It also modifies
the ADR, SRA and ADDHNB patterns to match the lowered forms of the
shifts, since the predicate register is not required for these
instructions.
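For illustration only (this example is mine, not part of the patch): a
constant vector shift like the one below previously expanded to an
UNSPEC_PRED_X-wrapped shift that was only split into the unpredicated
form after register allocation. With this change the expander emits the
unpredicated aarch64_vashl<mode>3_const pattern directly, so, assuming
the loop is vectorized (e.g. at -O3 -march=armv8-a+sve), the shift is
expected to come out as a plain "lsl z0.s, z0.s, #2".

  /* Hypothetical testcase, not taken from the patch.  */
  void
  lshift_by_2 (int *restrict dst, int *restrict src, int n)
  {
    for (int i = 0; i < n; i++)
      dst[i] = src[i] << 2;	/* constant shift, now lowered without a predicate.  */
  }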

Bootstrapped and regtested on aarch64-linux-gnu.

Signed-off-by: Dhruv Chawla <dhr...@nvidia.com>
Co-authored-by: Richard Sandiford <richard.sandif...@arm.com>

gcc/ChangeLog:

        * config/aarch64/aarch64-sve.md (@aarch64_adr<mode>_shift):
       Match lowered form of ashift.
       (*aarch64_adr<mode>_shift): Likewise.
       (*aarch64_adr_shift_sxtw): Likewise.
       (*aarch64_adr_shift_uxtw): Likewise.
       (<ASHIFT:optab><mode>3): Check amount instead of operands[2] in
       aarch64_sve_<lr>shift_operand.
       (v<optab><mode>3): Generate unpredicated shifts for constant
       operands.
       (@aarch64_pred_<optab><mode>): Convert to a define_expand.
       (*aarch64_pred_<optab><mode>): Create define_insn_and_split pattern
       from @aarch64_pred_<optab><mode>.
       (*post_ra_v_ashl<mode>3): Rename to ...
       (aarch64_vashl<mode>3_const): ... this and remove reload requirement.
       (*post_ra_v_<optab><mode>3): Rename to ...
       (aarch64_v<optab><mode>3_const): ... this and remove reload
       requirement.
        * config/aarch64/aarch64-sve2.md
       (@aarch64_sve_add_<sve_int_op><mode>): Match lowered form of
       SHIFTRT.
       (*aarch64_sve2_sra<mode>): Likewise.
       (*bitmask_shift_plus<mode>): Match lowered form of lshiftrt.
---
  gcc/config/aarch64/aarch64-sve.md  | 119 +++++++++++++++--------------
  gcc/config/aarch64/aarch64-sve2.md |  46 ++++-------
  2 files changed, 75 insertions(+), 90 deletions(-)
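As an aside (again my own illustrative sketch, not taken from the
patch): the ADR patterns now match a plain (plus (ashift ...) ...)
form, which is the shape of element-wise address arithmetic below.
Whether ADR is actually chosen depends on the target options, since ADR
requires non-streaming SVE.

  /* Hypothetical example: base[i] + (idx[i] << 2), the kind of
     computation the SVE ADR instruction implements.  */
  #include <stdint.h>

  void
  adr_like (uint64_t *restrict out, const uint64_t *restrict base,
            const uint64_t *restrict idx, int n)
  {
    for (int i = 0; i < n; i++)
      out[i] = base[i] + (idx[i] << 2);
  }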

OK, thanks.

It doesn't look like you're listed in MAINTAINERS as having write access.
If that's right, and if you'd like access, please follow the instructions
in https://gcc.gnu.org/gitwrite.html (I'll sponsor).

Committed as 7e0149fdb01b595949a3a6add478b3eed9acf478 and
c637186943b78b6e07e3310e878da922ecdd99f4. Thanks for the reviews!

Richard


diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index bf7569f932b..e1ec778b10d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4234,80 +4234,57 @@
  (define_expand "@aarch64_adr<mode>_shift"
    [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
       (plus:SVE_FULL_SDI
-       (unspec:SVE_FULL_SDI
-         [(match_dup 4)
-          (ashift:SVE_FULL_SDI
-            (match_operand:SVE_FULL_SDI 2 "register_operand")
-            (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
-         UNSPEC_PRED_X)
+       (ashift:SVE_FULL_SDI
+         (match_operand:SVE_FULL_SDI 2 "register_operand")
+         (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))
         (match_operand:SVE_FULL_SDI 1 "register_operand")))]
    "TARGET_SVE && TARGET_NON_STREAMING"
-  {
-    operands[4] = CONSTM1_RTX (<VPRED>mode);
-  }
  )

-(define_insn_and_rewrite "*aarch64_adr<mode>_shift"
+(define_insn "*aarch64_adr<mode>_shift"
    [(set (match_operand:SVE_24I 0 "register_operand" "=w")
       (plus:SVE_24I
-       (unspec:SVE_24I
-         [(match_operand 4)
-          (ashift:SVE_24I
-            (match_operand:SVE_24I 2 "register_operand" "w")
-            (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
-         UNSPEC_PRED_X)
+       (ashift:SVE_24I
+         (match_operand:SVE_24I 2 "register_operand" "w")
+         (match_operand:SVE_24I 3 "const_1_to_3_operand"))
         (match_operand:SVE_24I 1 "register_operand" "w")))]
    "TARGET_SVE && TARGET_NON_STREAMING"
    "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
-  "&& !CONSTANT_P (operands[4])"
-  {
-    operands[4] = CONSTM1_RTX (<VPRED>mode);
-  }
  )

  ;; Same, but with the index being sign-extended from the low 32 bits.
  (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
    [(set (match_operand:VNx2DI 0 "register_operand" "=w")
       (plus:VNx2DI
-       (unspec:VNx2DI
-         [(match_operand 4)
-          (ashift:VNx2DI
-            (unspec:VNx2DI
-              [(match_operand 5)
-               (sign_extend:VNx2DI
-                 (truncate:VNx2SI
-                   (match_operand:VNx2DI 2 "register_operand" "w")))]
-              UNSPEC_PRED_X)
-            (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
-         UNSPEC_PRED_X)
+       (ashift:VNx2DI
+         (unspec:VNx2DI
+           [(match_operand 4)
+            (sign_extend:VNx2DI
+              (truncate:VNx2SI
+                (match_operand:VNx2DI 2 "register_operand" "w")))]
+          UNSPEC_PRED_X)
+         (match_operand:VNx2DI 3 "const_1_to_3_operand"))
         (match_operand:VNx2DI 1 "register_operand" "w")))]
    "TARGET_SVE && TARGET_NON_STREAMING"
    "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
-  "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
+  "&& !CONSTANT_P (operands[4])"
    {
-    operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
+    operands[4] = CONSTM1_RTX (VNx2BImode);
    }
  )

  ;; Same, but with the index being zero-extended from the low 32 bits.
-(define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
+(define_insn "*aarch64_adr_shift_uxtw"
    [(set (match_operand:VNx2DI 0 "register_operand" "=w")
       (plus:VNx2DI
-       (unspec:VNx2DI
-         [(match_operand 5)
-          (ashift:VNx2DI
-            (and:VNx2DI
-              (match_operand:VNx2DI 2 "register_operand" "w")
-              (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
-            (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
-         UNSPEC_PRED_X)
+       (ashift:VNx2DI
+         (and:VNx2DI
+           (match_operand:VNx2DI 2 "register_operand" "w")
+           (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
+         (match_operand:VNx2DI 3 "const_1_to_3_operand"))
         (match_operand:VNx2DI 1 "register_operand" "w")))]
    "TARGET_SVE && TARGET_NON_STREAMING"
    "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
-  "&& !CONSTANT_P (operands[5])"
-  {
-    operands[5] = CONSTM1_RTX (VNx2BImode);
-  }
  )

  ;; -------------------------------------------------------------------------
@@ -4899,7 +4876,7 @@
      if (CONST_INT_P (operands[2]))
        {
       amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
-     if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
+     if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
         amount = force_reg (<MODE>mode, amount);
        }
      else
@@ -4923,15 +4900,40 @@
         UNSPEC_PRED_X))]
    "TARGET_SVE"
    {
+    if (CONSTANT_P (operands[2]))
+      {
+     emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[1],
+                                                   operands[2]));
+     DONE;
+      }
      operands[3] = aarch64_ptrue_reg (<VPRED>mode);
    }
  )

-;; Shift by a vector, predicated with a PTRUE.  We don't actually need
-;; the predicate for the first alternative, but using Upa or X isn't
-;; likely to gain much and would make the instruction seem less uniform
-;; to the register allocator.
-(define_insn_and_split "@aarch64_pred_<optab><mode>"
+;; Shift by a vector, predicated with a PTRUE.
+(define_expand "@aarch64_pred_<optab><mode>"
+  [(set (match_operand:SVE_I 0 "register_operand")
+     (unspec:SVE_I
+       [(match_operand:<VPRED> 1 "register_operand")
+        (ASHIFT:SVE_I
+          (match_operand:SVE_I 2 "register_operand")
+          (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand"))]
+       UNSPEC_PRED_X))]
+  "TARGET_SVE"
+  {
+    if (CONSTANT_P (operands[3]))
+      {
+     emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[2],
+                                                   operands[3]));
+     DONE;
+      }
+  }
+)
+
+;; We don't actually need the predicate for the first alternative, but
+;; using Upa or X isn't likely to gain much and would make the instruction
+;; seem less uniform to the register allocator.
+(define_insn_and_split "*aarch64_pred_<optab><mode>"
    [(set (match_operand:SVE_I 0 "register_operand")
       (unspec:SVE_I
         [(match_operand:<VPRED> 1 "register_operand")
@@ -4946,33 +4948,32 @@
     [ w        , Upl , w , 0     ; *              ] <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
     [ ?&w      , Upl , w , w     ; yes            ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    }
-  "&& reload_completed
-   && !register_operand (operands[3], <MODE>mode)"
+  "&& !register_operand (operands[3], <MODE>mode)"
    [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
    ""
  )

-;; Unpredicated shift operations by a constant (post-RA only).
+;; Unpredicated shift operations by a constant.
  ;; These are generated by splitting a predicated instruction whose
  ;; predicate is unused.
-(define_insn "*post_ra_v_ashl<mode>3"
+(define_insn "aarch64_vashl<mode>3_const"
    [(set (match_operand:SVE_I 0 "register_operand")
       (ashift:SVE_I
         (match_operand:SVE_I 1 "register_operand")
         (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
-  "TARGET_SVE && reload_completed"
+  "TARGET_SVE"
    {@ [ cons: =0 , 1 , 2   ]
       [ w     , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
       [ w     , w , Dl  ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
    }
  )

-(define_insn "*post_ra_v_<optab><mode>3"
+(define_insn "aarch64_v<optab><mode>3_const"
    [(set (match_operand:SVE_I 0 "register_operand" "=w")
       (SHIFTRT:SVE_I
         (match_operand:SVE_I 1 "register_operand" "w")
         (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
-  "TARGET_SVE && reload_completed"
+  "TARGET_SVE"
    "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
  )

diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 871cf0bd2e8..62524f36de6 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1932,40 +1932,27 @@
  (define_expand "@aarch64_sve_add_<sve_int_op><mode>"
    [(set (match_operand:SVE_FULL_I 0 "register_operand")
       (plus:SVE_FULL_I
-       (unspec:SVE_FULL_I
-         [(match_dup 4)
-          (SHIFTRT:SVE_FULL_I
-            (match_operand:SVE_FULL_I 2 "register_operand")
-            (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
-         UNSPEC_PRED_X)
-      (match_operand:SVE_FULL_I 1 "register_operand")))]
+       (SHIFTRT:SVE_FULL_I
+         (match_operand:SVE_FULL_I 2 "register_operand")
+         (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
+       (match_operand:SVE_FULL_I 1 "register_operand")))]
    "TARGET_SVE2"
-  {
-    operands[4] = CONSTM1_RTX (<VPRED>mode);
-  }
  )

  ;; Pattern-match SSRA and USRA as a predicated operation whose predicate
  ;; isn't needed.
-(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
+(define_insn "*aarch64_sve2_sra<mode>"
    [(set (match_operand:SVE_FULL_I 0 "register_operand")
       (plus:SVE_FULL_I
-       (unspec:SVE_FULL_I
-         [(match_operand 4)
-          (SHIFTRT:SVE_FULL_I
-            (match_operand:SVE_FULL_I 2 "register_operand")
-            (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
-         UNSPEC_PRED_X)
+       (SHIFTRT:SVE_FULL_I
+         (match_operand:SVE_FULL_I 2 "register_operand")
+         (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
        (match_operand:SVE_FULL_I 1 "register_operand")))]
    "TARGET_SVE2"
    {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w        , 0 , w ; *              ] <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
     [ ?&w      , w , w ; yes            ] movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
    }
-  "&& !CONSTANT_P (operands[4])"
-  {
-    operands[4] = CONSTM1_RTX (<VPRED>mode);
-  }
  )

  ;; SRSRA and URSRA.
@@ -2715,17 +2702,14 @@
  ;; Optimize ((a + b) >> n) where n is half the bitsize of the vector
  (define_insn "*bitmask_shift_plus<mode>"
    [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
-     (unspec:SVE_FULL_HSDI
-        [(match_operand:<VPRED> 1)
-         (lshiftrt:SVE_FULL_HSDI
-           (plus:SVE_FULL_HSDI
-             (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
-             (match_operand:SVE_FULL_HSDI 3 "register_operand" "w"))
-           (match_operand:SVE_FULL_HSDI 4
-              "aarch64_simd_shift_imm_vec_exact_top" ""))]
-          UNSPEC_PRED_X))]
+     (lshiftrt:SVE_FULL_HSDI
+       (plus:SVE_FULL_HSDI
+         (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+         (match_operand:SVE_FULL_HSDI 2 "register_operand" "w"))
+       (match_operand:SVE_FULL_HSDI 3
+         "aarch64_simd_shift_imm_vec_exact_top" "")))]
    "TARGET_SVE2"
-  "addhnb\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
+  "addhnb\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
  )

  ;; -------------------------------------------------------------------------
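One more illustrative note (mine, not from the patch): the
*bitmask_shift_plus<mode> pattern above targets ((a + b) >> n) where n
is half the element width, which ADDHNB computes directly. A
source-level shape that can map to it, assuming the loop is vectorized,
is:

  /* Hypothetical example: 32-bit add followed by a shift right by 16,
     i.e. half the element width, matching the addhnb form.  */
  #include <stdint.h>

  void
  addhnb_like (uint32_t *restrict out, const uint32_t *restrict a,
               const uint32_t *restrict b, int n)
  {
    for (int i = 0; i < n; i++)
      out[i] = (a[i] + b[i]) >> 16;
  }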

--
Regards,
Dhruv
