[PATCH v2 12/16]AArch64: Add SVE2 Integer RTL patterns for Complex Addition, Multiply and FMA.

Tamar Christina Fri, 25 Sep 2020 07:31:42 -0700

Hi All,

This adds implementations of the optabs for complex operations.  With this, the
following C code:


  void f90 (int _Complex a[restrict N], int _Complex b[restrict N],
            int _Complex c[restrict N])
  {
    for (int i=0; i < N; i++)
      c[i] = a[i] + (b[i] * I);
  }

generates

  f90:
          mov     x3, 0
          mov     x4, 200
          whilelo p0.s, xzr, x4
          .p2align 3,,7
  .L2:
          ld1w    z0.s, p0/z, [x0, x3, lsl 2]
          ld1w    z1.s, p0/z, [x1, x3, lsl 2]
          cadd    z0.s, z0.s, z1.s, #90
          st1w    z0.s, p0, [x2, x3, lsl 2]
          incw    x3
          whilelo p0.s, x3, x4
          b.any   .L2
          ret

instead of

  f90:
          mov     x3, 0
          mov     x4, 0
          mov     w5, 100
          whilelo p0.s, wzr, w5
          .p2align 3,,7
  .L2:
          ld2w    {z4.s - z5.s}, p0/z, [x0, x3, lsl 2]
          ld2w    {z2.s - z3.s}, p0/z, [x1, x3, lsl 2]
          sub     z0.s, z4.s, z3.s
          add     z1.s, z5.s, z2.s
          st2w    {z0.s - z1.s}, p0, [x2, x3, lsl 2]
          incw    x4
          inch    x3
          whilelo p0.s, w4, w5
          b.any   .L2
          ret

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-sve2.md (cadd<rot><mode>3,
        cml<fcmac1><rot_op><mode>4, cmul<rot_op><mode>3): New.
        * config/aarch64/iterators.md (SVE2_INT_CMLA_OP, SVE2_INT_CMUL_OP,
        SVE2_INT_CADD_OP): New.

--

diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index e18b9fef16e72496588fb5850e362da4ae42898a..e601c6a4586e3ed1e11aedf047f56d556a99a302 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1774,6 +1774,16 @@ (define_insn "@aarch64_sve_<optab><mode>"
   [(set_attr "movprfx" "*,yes")]
 )
 
+;; Unpredicated optab pattern for the auto-vectorizer (complex addition).
+(define_expand "cadd<rot><mode>3"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+	(unspec:SVE_FULL_I
+	  [(match_operand:SVE_FULL_I 1 "register_operand")
+	   (match_operand:SVE_FULL_I 2 "register_operand")]
+	  SVE2_INT_CADD_OP))]
+  "TARGET_SVE2"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Complex ternary operations
 ;; -------------------------------------------------------------------------
@@ -1813,6 +1823,47 @@ (define_insn "@aarch64_<optab>_lane_<mode>"
   [(set_attr "movprfx" "*,yes")]
 )
 
+;; Unpredicated optab pattern for the auto-vectorizer.  Complex mla/mls always
+;; expands to two CMLA instructions, each doing half the computation, so expand
+;; early.  NOTE(review): operand 0 is written by the first CMLA and read by
+;; the second; consider a fresh temporary in case it overlaps operands 2/3.
+(define_expand "cml<fcmac1><rot_op><mode>4"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+	(plus:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand")
+	  (unspec:SVE_FULL_I
+	    [(match_operand:SVE_FULL_I 2 "register_operand")
+	     (match_operand:SVE_FULL_I 3 "register_operand")]
+	    SVE2_INT_CMLA_OP)))]
+  "TARGET_SVE2"
+{
+  emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (operands[0], operands[1],
+						   operands[2], operands[3]));
+  emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], operands[0],
+						   operands[2], operands[3]));
+  DONE;
+})
+
+;; Unpredicated optab pattern for the auto-vectorizer.  Complex multiply is
+;; expanded early into two CMLA instructions accumulating onto a zero vector
+;; (operand 3).  NOTE(review): operand 0 is written by the first CMLA and read
+;; by the second; confirm it cannot overlap operands 1/2, or use a temporary.
+(define_expand "cmul<rot_op><mode>3"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+	(unspec:SVE_FULL_I
+	  [(match_operand:SVE_FULL_I 1 "register_operand")
+	   (match_operand:SVE_FULL_I 2 "register_operand")
+	   (match_dup 3)]
+	  SVE2_INT_CMUL_OP))]
+  "TARGET_SVE2"
+{
+  operands[3] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+  emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (operands[0], operands[3],
+						   operands[1], operands[2]));
+  emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], operands[0],
+						   operands[1], operands[2]));
+  DONE;
+})
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Complex dot product
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7662b929e2c4f6c103cc06e051eb574247320809..c11e976237d30771a7bd7c7fb56922f9c5c785de 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2583,6 +2583,23 @@ (define_int_iterator SVE2_INT_CMLA [UNSPEC_CMLA
 				    UNSPEC_SQRDCMLAH180
 				    UNSPEC_SQRDCMLAH270])
 
+;; Unlike the normal CMLA instructions, these represent the actual operation
+;; to be performed.  They always need to be expanded into a sequence of
+;; multiple CMLA instructions.
+(define_int_iterator SVE2_INT_CMLA_OP [UNSPEC_CMLA
+				       UNSPEC_CMLA180
+				       UNSPEC_CMLS])
+
+;; Unlike the normal CMLA instructions, these represent the actual operation
+;; to be performed.  They always need to be expanded into a sequence of
+;; multiple CMLA instructions.
+(define_int_iterator SVE2_INT_CMUL_OP [UNSPEC_CMUL
+				       UNSPEC_CMUL180])
+
+;; Same as SVE2_INT_CADD, but excluding the saturating instructions.
+(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90
+				       UNSPEC_CADD270])
+
 (define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT
 				    UNSPEC_CDOT90
 				    UNSPEC_CDOT180

[PATCH v2 12/16]AArch64: Add SVE2 Integer RTL patterns for Complex Addition, Multiply and FMA.

Reply via email to