Hi All, This adds implementations of the optabs for complex operations. With this, the following C code:
void f90 (int _Complex a[restrict N], int _Complex b[restrict N], int _Complex c[restrict N]) { for (int i=0; i < N; i++) c[i] = a[i] + (b[i] * I); } generates f90: mov x3, 0 mov x4, 200 whilelo p0.s, xzr, x4 .p2align 3,,7 .L2: ld1w z0.s, p0/z, [x0, x3, lsl 2] ld1w z1.s, p0/z, [x1, x3, lsl 2] cadd z0.s, z0.s, z1.s, #90 st1w z0.s, p0, [x2, x3, lsl 2] incw x3 whilelo p0.s, x3, x4 b.any .L2 ret instead of f90: mov x3, 0 mov x4, 0 mov w5, 100 whilelo p0.s, wzr, w5 .p2align 3,,7 .L2: ld2w {z4.s - z5.s}, p0/z, [x0, x3, lsl 2] ld2w {z2.s - z3.s}, p0/z, [x1, x3, lsl 2] sub z0.s, z4.s, z3.s add z1.s, z5.s, z2.s st2w {z0.s - z1.s}, p0, [x2, x3, lsl 2] incw x4 inch x3 whilelo p0.s, w4, w5 b.any .L2 ret Bootstrapped and regression-tested on aarch64-none-linux-gnu with no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-sve2.md (cadd<rot><mode>3, cml<fcmac1><rot_op><mode>4, cmul<rot_op><mode>3): New. * config/aarch64/iterators.md (SVE2_INT_CMLA_OP, SVE2_INT_CMUL_OP, SVE2_INT_CADD_OP): New. --
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index e18b9fef16e72496588fb5850e362da4ae42898a..e601c6a4586e3ed1e11aedf047f56d556a99a302 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1774,6 +1774,16 @@ (define_insn "@aarch64_sve_<optab><mode>" [(set_attr "movprfx" "*,yes")] ) +;; Unpredicated optab pattern for the auto-vectorizer. +(define_expand "cadd<rot><mode>3" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")] + SVE2_INT_CADD_OP))] + "TARGET_SVE2" +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Complex ternary operations ;; ------------------------------------------------------------------------- @@ -1813,6 +1823,47 @@ (define_insn "@aarch64_<optab>_lane_<mode>" [(set_attr "movprfx" "*,yes")] ) +;; Unpredicated optab pattern for the auto-vectorizer. +;; The complex mla/mls operations always need to expand to two instructions. +;; The first operation does half the computation and the second does the +;; remainder. Because of this, expand early. +(define_expand "cml<fcmac1><rot_op><mode>4" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (plus:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 2 "register_operand") + (match_operand:SVE_FULL_I 3 "register_operand")] + SVE2_INT_CMLA_OP)))] + "TARGET_SVE2" +{ + emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (operands[0], operands[1], + operands[2], operands[3])); + emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], operands[0], + operands[2], operands[3])); + DONE; +}) + +;; Unpredicated optab pattern for the auto-vectorizer. +;; The complex mul operations always need to expand to two instructions. +;; The first operation does half the computation and the second does the +;; remainder.
Because of this, expand early. +(define_expand "cmul<rot_op><mode>3" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand") + (match_dup 3)] + SVE2_INT_CMUL_OP))] + "TARGET_SVE2" +{ + operands[3] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); + emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (operands[0], operands[3], + operands[1], operands[2])); + emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], operands[0], + operands[1], operands[2])); + DONE; +}) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Complex dot product ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 7662b929e2c4f6c103cc06e051eb574247320809..c11e976237d30771a7bd7c7fb56922f9c5c785de 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2583,6 +2583,23 @@ (define_int_iterator SVE2_INT_CMLA [UNSPEC_CMLA UNSPEC_SQRDCMLAH180 UNSPEC_SQRDCMLAH270]) +;; Unlike the normal CMLA instructions these represent the actual operation +;; to be performed. They will always need to be expanded into a sequence +;; of multiple CMLA instructions. +(define_int_iterator SVE2_INT_CMLA_OP [UNSPEC_CMLA + UNSPEC_CMLA180 + UNSPEC_CMLS]) + +;; Unlike the normal CMLA instructions these represent the actual operation +;; to be performed. They will always need to be expanded into a sequence +;; of multiple CMLA instructions. +(define_int_iterator SVE2_INT_CMUL_OP [UNSPEC_CMUL + UNSPEC_CMUL180]) + +;; Same as SVE2_INT_CADD but excluding the saturating instructions. +(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90 + UNSPEC_CADD270]) + (define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT UNSPEC_CDOT90 UNSPEC_CDOT180