Hi All,
This adds an implementation of the optabs for complex operations.  With this,
the following C code:
/* Vectorizer example: for each element, c[i] = a[i] + b[i] * I, i.e. a complex
   add of b rotated by 90 degrees.  N and I are defined outside this excerpt
   (I is presumably the imaginary unit from <complex.h> -- confirm).  */
void f90 (int _Complex a[restrict N], int _Complex b[restrict N],
int _Complex c[restrict N])
{
for (int i=0; i < N; i++)
c[i] = a[i] + (b[i] * I);
}
generates
// Output with the patch: contiguous loads and a single CADD per vector.
f90:
mov x3, 0  // x3 = array index, counted in 32-bit elements
mov x4, 200  // x4 = loop bound in 32-bit elements (presumably 2 * N)
whilelo p0.s, xzr, x4  // p0 = lanes whose index is below the bound
.p2align 3,,7
.L2:
ld1w z0.s, p0/z, [x0, x3, lsl 2]  // z0 = next vector of a
ld1w z1.s, p0/z, [x1, x3, lsl 2]  // z1 = next vector of b
cadd z0.s, z0.s, z1.s, #90  // z0 = a + b rotated by 90 degrees (a + b*I)
st1w z0.s, p0, [x2, x3, lsl 2]  // store the result vector to c
incw x3  // advance the index by one vector of 32-bit elements
whilelo p0.s, x3, x4  // recompute the predicate for the next iteration
b.any .L2  // loop while any lane is still active
ret
instead of
// Output without the patch: de-interleaving LD2W/ST2W plus separate SUB/ADD.
f90:
mov x3, 0  // x3 = array index, counted in 32-bit elements
mov x4, 0  // x4 = index counted in complex elements
mov w5, 100  // w5 = loop bound in complex elements (presumably N)
whilelo p0.s, wzr, w5  // p0 = lanes whose complex index is below the bound
.p2align 3,,7
.L2:
ld2w {z4.s - z5.s}, p0/z, [x0, x3, lsl 2]  // z4 = real(a), z5 = imag(a)
ld2w {z2.s - z3.s}, p0/z, [x1, x3, lsl 2]  // z2 = real(b), z3 = imag(b)
sub z0.s, z4.s, z3.s  // real(c) = real(a) - imag(b)
add z1.s, z5.s, z2.s  // imag(c) = imag(a) + real(b)
st2w {z0.s - z1.s}, p0, [x2, x3, lsl 2]  // re-interleave and store to c
incw x4  // advance the complex index by one vector of 32-bit elements
inch x3  // advance the 32-bit index by twice as many elements
whilelo p0.s, w4, w5  // recompute the predicate for the next iteration
b.any .L2  // loop while any lane is still active
ret
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64-sve2.md (cadd<rot><mode>3,
cml<fcmac1><rot_op><mode>4, cmul<rot_op><mode>3): New.
* config/aarch64/iterators.md (SVE2_INT_CMLA_OP, SVE2_INT_CMUL_OP,
SVE2_INT_CADD_OP): New.
--
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index e18b9fef16e72496588fb5850e362da4ae42898a..e601c6a4586e3ed1e11aedf047f56d556a99a302 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1774,6 +1774,16 @@ (define_insn "@aarch64_sve_<optab><mode>"
[(set_attr "movprfx" "*,yes")]
)
+;; Unpredicated CADD optab for the auto-vectorizer (non-saturating only).
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand")]
+ SVE2_INT_CADD_OP))]
+ "TARGET_SVE2"
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Complex ternary operations
;; -------------------------------------------------------------------------
@@ -1813,6 +1823,47 @@ (define_insn "@aarch64_<optab>_lane_<mode>"
[(set_attr "movprfx" "*,yes")]
)
+;; Unpredicated optab pattern for the auto-vectorizer.  The complex mla/mls
+;; operations always expand to two CMLA instructions, each doing half of the
+;; computation, so expand early.  The partial result uses a fresh temporary.
+(define_expand "cml<fcmac1><rot_op><mode>4"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+        (plus:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand")
+          (unspec:SVE_FULL_I
+            [(match_operand:SVE_FULL_I 2 "register_operand")
+             (match_operand:SVE_FULL_I 3 "register_operand")]
+            SVE2_INT_CMLA_OP)))]
+  "TARGET_SVE2"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);  /* operands[0] may alias an input.  */
+  emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, operands[1],
+                                                   operands[2], operands[3]));
+  emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
+                                                   operands[2], operands[3]));
+  DONE;
+})
+
+;; Unpredicated optab pattern for the auto-vectorizer.
+;; The complex mul operations always need to expand to two CMLA instructions
+;; accumulating onto zero: the first does half the computation into a fresh
+;; temporary and the second does the remainder, so expand early.
+(define_expand "cmul<rot_op><mode>3"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+        (unspec:SVE_FULL_I
+          [(match_operand:SVE_FULL_I 1 "register_operand")
+           (match_operand:SVE_FULL_I 2 "register_operand")]
+          SVE2_INT_CMUL_OP))]
+  "TARGET_SVE2"
+{
+  rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+  rtx tmp = gen_reg_rtx (<MODE>mode);  /* operands[0] may alias an input.  */
+  emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, accum,
+                                                   operands[1], operands[2]));
+  emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
+                                                   operands[1], operands[2]));
+  DONE;
+})
+
;; -------------------------------------------------------------------------
;; ---- [INT] Complex dot product
;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7662b929e2c4f6c103cc06e051eb574247320809..c11e976237d30771a7bd7c7fb56922f9c5c785de 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2583,6 +2583,23 @@ (define_int_iterator SVE2_INT_CMLA [UNSPEC_CMLA
UNSPEC_SQRDCMLAH180
UNSPEC_SQRDCMLAH270])
+;; Unlike the normal CMLA instructions these represent the actual operation
+;; to be performed.  They will always need to be expanded into multiple
+;; sequences consisting of CMLA.
+(define_int_iterator SVE2_INT_CMLA_OP [UNSPEC_CMLA
+ UNSPEC_CMLA180
+ UNSPEC_CMLS])
+
+;; Unlike the normal CMLA instructions these represent the actual complex
+;; multiply to be performed.  They will always need to be expanded into
+;; multiple sequences consisting of CMLA.
+(define_int_iterator SVE2_INT_CMUL_OP [UNSPEC_CMUL
+ UNSPEC_CMUL180])
+
+;; Same as SVE2_INT_CADD but excludes the saturating instructions.
+(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90
+ UNSPEC_CADD270])
+
(define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT
UNSPEC_CDOT90
UNSPEC_CDOT180