[Sorry for the re-send. I spotted that the attributes were not right for the new pattern I was adding. The change between this and the first version was:
+ [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple") + (set_attr "length" "4,4,4,12")] ] --- Hi, In this testcase, all argument registers and the return register will be general purpose registers: long long foo (long long a, long long b, long long c) { return ((a ^ b) & c) ^ b; } However, due to the implementation of aarch64_simd_bsl<mode>_internal we'll match that pattern and emit a BSL, necessitating moving all those arguments and results to the Advanced SIMD registers: fmov d2, x0 fmov d0, x2 fmov d1, x1 bsl v0.8b, v2.8b, v1.8b fmov x0, d0 To fix this, we turn aarch64_simd_bsldi_internal in to an insn_and_split that knows to split back to integer operations if the register allocation falls that way. We could have used an unspec, but then we lose some of the nice simplifications that can be made from explicitly spelling out the semantics of BSL. Bootstrapped on aarch64-none-linux-gnu. OK? Thanks, James --- gcc/ 2017-06-12 James Greenhalgh <james.greenha...@arm.com> * config/aarch64/aarch64-simd.md (aarch64_simd_bsl<mode>_internal): Remove DImode. (*aarch64_simd_bsl<mode>_alt): Likewise. (aarch64_simd_bsldi_internal): New. gcc/testsuite/ 2017-06-12 James Greenhalgh <james.greenha...@arm.com> * gcc.target/aarch64/no-dimode-bsl.c: New. * gcc.target/aarch64/dimode-bsl.c: New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c5a86ff..7b6b12f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2256,13 +2256,13 @@
 ;; in *aarch64_simd_bsl<mode>_alt.
 
 (define_insn "aarch64_simd_bsl<mode>_internal"
-  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
-	(xor:VSDQ_I_DI
-	   (and:VSDQ_I_DI
-	     (xor:VSDQ_I_DI
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
+	(xor:VDQ_I
+	   (and:VDQ_I
+	     (xor:VDQ_I
 	       (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
-	       (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
-	     (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
+	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
+	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
 	   (match_dup:<V_cmp_result> 3)
 	))]
   "TARGET_SIMD"
@@ -2280,14 +2280,14 @@
 ;; permutations of commutative operations, we have to have a separate pattern.
 
 (define_insn "*aarch64_simd_bsl<mode>_alt"
-  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
-	(xor:VSDQ_I_DI
-	   (and:VSDQ_I_DI
-	     (xor:VSDQ_I_DI
-	       (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
-	       (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
-	     (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
-	   (match_dup:VSDQ_I_DI 2)))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
+	(xor:VDQ_I
+	   (and:VDQ_I
+	     (xor:VDQ_I
+	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
+	       (match_operand:VDQ_I 2 "register_operand" "w,0,w"))
+	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
+	   (match_dup:VDQ_I 2)))]
   "TARGET_SIMD"
   "@
   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
@@ -2296,6 +2296,45 @@
   [(set_attr "type" "neon_bsl<q>")]
 )
 
+;; DImode is special, we want to avoid computing operations which are
+;; more naturally computed in general purpose registers in the vector
+;; registers.  If we do that, we need to move all three operands from general
+;; purpose registers to vector registers, then back again.  However, we
+;; don't want to make this pattern an UNSPEC as we'd lose scope for
+;; optimizations based on the component operations of a BSL.
+;;
+;; That means we need a splitter back to the individual operations, if they
+;; would be better calculated on the integer side.
+
+(define_insn_and_split "aarch64_simd_bsldi_internal"
+  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
+	(xor:DI
+	   (and:DI
+	     (xor:DI
+	       (match_operand:DI 3 "register_operand" "w,0,w,r")
+	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
+	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
+	   (match_dup:DI 3)
+	))]
+  "TARGET_SIMD"
+  "@
+  bsl\\t%0.8b, %2.8b, %3.8b
+  bit\\t%0.8b, %2.8b, %1.8b
+  bif\\t%0.8b, %3.8b, %1.8b
+  #"
+  "&& GP_REGNUM_P (REGNO (operands[0]))"
+  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
+{
+  /* Split back to individual operations.  */
+  emit_insn (gen_xordi3 (operands[0], operands[2], operands[3]));
+  emit_insn (gen_anddi3 (operands[0], operands[0], operands[1]));
+  emit_insn (gen_xordi3 (operands[0], operands[0], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
+   (set_attr "length" "4,4,4,12")]
+)
+
 (define_expand "aarch64_simd_bsl<mode>"
   [(match_operand:VALLDIF 0 "register_operand")
    (match_operand:<V_cmp_result> 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/dimode-bsl.c b/gcc/testsuite/gcc.target/aarch64/dimode-bsl.c
new file mode 100644
index 0000000..4e63511
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/dimode-bsl.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Test that we can generate DImode BSL when we are using
+   copysign.  */
+
+double
+foo (double a, double b)
+{
+  return __builtin_copysign (a, b);
+}
+
+/* { dg-final { scan-assembler "bsl\tv\[0-9\]" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/no-dimode-bsl.c b/gcc/testsuite/gcc.target/aarch64/no-dimode-bsl.c
new file mode 100644
index 0000000..67dfda0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/no-dimode-bsl.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Test that we don't combine to BSL when in DImode, avoiding register
+   moves in the general case.
+
+   We want:
+	eor	x0, x0, x1
+	and	x0, x0, x2
+	eor	x0, x0, x1
+	ret
+
+   Rather than:
+	fmov	d2, x0
+	fmov	d0, x2
+	fmov	d1, x1
+	bsl	v0.8b, v2.8b, v1.8b
+	fmov	x0, d0
+	ret  */
+
+long long
+foo (long long a, long long b, long long c)
+{
+  return ((a ^ b) & c) ^ b;
+}
+
+/* { dg-final { scan-assembler-not "bsl\tv\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "bif\tv\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "bit\tv\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "fmov\td\[0-9\]" } } */
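One closing note on dimode-bsl.c, the positive half of the test pair:
copysign is the motivating DImode use of BSL because it is a bit select
whose mask is just the sign bit, and its operands already live in the
FP registers, so no cross-file moves are needed. A hedged C sketch of
that equivalence follows; the function name is hypothetical and not
part of the patch:

  #include <assert.h>
  #include <string.h>

  /* copysign (a, b) as a DImode bit select: with the sign bit as the
     mask, take that one bit from B and all remaining bits from A.
     Note the ((x ^ y) & mask) ^ y shape -- the same one the insn
     pattern matches.  */
  static double
  copysign_via_bsl (double a, double b)
  {
    unsigned long long ia, ib;
    const unsigned long long sign = 0x8000000000000000ULL;
    memcpy (&ia, &a, sizeof ia);
    memcpy (&ib, &b, sizeof ib);
    ia = ((ib ^ ia) & sign) ^ ia;
    memcpy (&a, &ia, sizeof a);
    return a;
  }

  int
  main (void)
  {
    assert (copysign_via_bsl (1.5, -2.0) == -1.5);
    assert (copysign_via_bsl (-1.5, 2.0) == 1.5);
    return 0;
  }

In that testcase operand 0 is allocated to a vector register, so the
GP_REGNUM_P split condition is false and the BSL form survives, which
is what the scan-assembler directive checks.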