The attached patch implements V2SFmode FMA insn patterns. The patched compiler vectorizes FMA, FMS and FNMA operations, but for some reason fails to vectorize FNMS.
I have double-checked that the FNMS insn pattern is correct, and I am now out of ideas as to why the vectorizer still ignores it. -fno-vect-cost-model does not help, so it's time to ask the experts...

gcc/ChangeLog:

2020-05-11  Uroš Bizjak  <ubiz...@gmail.com>

	PR target/95046
	* config/i386/mmx.md (fmav2sf4): New insn pattern.
	(fmsv2sf4): Ditto.
	(fnmav2sf4): Ditto.
	(fnmsv2sf4): Ditto.

testsuite/ChangeLog:

2020-05-11  Uroš Bizjak  <ubiz...@gmail.com>

	PR target/95046
	* gcc.target/i386/pr95046-2.c: New test.

Apart from the FNMS issue, the patch is bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index a8f603b94f8..0024ce761d7 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -345,6 +345,70 @@ (set_attr "prefix" "*,orig,vex") (set_attr "mode" "V2SF,V4SF,V4SF")]) +(define_insn "fmav2sf4" + [(set (match_operand:V2SF 0 "register_operand" "=v,v,x") + (fma:V2SF + (match_operand:V2SF 1 "register_operand" "%0,v,x") + (match_operand:V2SF 2 "register_operand" "v,v,x") + (match_operand:V2SF 3 "register_operand" "v,0,x")))] + "(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE" + "@ + vfmadd132ps\t{%2, %3, %0|%0, %3, %2} + vfmadd231ps\t{%2, %1, %0|%0, %1, %2} + vfmaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma4") + (set_attr "type" "ssemuladd") + (set_attr "mode" "V4SF")]) + +(define_insn "fmsv2sf4" + [(set (match_operand:V2SF 0 "register_operand" "=v,v,x") + (fma:V2SF + (match_operand:V2SF 1 "register_operand" "%0,v,x") + (match_operand:V2SF 2 "register_operand" "v,v,x") + (neg:V2SF + (match_operand:V2SF 3 "register_operand" "v,0,x"))))] + "(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE" + "@ + vfmsub132ps\t{%2, %3, %0|%0, %3, %2} + vfmsub231ps\t{%2, %1, %0|%0, %1, %2} + vfmsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma4") + (set_attr "type" "ssemuladd") + (set_attr "mode" "V4SF")]) + +(define_insn "fnmav2sf4" + [(set (match_operand:V2SF 0 "register_operand" "=v,v,x") + (fma:V2SF + (neg:V2SF + (match_operand:V2SF 1 "register_operand" "%0,v,x")) + (match_operand:V2SF 2 "register_operand" "v,v,x") + (match_operand:V2SF 3 "register_operand" "v,0,x")))] + "(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE" + "@ + vfnmadd132ps\t{%2, %3, %0|%0, %3, %2} + vfnmadd231ps\t{%2, %1, %0|%0, %1, %2} + vfnmaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma4") + (set_attr "type" "ssemuladd") + (set_attr "mode" "V4SF")]) + +(define_insn "fnmsv2sf4" + [(set (match_operand:V2SF 0 "register_operand" "=v,v,x") + (fma:V2SF + (neg:V2SF + 
(match_operand:V2SF 1 "register_operand" "%0,v,x")) + (match_operand:V2SF 2 "register_operand" "v,v,x") + (neg:V2SF + (match_operand:V2SF 3 "register_operand" "v,0,x"))))] + "(TARGET_FMA || TARGET_FMA4) && TARGET_MMX_WITH_SSE" + "@ + vfnmsub132ps\t{%2, %3, %0|%0, %3, %2} + vfnmsub231ps\t{%2, %1, %0|%0, %1, %2} + vfnmsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma4") + (set_attr "type" "ssemuladd") + (set_attr "mode" "V4SF")]) + (define_expand "mmx_<code>v2sf3" [(set (match_operand:V2SF 0 "register_operand") (smaxmin:V2SF
/* PR target/95046 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O3 -mfma" } */

/* Check that two-element float loops are turned into packed FMA-family
   instructions when FMA is enabled.  Every loop below has a trip count
   of 2 and computes r[i] from a[i], b[i] and c[i] on the file-scope
   arrays.

   The scans accept any operand-order form of each mnemonic (132, 213 or
   231).  Which form is emitted may depend on which input register ends
   up tied to the destination, and that choice is not what this test is
   about.  The leading tab anchors the match at the start of the
   mnemonic.  */

float r[2], a[2], b[2], c[2];

/* FMA: r = a * b + c.  */

void
test_fma (void)
{
  for (int i = 0; i < 2; i++)
    r[i] = a[i] * b[i] + c[i];
}

/* { dg-final { scan-assembler "\tvfmadd\[123\]+ps" } } */

/* FMS: r = a * b - c.  */

void
test_fms (void)
{
  for (int i = 0; i < 2; i++)
    r[i] = a[i] * b[i] - c[i];
}

/* { dg-final { scan-assembler "\tvfmsub\[123\]+ps" } } */

/* FNMA: r = -(a * b) + c.  */

void
test_fnma (void)
{
  for (int i = 0; i < 2; i++)
    r[i] = -(a[i] * b[i]) + c[i];
}

/* { dg-final { scan-assembler "\tvfnmadd\[123\]+ps" } } */

/* FNMS: r = -(a * b) - c.
   NOTE(review): the submission accompanying this test reports that this
   loop is not vectorized yet, so the scan below is expected to fail
   until that is resolved.  */

void
test_fnms (void)
{
  for (int i = 0; i < 2; i++)
    r[i] = -(a[i] * b[i]) - c[i];
}

/* { dg-final { scan-assembler "\tvfnmsub\[123\]+ps" } } */