Hi! The fma-*.c testcases show that these intrinsics are probably meant to preserve the high elements (those other than the lowest) of the first argument of the fmaintrin.h *_s{s,d} intrinsics in the destination (the HW insns preserve the destination register there, but which operand that is varies - for 132 and 213 it is the first one (but the negation performed for _mm_fnm*_s[sd] breaks it anyway), for 231 it is the last one). What the expander did was to put an uninitialized pseudo there, so we ended up with pretty random content. Before H.J.'s http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=190492 it happened to work by accident, but when things changed slightly and reload chose a different alternative, this broke.
The following patch fixes it by tweaking the header so that the first argument is not negated (we negate the second one instead), as we don't want to negate the high elements if, for whatever reason, the combiner doesn't match it. It fixes the expander to use a dup of the X operand as the high element provider for the pattern, removes the 231 alternatives (because those provide different destination high elements) and removes the commutative marker (again, that would mean different high elements). Bootstrapped/regtested on x86_64-linux and i686-linux, and additionally tested with make check-gcc RUNTESTFLAGS='--target_board=valgrind-sim/-m64 i386.exp=\*fma\*' Ok for trunk/4.7? 2012-09-13 Jakub Jelinek <ja...@redhat.com> PR target/54564 * config/i386/sse.md (fmai_vmfmadd_<mode>): Use (match_dup 1) instead of (match_dup 0) as second argument to vec_merge. (*fmai_fmadd_<mode>, *fmai_fmsub_<mode>): Likewise. Remove third alternative. (*fmai_fnmadd_<mode>, *fmai_fnmsub_<mode>): Likewise. Negate operand 2 instead of operand 1, but put it as first argument of fma. * config/i386/fmaintrin.h (_mm_fnmadd_sd, _mm_fnmadd_ss, _mm_fnmsub_sd, _mm_fnmsub_ss): Negate the second argument instead of the first. 
--- gcc/config/i386/sse.md.jj 2012-09-05 18:27:03.000000000 +0200 +++ gcc/config/i386/sse.md 2012-09-13 13:49:49.504968716 +0200 @@ -2072,79 +2072,75 @@ (define_expand "fmai_vmfmadd_<mode>" (match_operand:VF_128 1 "nonimmediate_operand") (match_operand:VF_128 2 "nonimmediate_operand") (match_operand:VF_128 3 "nonimmediate_operand")) - (match_dup 0) + (match_dup 1) (const_int 1)))] "TARGET_FMA") (define_insn "*fmai_fmadd_<mode>" - [(set (match_operand:VF_128 0 "register_operand" "=x,x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,x") (vec_merge:VF_128 (fma:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")) - (match_dup 0) + (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") + (match_operand:VF_128 2 "nonimmediate_operand" "xm, x") + (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")) + (match_dup 1) (const_int 1)))] "TARGET_FMA" "@ vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fmai_fmsub_<mode>" - [(set (match_operand:VF_128 0 "register_operand" "=x,x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,x") (vec_merge:VF_128 (fma:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") + (match_operand:VF_128 2 "nonimmediate_operand" "xm, x") (neg:VF_128 - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))) - (match_dup 0) + (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))) + (match_dup 1) (const_int 1)))] "TARGET_FMA" "@ vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} - 
vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fmai_fnmadd_<mode>" - [(set (match_operand:VF_128 0 "register_operand" "=x,x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,x") (vec_merge:VF_128 (fma:VF_128 (neg:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")) - (match_dup 0) + (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")) + (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") + (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")) + (match_dup 1) (const_int 1)))] "TARGET_FMA" "@ vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) (define_insn "*fmai_fnmsub_<mode>" - [(set (match_operand:VF_128 0 "register_operand" "=x,x,x") + [(set (match_operand:VF_128 0 "register_operand" "=x,x") (vec_merge:VF_128 (fma:VF_128 (neg:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")) + (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") (neg:VF_128 - (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))) - (match_dup 0) + (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))) + (match_dup 1) (const_int 1)))] "TARGET_FMA" "@ vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + 
vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) --- gcc/config/i386/fmaintrin.h.jj 2011-09-02 16:29:38.000000000 +0200 +++ gcc/config/i386/fmaintrin.h 2012-09-13 13:32:20.162333244 +0200 @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Free Software Foundation, Inc. +/* Copyright (C) 2011, 2012 Free Software Foundation, Inc. This file is part of GCC. @@ -164,7 +164,7 @@ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B, + return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B, (__v2df)__C); } @@ -172,7 +172,7 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B, + return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); } @@ -212,7 +212,7 @@ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B, + return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B, -(__v2df)__C); } @@ -220,7 +220,7 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B, + return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); } Jakub