Re: [i386] scalar ops that preserve the high part of a vector

Marc Glisse Fri, 07 Dec 2012 21:47:29 -0800

On Fri, 7 Dec 2012, Marc Glisse wrote:

On Fri, 7 Dec 2012, Richard Henderson wrote:
On 2012-12-07 09:12, Marc Glisse wrote:
but change ix86_expand_vector_set and others to generate vec_merge
and have only the vec_merge define_insn in sse.md? I guess it would
buy a large part of it. That's a pretty invasive change, I'll have to
try...
Is it really that invasive?
No, changing only V2DF, I seem to have the basic pieces in place changing just 6 patterns in sse.md and a couple functions in i386.c. Now I need to test it and see how much it affects the generated code...

Here is a patch that passes bootstrap+testsuite. I didn't notice anything particular about the code generated. Sure, something like _mm_add_sd(x,y)[1] simplifies with any of the vec_concat patches but not with this vec_merge patch, but that's just a trivial missing piece of code in simplify-rtx.c that I'll want to write for 4.9 anyway. My personal taste is still for vec_concat, but I'm ok with this one.


(Off topic remark: if I do
v2df x;
x[0]+=1;
y=(v2df){x[1],x[0]};

the compiler sees {x[1],x[0]+1} and never guesses that it should do addsd and then shuffle, whereas if I use _mm_add_sd I get the nice 2-line asm)



2012-12-08  Marc Glisse  <[email protected]>

        PR target/54855
gcc/
        * config/i386/sse.md (<sse>_vm<plusminus_insn><mode>3): Rewrite
        pattern.
        (sse2_loadlpd, sse2_loadhpd): Use vec_merge.
        * config/i386/i386-builtin-types.def: New function types.
        * config/i386/i386.c (ix86_expand_args_builtin): Likewise.
        (bdesc_args) <__builtin_ia32_addss, __builtin_ia32_subss,
        __builtin_ia32_addsd, __builtin_ia32_subsd>: Change prototype.
        (ix86_expand_vector_set): Use vec_merge for V2DF.
        * config/i386/xmmintrin.h: Adapt to new builtin prototype.
        * config/i386/emmintrin.h: Likewise.
        * doc/extend.texi (X86 Built-in Functions): Document changed prototype.

testsuite/
        * gcc.target/i386/pr54855-1.c: New testcase.
        * gcc.target/i386/pr54855-2.c: New testcase.

--
Marc Glisse

Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi (revision 194309)
+++ gcc/doc/extend.texi (working copy)
@@ -9843,22 +9843,22 @@ int __builtin_ia32_comige (v4sf, v4sf)
 int __builtin_ia32_ucomieq (v4sf, v4sf)
 int __builtin_ia32_ucomineq (v4sf, v4sf)
 int __builtin_ia32_ucomilt (v4sf, v4sf)
 int __builtin_ia32_ucomile (v4sf, v4sf)
 int __builtin_ia32_ucomigt (v4sf, v4sf)
 int __builtin_ia32_ucomige (v4sf, v4sf)
 v4sf __builtin_ia32_addps (v4sf, v4sf)
 v4sf __builtin_ia32_subps (v4sf, v4sf)
 v4sf __builtin_ia32_mulps (v4sf, v4sf)
 v4sf __builtin_ia32_divps (v4sf, v4sf)
-v4sf __builtin_ia32_addss (v4sf, v4sf)
-v4sf __builtin_ia32_subss (v4sf, v4sf)
+v4sf __builtin_ia32_addss (v4sf, float)
+v4sf __builtin_ia32_subss (v4sf, float)
 v4sf __builtin_ia32_mulss (v4sf, v4sf)
 v4sf __builtin_ia32_divss (v4sf, v4sf)
 v4si __builtin_ia32_cmpeqps (v4sf, v4sf)
 v4si __builtin_ia32_cmpltps (v4sf, v4sf)
 v4si __builtin_ia32_cmpleps (v4sf, v4sf)
 v4si __builtin_ia32_cmpgtps (v4sf, v4sf)
 v4si __builtin_ia32_cmpgeps (v4sf, v4sf)
 v4si __builtin_ia32_cmpunordps (v4sf, v4sf)
 v4si __builtin_ia32_cmpneqps (v4sf, v4sf)
 v4si __builtin_ia32_cmpnltps (v4sf, v4sf)
@@ -9964,22 +9964,22 @@ v2df __builtin_ia32_cmpunordsd (v2df, v2
 v2df __builtin_ia32_cmpneqsd (v2df, v2df)
 v2df __builtin_ia32_cmpnltsd (v2df, v2df)
 v2df __builtin_ia32_cmpnlesd (v2df, v2df)
 v2df __builtin_ia32_cmpordsd (v2df, v2df)
 v2di __builtin_ia32_paddq (v2di, v2di)
 v2di __builtin_ia32_psubq (v2di, v2di)
 v2df __builtin_ia32_addpd (v2df, v2df)
 v2df __builtin_ia32_subpd (v2df, v2df)
 v2df __builtin_ia32_mulpd (v2df, v2df)
 v2df __builtin_ia32_divpd (v2df, v2df)
-v2df __builtin_ia32_addsd (v2df, v2df)
-v2df __builtin_ia32_subsd (v2df, v2df)
+v2df __builtin_ia32_addsd (v2df, double)
+v2df __builtin_ia32_subsd (v2df, double)
 v2df __builtin_ia32_mulsd (v2df, v2df)
 v2df __builtin_ia32_divsd (v2df, v2df)
 v2df __builtin_ia32_minpd (v2df, v2df)
 v2df __builtin_ia32_maxpd (v2df, v2df)
 v2df __builtin_ia32_minsd (v2df, v2df)
 v2df __builtin_ia32_maxsd (v2df, v2df)
 v2df __builtin_ia32_andpd (v2df, v2df)
 v2df __builtin_ia32_andnpd (v2df, v2df)
 v2df __builtin_ia32_orpd (v2df, v2df)
 v2df __builtin_ia32_xorpd (v2df, v2df)
Index: gcc/testsuite/gcc.target/i386/pr54855-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr54855-1.c   (revision 0)
+++ gcc/testsuite/gcc.target/i386/pr54855-1.c   (revision 0)
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msse2" } */
+
+#include <emmintrin.h>
+
+__m128d f (__m128d x)
+{
+    __m128d y = { 2, 0 };
+      return _mm_add_sd (x, y);
+}
+
+__m128d g (__m128d x)
+{
+    __m128d y = { 1, 0 };
+      return _mm_sub_sd (x, y);
+}
+
+__m128d h (__m128d x, __m128d y)
+{
+    return _mm_add_sd (x, y);
+}
+
+__m128d i (__m128d x, __m128d y)
+{
+    return _mm_sub_sd (x, y);
+}
+
+__m128d j (__m128d x)
+{
+  x[0] += 2;
+  return x;
+}
+
+__m128d k (__m128d x)
+{
+  x[0] -= 1;
+  return x;
+}
+
+/* { dg-final { scan-assembler-not "mov" } } */

Property changes on: gcc/testsuite/gcc.target/i386/pr54855-1.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: gcc/testsuite/gcc.target/i386/pr54855-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr54855-2.c   (revision 0)
+++ gcc/testsuite/gcc.target/i386/pr54855-2.c   (revision 0)
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msse" } */
+
+#include <xmmintrin.h>
+
+__m128 f (__m128 x)
+{
+    __m128 y = { 2, 0, 0, 0 };
+      return _mm_add_ss (x, y);
+}
+
+__m128 g (__m128 x)
+{
+    __m128 y = { 1, 0, 0, 0 };
+      return _mm_sub_ss (x, y);
+}
+
+__m128 h (__m128 x, __m128 y)
+{
+    return _mm_add_ss (x, y);
+}
+
+__m128 i (__m128 x, __m128 y)
+{
+    return _mm_sub_ss (x, y);
+}
+
+__m128 j (__m128 x)
+{
+  x[0] += 2;
+  return x;
+}
+
+__m128 k (__m128 x)
+{
+  x[0] -= 1;
+  return x;
+}
+
+/* { dg-final { scan-assembler-not "mov" } } */

Property changes on: gcc/testsuite/gcc.target/i386/pr54855-2.c
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Author Date Id Revision URL

Index: gcc/config/i386/xmmintrin.h
===================================================================
--- gcc/config/i386/xmmintrin.h (revision 194309)
+++ gcc/config/i386/xmmintrin.h (working copy)
@@ -92,27 +92,27 @@ _mm_setzero_ps (void)
   return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
 }
 
 /* Perform the respective operation on the lower SPFP (single-precision
    floating-point) values of A and B; the upper three SPFP values are
    passed through from A.  */
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_add_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) __builtin_ia32_addss ((__v4sf)__A, __B[0]);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_sub_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) __builtin_ia32_subss ((__v4sf)__A, __B[0]);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_mul_ss (__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_div_ss (__m128 __A, __m128 __B)
Index: gcc/config/i386/emmintrin.h
===================================================================
--- gcc/config/i386/emmintrin.h (revision 194309)
+++ gcc/config/i386/emmintrin.h (working copy)
@@ -226,33 +226,33 @@ _mm_cvtsi128_si64x (__m128i __A)
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_add_pd (__m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_add_sd (__m128d __A, __m128d __B)
 {
-  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
+  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, __B[0]);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_sub_pd (__m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_sub_sd (__m128d __A, __m128d __B)
 {
-  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
+  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, __B[0]);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_mul_pd (__m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_mul_sd (__m128d __A, __m128d __B)
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md      (revision 194309)
+++ gcc/config/i386/sse.md      (working copy)
@@ -858,23 +858,26 @@
    <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
    v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "<sse>_vm<plusminus_insn><mode>3"
   [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
-         (plusminus:VF_128
-           (match_operand:VF_128 1 "register_operand" "0,x")
-           (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
+         (vec_duplicate:VF_128
+           (plusminus:<ssescalarmode>
+             (vec_select:<ssescalarmode>
+               (match_operand:VF_128 1 "register_operand" "0,x")
+               (parallel [(const_int 0)]))
+             (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,xm")))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_SSE"
   "@
    <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
    v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<ssescalarmode>")])
@@ -5006,106 +5009,103 @@
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    movlps\t{%1, %0|%0, %1}
    movaps\t{%1, %0|%0, %1}
    movlps\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
 (define_expand "sse2_loadhpd_exp"
   [(set (match_operand:V2DF 0 "nonimmediate_operand")
-       (vec_concat:V2DF
-         (vec_select:DF
-           (match_operand:V2DF 1 "nonimmediate_operand")
-           (parallel [(const_int 0)]))
-         (match_operand:DF 2 "nonimmediate_operand")))]
+       (vec_merge:V2DF
+         (vec_duplicate:V2DF (match_operand:DF 2 "nonimmediate_operand"))
+         (match_operand:V2DF 1 "nonimmediate_operand")
+         (const_int 2)))]
   "TARGET_SSE2"
 {
   rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
 
   emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
 
   /* Fix up the destination if needed.  */
   if (dst != operands[0])
     emit_move_insn (operands[0], dst);
 
   DONE;
 })
 
 ;; Avoid combining registers from different units in a single alternative,
 ;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn "sse2_loadhpd"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"
          "=x,x,x,x,o,o ,o")
-       (vec_concat:V2DF
-         (vec_select:DF
-           (match_operand:V2DF 1 "nonimmediate_operand"
+       (vec_merge:V2DF
+         (vec_duplicate:V2DF (match_operand:DF 2 "nonimmediate_operand"
+         " m,m,x,x,x,*f,r"))
+         (match_operand:V2DF 1 "nonimmediate_operand"
          " 0,x,0,x,0,0 ,0")
-           (parallel [(const_int 0)]))
-         (match_operand:DF 2 "nonimmediate_operand"
-         " m,m,x,x,x,*f,r")))]
+         (const_int 2)))]
   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
    movhpd\t{%2, %0|%0, %2}
    vmovhpd\t{%2, %1, %0|%0, %1, %2}
    unpcklpd\t{%2, %0|%0, %2}
    vunpcklpd\t{%2, %1, %0|%0, %1, %2}
    #
    #
    #"
   [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
    (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
    (set_attr "prefix_data16" "1,*,*,*,*,*,*")
    (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
    (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
 
 (define_split
   [(set (match_operand:V2DF 0 "memory_operand")
-       (vec_concat:V2DF
-         (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
-         (match_operand:DF 1 "register_operand")))]
+       (vec_merge:V2DF
+         (vec_duplicate:V2DF (match_operand:DF 1 "register_operand"))
+         (match_dup 0)
+         (const_int 2)))]
   "TARGET_SSE2 && reload_completed"
   [(set (match_dup 0) (match_dup 1))]
   "operands[0] = adjust_address (operands[0], DFmode, 8);")
 
 (define_expand "sse2_loadlpd_exp"
   [(set (match_operand:V2DF 0 "nonimmediate_operand")
-       (vec_concat:V2DF
-         (match_operand:DF 2 "nonimmediate_operand")
-         (vec_select:DF
-           (match_operand:V2DF 1 "nonimmediate_operand")
-           (parallel [(const_int 1)]))))]
+       (vec_merge:V2DF
+         (vec_duplicate:V2DF (match_operand:DF 2 "nonimmediate_operand"))
+         (match_operand:V2DF 1 "nonimmediate_operand")
+         (const_int 1)))]
   "TARGET_SSE2"
 {
   rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
 
   emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
 
   /* Fix up the destination if needed.  */
   if (dst != operands[0])
     emit_move_insn (operands[0], dst);
 
   DONE;
 })
 
 ;; Avoid combining registers from different units in a single alternative,
 ;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn "sse2_loadlpd"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"
          "=x,x,x,x,x,x,x,x,m,m ,m")
-       (vec_concat:V2DF
-         (match_operand:DF 2 "nonimmediate_operand"
-         " m,m,m,x,x,0,0,x,x,*f,r")
-         (vec_select:DF
-           (match_operand:V2DF 1 "vector_move_operand"
+       (vec_merge:V2DF
+         (vec_duplicate:V2DF (match_operand:DF 2 "nonimmediate_operand"
+         " m,m,m,x,x,0,0,x,x,*f,r"))
+         (match_operand:V2DF 1 "vector_move_operand"
          " C,0,x,0,x,x,o,o,0,0 ,0")
-           (parallel [(const_int 1)]))))]
+         (const_int 1)))]
   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
    %vmovsd\t{%2, %0|%0, %2}
    movlpd\t{%2, %0|%0, %2}
    vmovlpd\t{%2, %1, %0|%0, %1, %2}
    movsd\t{%2, %0|%0, %2}
    vmovsd\t{%2, %1, %0|%0, %1, %2}
    shufpd\t{$2, %1, %0|%0, %1, 2}
    movhpd\t{%H1, %0|%0, %H1}
    vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
@@ -5122,23 +5122,24 @@
              (const_string "imov")
           ]
           (const_string "ssemov")))
    (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
    (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
    (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
    (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
 
 (define_split
   [(set (match_operand:V2DF 0 "memory_operand")
-       (vec_concat:V2DF
-         (match_operand:DF 1 "register_operand")
-         (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
+       (vec_merge:V2DF
+         (vec_duplicate:V2DF (match_operand:DF 1 "register_operand"))
+         (match_dup 0)
+         (const_int 1)))]
   "TARGET_SSE2 && reload_completed"
   [(set (match_dup 0) (match_dup 1))]
   "operands[0] = adjust_address (operands[0], DFmode, 0);")
 
 (define_insn "sse2_movsd"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,x,x,m,x,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
          (const_int 1)))]
Index: gcc/config/i386/i386-builtin-types.def
===================================================================
--- gcc/config/i386/i386-builtin-types.def      (revision 194309)
+++ gcc/config/i386/i386-builtin-types.def      (working copy)
@@ -263,20 +263,21 @@ DEF_FUNCTION_TYPE (UINT64, UINT64, UINT6
 DEF_FUNCTION_TYPE (UINT8, UINT8, INT)
 DEF_FUNCTION_TYPE (V16QI, V16QI, SI)
 DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI)
 DEF_FUNCTION_TYPE (V16QI, V8HI, V8HI)
 DEF_FUNCTION_TYPE (V1DI, V1DI, SI)
 DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI)
 DEF_FUNCTION_TYPE (V1DI, V2SI, V2SI)
 DEF_FUNCTION_TYPE (V1DI, V8QI, V8QI)
 DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DI)
 DEF_FUNCTION_TYPE (V2DF, V2DF, DI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, DOUBLE)
 DEF_FUNCTION_TYPE (V2DF, V2DF, INT)
 DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE)
 DEF_FUNCTION_TYPE (V2DF, V2DF, SI)
 DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF)
 DEF_FUNCTION_TYPE (V2DF, V2DF, V2DI)
 DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF)
 DEF_FUNCTION_TYPE (V2DF, V4DF, INT)
 DEF_FUNCTION_TYPE (V2DI, V16QI, V16QI)
 DEF_FUNCTION_TYPE (V2DI, V2DF, V2DF)
 DEF_FUNCTION_TYPE (V2DI, V2DI, INT)
@@ -296,20 +297,21 @@ DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DI)
 DEF_FUNCTION_TYPE (V4DF, V4DF, INT)
 DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF)
 DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI)
 DEF_FUNCTION_TYPE (V4HI, V2SI, V2SI)
 DEF_FUNCTION_TYPE (V4HI, V4HI, INT)
 DEF_FUNCTION_TYPE (V4HI, V4HI, SI)
 DEF_FUNCTION_TYPE (V4HI, V4HI, V4HI)
 DEF_FUNCTION_TYPE (V4HI, V8QI, V8QI)
 DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SI)
 DEF_FUNCTION_TYPE (V4SF, V4SF, DI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT)
 DEF_FUNCTION_TYPE (V4SF, V4SF, INT)
 DEF_FUNCTION_TYPE (V4SF, V4SF, PCV2SF)
 DEF_FUNCTION_TYPE (V4SF, V4SF, SI)
 DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF)
 DEF_FUNCTION_TYPE (V4SF, V4SF, V2SI)
 DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF)
 DEF_FUNCTION_TYPE (V4SF, V4SF, V4SI)
 DEF_FUNCTION_TYPE (V4SF, V8SF, INT)
 DEF_FUNCTION_TYPE (V4SI, V2DF, V2DF)
 DEF_FUNCTION_TYPE (V4SI, V4SF, V4SF)
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c      (revision 194309)
+++ gcc/config/i386/i386.c      (working copy)
@@ -27070,22 +27070,22 @@ static const struct builtin_description
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", 
IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", 
IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 
"__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) 
INT64_FTYPE_V4SF },
 
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", 
IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
 
   { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", 
IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", 
IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", 
IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", 
IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", 
IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", 
IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", 
IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_FLOAT },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", 
IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_FLOAT },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", 
IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", 
IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
 
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", 
IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", 
IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", 
IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", 
IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", 
IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, 
"__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) 
V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", 
IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
@@ -27174,22 +27174,22 @@ static const struct builtin_description
   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 
"__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) 
INT64_FTYPE_V2DF },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", 
IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", 
IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, 
"__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) 
V4SI_FTYPE_V4SF },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", 
IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", 
IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", 
IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", 
IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", 
IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", 
IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", 
IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DOUBLE },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", 
IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DOUBLE },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", 
IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", 
IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, 
"__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF 
},
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, 
"__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF 
},
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, 
"__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF 
},
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, 
"__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) 
V2DF_FTYPE_V2DF_V2DF_SWAP },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, 
"__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) 
V2DF_FTYPE_V2DF_V2DF_SWAP},
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, 
"__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) 
V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, 
"__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) 
V2DF_FTYPE_V2DF_V2DF },
@@ -30801,34 +30801,36 @@ ix86_expand_args_builtin (const struct b
     case V4HI_FTYPE_V8QI_V8QI:
     case V4HI_FTYPE_V2SI_V2SI:
     case V4DF_FTYPE_V4DF_V4DF:
     case V4DF_FTYPE_V4DF_V4DI:
     case V4SF_FTYPE_V4SF_V4SF:
     case V4SF_FTYPE_V4SF_V4SI:
     case V4SF_FTYPE_V4SF_V2SI:
     case V4SF_FTYPE_V4SF_V2DF:
     case V4SF_FTYPE_V4SF_DI:
     case V4SF_FTYPE_V4SF_SI:
+    case V4SF_FTYPE_V4SF_FLOAT:
     case V2DI_FTYPE_V2DI_V2DI:
     case V2DI_FTYPE_V16QI_V16QI:
     case V2DI_FTYPE_V4SI_V4SI:
     case V2UDI_FTYPE_V4USI_V4USI:
     case V2DI_FTYPE_V2DI_V16QI:
     case V2DI_FTYPE_V2DF_V2DF:
     case V2SI_FTYPE_V2SI_V2SI:
     case V2SI_FTYPE_V4HI_V4HI:
     case V2SI_FTYPE_V2SF_V2SF:
     case V2DF_FTYPE_V2DF_V2DF:
     case V2DF_FTYPE_V2DF_V4SF:
     case V2DF_FTYPE_V2DF_V2DI:
     case V2DF_FTYPE_V2DF_DI:
     case V2DF_FTYPE_V2DF_SI:
+    case V2DF_FTYPE_V2DF_DOUBLE:
     case V2SF_FTYPE_V2SF_V2SF:
     case V1DI_FTYPE_V1DI_V1DI:
     case V1DI_FTYPE_V8QI_V8QI:
     case V1DI_FTYPE_V2SI_V2SI:
     case V32QI_FTYPE_V16HI_V16HI:
     case V16HI_FTYPE_V8SI_V8SI:
     case V32QI_FTYPE_V32QI_V32QI:
     case V16HI_FTYPE_V32QI_V32QI:
     case V16HI_FTYPE_V16HI_V16HI:
     case V8SI_FTYPE_V4DF_V4DF:
@@ -36442,38 +36444,22 @@ ix86_expand_vector_set (bool mmx_ok, rtx
       tmp = gen_reg_rtx (GET_MODE_INNER (mode));
       ix86_expand_vector_extract (false, tmp, target, 1 - elt);
       if (elt == 0)
        tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
       else
        tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
       return;
 
     case V2DFmode:
-      {
-       rtx op0, op1;
-
-       /* For the two element vectors, we implement a VEC_CONCAT with
-          the extraction of the other element.  */
-
-       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
-       tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
-
-       if (elt == 0)
-         op0 = val, op1 = tmp;
-       else
-         op0 = tmp, op1 = val;
-
-       tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
-       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
-      }
-      return;
+      use_vec_merge = TARGET_SSE2;
+      break;
 
     case V4SFmode:
       use_vec_merge = TARGET_SSE4_1;
       if (use_vec_merge)
        break;
 
       switch (elt)
        {
        case 0:
          use_vec_merge = true;

Re: [i386] scalar ops that preserve the high part of a vector

Reply via email to