Many SSE cvt instructions take a 64-bit memory source operand
instead of a 128-bit one. In

(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x") 
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm") 
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")
   (set_attr "prefix_data16" "0")
   (set_attr "amdfam10_decode" "direct")])

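the memory operand is really V2SF (64 bits), not V4SF (128 bits). As a
result, GCC loads the full 128-bit vector into a register and extracts the
upper half with movhlps instead of giving cvtps2pd a 64-bit memory operand
directly, and we get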

[...@gnu-6 tmp]$ cat x.c
float v2sf[8] __attribute ((aligned(16)));
double v4df[8] __attribute ((aligned(16)));
void
foo ()
{
  int i;
  for (i = 0; i < 8; i++)
    v4df[i] = v2sf[i];
}
[...@gnu-6 tmp]$ /usr/gcc-4.5/bin/gcc -S -O3 x.c
[...@gnu-6 tmp]$ cat x.s
        .file   "x.c"
        .text
        .p2align 4,,15
.globl foo
        .type   foo, @function
foo:
.LFB0:
        .cfi_startproc
        movaps  v2sf(%rip), %xmm1
        cvtps2pd        %xmm1, %xmm0
        movapd  %xmm0, v4df(%rip)
        xorps   %xmm0, %xmm0
        movhlps %xmm1, %xmm0
        movaps  v2sf+16(%rip), %xmm1
        cvtps2pd        %xmm0, %xmm0
        movapd  %xmm0, v4df+16(%rip)
        cvtps2pd        %xmm1, %xmm0
        movapd  %xmm0, v4df+32(%rip)
        xorps   %xmm0, %xmm0
        movhlps %xmm1, %xmm0
        cvtps2pd        %xmm0, %xmm0
        movapd  %xmm0, v4df+48(%rip)
        ret

instead of

[...@gnu-6 tmp]$ /opt/intel/Compiler/11.1/059/bin/intel64/icc -S -O2 x.c
[...@gnu-6 tmp]$ cat x.s
# -- Machine type EFI2
# mark_description "Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 11.1    Build 20091012 %s";
# mark_description "-S -O2";
        .file "x.c"
        .text
..TXTST0:
# -- Begin  foo
# mark_begin;
       .align    16,0x90
        .globl foo
foo:
..B1.1:                         # Preds ..B1.0
..___tag_value_foo.1:                                           #5.1
        cvtps2pd  v2sf(%rip), %xmm0                             #8.15
        cvtps2pd  8+v2sf(%rip), %xmm1                           #8.15
        cvtps2pd  16+v2sf(%rip), %xmm2                          #8.15
        cvtps2pd  24+v2sf(%rip), %xmm3                          #8.15
        movaps    %xmm0, v4df(%rip)                             #8.5
        movaps    %xmm1, 16+v4df(%rip)                          #8.5
        movaps    %xmm2, 32+v4df(%rip)                          #8.5
        movaps    %xmm3, 48+v4df(%rip)                          #8.5
        ret                                                     #9.1


-- 
           Summary: Incorrect sse2_cvtX2Y pattern
           Product: gcc
           Version: 4.5.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: target
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: hjl dot tools at gmail dot com


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43618

Reply via email to