Many SSE cvt instructions take a 64-bit memory source instead of a 128-bit one. In

(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")
   (set_attr "prefix_data16" "0")
   (set_attr "amdfam10_decode" "direct")])
the memory operand is actually V2SF, not V4SF. As a result, GCC generates

[...@gnu-6 tmp]$ cat x.c
float v2sf[8] __attribute ((aligned(16)));
double v4df[8] __attribute ((aligned(16)));

void
foo ()
{
  int i;

  for (i = 0; i < 8; i++)
    v4df[i] = v2sf[i];
}
[...@gnu-6 tmp]$ /usr/gcc-4.5/bin/gcc -S -O3 x.c
[...@gnu-6 tmp]$ cat x.s
        .file "x.c"
        .text
        .p2align 4,,15
.globl foo
        .type foo, @function
foo:
.LFB0:
        .cfi_startproc
        movaps v2sf(%rip), %xmm1
        cvtps2pd %xmm1, %xmm0
        movapd %xmm0, v4df(%rip)
        xorps %xmm0, %xmm0
        movhlps %xmm1, %xmm0
        movaps v2sf+16(%rip), %xmm1
        cvtps2pd %xmm0, %xmm0
        movapd %xmm0, v4df+16(%rip)
        cvtps2pd %xmm1, %xmm0
        movapd %xmm0, v4df+32(%rip)
        xorps %xmm0, %xmm0
        movhlps %xmm1, %xmm0
        cvtps2pd %xmm0, %xmm0
        movapd %xmm0, v4df+48(%rip)
        ret

instead of the code ICC generates:

[...@gnu-6 tmp]$ /opt/intel/Compiler/11.1/059/bin/intel64/icc -S -O2 x.c
[...@gnu-6 tmp]$ cat x.s
# -- Machine type EFI2
# mark_description "Intel(R) C++ Compiler for applications running on Intel(R) 64, Version 11.1 Build 20091012 %s";
# mark_description "-S -O2";
        .file "x.c"
        .text
..TXTST0:
# -- Begin foo
# mark_begin;
        .align 16,0x90
        .globl foo
foo:
..B1.1: # Preds ..B1.0
..___tag_value_foo.1: #5.1
        cvtps2pd v2sf(%rip), %xmm0 #8.15
        cvtps2pd 8+v2sf(%rip), %xmm1 #8.15
        cvtps2pd 16+v2sf(%rip), %xmm2 #8.15
        cvtps2pd 24+v2sf(%rip), %xmm3 #8.15
        movaps %xmm0, v4df(%rip) #8.5
        movaps %xmm1, 16+v4df(%rip) #8.5
        movaps %xmm2, 32+v4df(%rip) #8.5
        movaps %xmm3, 48+v4df(%rip) #8.5
        ret #9.1

--
Summary: Incorrect sse2_cvtX2Y pattern
Product: gcc
Version: 4.5.0
Status: UNCONFIRMED
Severity: enhancement
Priority: P3
Component: target
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: hjl dot tools at gmail dot com

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43618