Hello! unpckhps and punpckhdq are not the correct insns to move the 32-bit element X[1] to X[0] in XMM registers. These two insns move X[2] to X[0]. The patch uses movshdup, pshufd and shufps instead.
This problem, however, is a non-issue since MMX moves are never generated, but the insn patterns should be fixed nevertheless. 2014-12-27 Uros Bizjak <ubiz...@gmail.com> * config/i386/mmx.md (*vec_extractv2sf_1): Do not emit unpckhps. Emit movshdup for SSE3 and shufps otherwise. (*vec_extractv2si_1): Do not emit punpckhdq and unpckhps. Emit pshufd for SSE2 and shufps otherwise. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}, committed to mainline SVN and 4.9 branch. Uros.
Index: mmx.md =================================================================== --- mmx.md (revision 219073) +++ mmx.md (working copy) @@ -600,20 +600,25 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2sf_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,y,x,f,r") + [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,x,y,x,f,r") (vec_select:SF - (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o,o,o,o") + (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 - unpckhps\t%0, %0 + %vmovshdup\t{%1, %0|%0, %1} + shufps\t{$0xe5, %1, %0|%0, %1, 0xe5} # # # #" - [(set_attr "type" "mmxcvt,sselog1,mmxmov,ssemov,fmov,imov") - (set_attr "mode" "DI,V4SF,SF,SF,SF,SF")]) + [(set_attr "isa" "*,sse3,noavx,*,*,*,*") + (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov") + (set_attr "length_immediate" "*,*,1,*,*,*,*") + (set_attr "prefix_rep" "*,1,*,*,*,*,*") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig") + (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")]) (define_split [(set (match_operand:SF 0 "register_operand") @@ -1288,26 +1293,23 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2si_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,x,y,x,r") + [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,y,x,r") (vec_select:SI - (match_operand:V2SI 1 "nonimmediate_operand" " 0,0,x,0,o,o,o") + (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 - punpckhdq\t%0, %0 - pshufd\t{$85, %1, %0|%0, %1, 85} - unpckhps\t%0, %0 + %vpshufd\t{$0xe5, %1, 
%0|%0, %1, 0xe5} + shufps\t{$0xe5, %1, %0|%0, %1, 0xe5} # # #" - [(set (attr "isa") - (if_then_else (eq_attr "alternative" "1,2") - (const_string "sse2") - (const_string "*"))) - (set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,mmxmov,ssemov,imov") - (set_attr "length_immediate" "*,*,1,*,*,*,*") - (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")]) + [(set_attr "isa" "*,sse2,noavx,*,*,*") + (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov") + (set_attr "length_immediate" "*,1,1,*,*,*") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig") + (set_attr "mode" "DI,TI,V4SF,SI,SI,SI")]) (define_split [(set (match_operand:SI 0 "register_operand")