On Thu, May 9, 2013 at 9:43 PM, Uros Bizjak <ubiz...@gmail.com> wrote:
> movd from xmm register to integer register also zero-extends the value
> in integer register. Also, the patch adds a couple of missing
> alternatives to *vec_extractv4si; we can "extract" the values using
> 128bit SSE shifts.
>
> 2013-05-09  Uros Bizjak  <ubiz...@gmail.com>
>
>     * config/i386/sse.md (*vec_extractv4si_0_zext): New pattern.
>     (*vec_extractv4si_zext_mem): Ditto.
>     (*vec_extractv4si): Add 0->x and x->x alternatives.
>     * config/i386/mmx.md (*vec_extractv2si_zext_mem): New pattern.
>     * config/i386/i386.md (*zero_extendsidi2): Add *Yj->?r alternative.
>
> Patch was tested on x86_64-pc-linux-gnu and committed to mainline SVN.

Now with a patch.

Uros.
Index: i386.md
===================================================================
--- i386.md (revision 198747)
+++ i386.md (working copy)
@@ -3088,10 +3088,10 @@
 
 (define_insn "*zero_extendsidi2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-                "=r,?r,?o,r ,o,?*Ym,?!*y,?*Yi,?*x")
+                "=r,?r,?o,r ,o,?*Ym,?!*y,?r ,?*Yi,?*x")
         (zero_extend:DI
          (match_operand:SI 1 "x86_64_zext_operand"
-                "0 ,rm,r ,rmWz,0,r ,m ,r ,m")))]
+                "0 ,rm,r ,rmWz,0,r ,m ,*Yj,r ,m")))]
   ""
 {
   switch (get_attr_type (insn))
@@ -3109,6 +3109,9 @@
       return "movd\t{%1, %0|%0, %1}";
 
     case TYPE_SSEMOV:
+      if (GENERAL_REG_P (operands[0]))
+        return "%vmovd\t{%1, %k0|%k0, %1}";
+
       return "%vmovd\t{%1, %0|%0, %1}";
 
     default:
@@ -3118,9 +3121,9 @@
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "0,1,2")
               (const_string "nox64")
-            (eq_attr "alternative" "3")
+            (eq_attr "alternative" "3,7")
               (const_string "x64")
-            (eq_attr "alternative" "8")
+            (eq_attr "alternative" "9")
               (const_string "sse2")
            ]
            (const_string "*")))
@@ -3129,7 +3132,7 @@
               (const_string "multi")
             (eq_attr "alternative" "5,6")
               (const_string "mmxmov")
-            (eq_attr "alternative" "7,8")
+            (eq_attr "alternative" "7,8,9")
               (const_string "ssemov")
            ]
            (const_string "imovx")))
@@ -3144,7 +3147,7 @@
    (set (attr "mode")
      (cond [(eq_attr "alternative" "5,6")
               (const_string "DI")
-            (eq_attr "alternative" "7,8")
+            (eq_attr "alternative" "7,8,9")
               (const_string "TI")
            ]
            (const_string "SI")))])
Index: mmx.md
===================================================================
--- mmx.md (revision 198747)
+++ mmx.md (working copy)
@@ -1323,6 +1323,20 @@
   [(set (match_dup 0) (match_dup 1))]
   "operands[1] = adjust_address (operands[1], SImode, 4);")
 
+(define_insn_and_split "*vec_extractv2si_zext_mem"
+  [(set (match_operand:DI 0 "register_operand" "=y,x,r")
+        (zero_extend:DI
+          (vec_select:SI
+            (match_operand:V2SI 1 "memory_operand" "o,o,o")
+            (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
+  "TARGET_64BIT && TARGET_MMX"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
+{
+  operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
+})
+
 (define_expand "vec_extractv2si"
   [(match_operand:SI 0 "register_operand")
    (match_operand:V2SI 1 "register_operand")
Index: sse.md
===================================================================
--- sse.md (revision 198747)
+++ sse.md (working copy)
@@ -7331,6 +7331,18 @@
   "#"
   [(set_attr "isa" "*,sse4,*,*")])
 
+(define_insn_and_split "*vec_extractv4si_0_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+          (vec_select:SI
+            (match_operand:V4SI 1 "register_operand" "x")
+            (parallel [(const_int 0)]))))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
+  "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
+
 (define_insn "*vec_extractv2di_0_sse"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
         (vec_select:DI
@@ -7350,16 +7362,35 @@
   "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
 
 (define_insn "*vec_extractv4si"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
         (vec_select:SI
-          (match_operand:V4SI 1 "register_operand" "x")
+          (match_operand:V4SI 1 "register_operand" "x,0,x")
           (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
   "TARGET_SSE4_1"
-  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sselog1")
-   (set_attr "prefix_extra" "1")
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
+
+    case 1:
+      operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
+      return "psrldq\t{%2, %0|%0, %2}";
+
+    case 2:
+      operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
+      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "*,noavx,avx")
+   (set_attr "type" "sselog1,sseishft1,sseishft1")
+   (set_attr "memory" "*,none,none")
+   (set_attr "prefix_extra" "1,*,*")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "maybe_vex")
+   (set_attr "prefix" "maybe_vex,orig,vex")
    (set_attr "mode" "TI")])
 
 (define_insn "*vec_extractv4si_zext"
@@ -7384,6 +7415,20 @@
   "TARGET_SSE"
   "#")
 
+(define_insn_and_split "*vec_extractv4si_zext_mem"
+  [(set (match_operand:DI 0 "register_operand" "=x,r")
+        (zero_extend:DI
+          (vec_select:SI
+            (match_operand:V4SI 1 "memory_operand" "o,o")
+            (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
+  "TARGET_64BIT && TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
+{
+  operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
+})
+
 (define_insn "*vec_extractv2di_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
         (vec_select:DI