Hi! Similarly to the V{4,8}SFmode -> unsigned V{4,8}SImode conversion support for AVX, this patch adds V{2,4}DFmode -> unsigned V{4,8}SImode conversion (two DFmode vectors are packed into one SImode vector).
Ok for trunk? 2011-11-01 Jakub Jelinek <ja...@redhat.com> * config/i386/sse.md (ssepackfltmode): New mode attr. (vec_pack_ufix_trunc_<mode>): New expander using VF2 iterator. --- gcc/config/i386/sse.md.jj 2011-11-01 09:04:37.000000000 +0100 +++ gcc/config/i386/sse.md 2011-11-01 09:37:36.000000000 +0100 @@ -3127,6 +3127,56 @@ (define_expand "vec_pack_sfix_trunc_v2df DONE; }) +(define_mode_attr ssepackfltmode + [(V4DF "V8SI") (V2DF "V4SI")]) + +(define_expand "vec_pack_ufix_trunc_<mode>" + [(match_operand:<ssepackfltmode> 0 "register_operand" "") + (match_operand:VF2 1 "register_operand" "") + (match_operand:VF2 2 "register_operand" "")] + "TARGET_AVX" +{ + REAL_VALUE_TYPE MTWO32r, TWO31r; + rtx two31r, mtwo32r, tmp[8]; + int i; + + for (i = 0; i < 6; i++) + tmp[i] = gen_reg_rtx (<MODE>mode); + tmp[6] = gen_reg_rtx (<ssepackfltmode>mode); + tmp[7] = gen_reg_rtx (<ssepackfltmode>mode); + real_ldexp (&TWO31r, &dconst1, 31); + two31r = const_double_from_real_value (TWO31r, DFmode); + two31r = ix86_build_const_vector (<MODE>mode, 1, two31r); + two31r = force_reg (<MODE>mode, two31r); + real_ldexp (&MTWO32r, &dconstm1, 32); + mtwo32r = const_double_from_real_value (MTWO32r, DFmode); + mtwo32r = ix86_build_const_vector (<MODE>mode, 1, mtwo32r); + mtwo32r = force_reg (<MODE>mode, mtwo32r); + emit_insn (gen_avx_cmp<mode>3 (tmp[0], operands[1], two31r, GEN_INT (29))); + emit_insn (gen_avx_cmp<mode>3 (tmp[1], operands[2], two31r, GEN_INT (29))); + emit_insn (gen_and<mode>3 (tmp[2], tmp[0], mtwo32r)); + emit_insn (gen_and<mode>3 (tmp[3], tmp[1], mtwo32r)); + emit_insn (gen_add<mode>3 (tmp[4], operands[1], tmp[2])); + emit_insn (gen_add<mode>3 (tmp[5], operands[2], tmp[3])); + if (<MODE>mode == V4DFmode) + { + emit_insn (gen_avx_cvttpd2dq256_2 (tmp[6], tmp[4])); + emit_insn (gen_avx_cvttpd2dq256_2 (tmp[7], tmp[5])); + emit_insn (gen_avx_vperm2f128v8si3 (operands[0], tmp[6], tmp[7], + GEN_INT (0x20))); + } + else + { + emit_insn (gen_sse2_cvttpd2dq (tmp[6], tmp[4])); + emit_insn 
(gen_sse2_cvttpd2dq (tmp[7], tmp[5])); + emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, + operands[0]), + gen_lowpart (V2DImode, tmp[6]), + gen_lowpart (V2DImode, tmp[7]))); + } + DONE; +}) + (define_expand "vec_pack_sfix_v4df" [(match_operand:V8SI 0 "register_operand" "") (match_operand:V4DF 1 "nonimmediate_operand" "") Jakub