Hi!

This patch allows the vectorizer to vectorize

char a[1024], c[1024];
long long b[1024];

void
foo (void)
{
  int i;
  for (i = 0; i < 1024; i++)
    b[i] = a[i] + 3 * c[i];
}

using 32-byte vectors with -mavx2.

Bootstrapped/regtested on x86_64-linux and i686-linux. OK for trunk?
2011-10-12 Jakub Jelinek <ja...@redhat.com> * config/i386/sse.md (vec_unpacks_lo_<mode>, vec_unpacks_hi_<mode>, vec_unpacku_lo_<mode>, vec_unpacku_hi_<mode>): Change VI124_128 mode to VI124_AVX2. * config/i386/i386.c (ix86_expand_sse_unpack): Handle V32QImode, V16HImode and V8SImode for TARGET_AVX2. --- gcc/config/i386/sse.md.jj 2011-10-12 15:42:12.000000000 +0200 +++ gcc/config/i386/sse.md 2011-10-12 16:16:49.000000000 +0200 @@ -7536,25 +7536,25 @@ (define_insn "vec_concatv2di" (define_expand "vec_unpacks_lo_<mode>" [(match_operand:<sseunpackmode> 0 "register_operand" "") - (match_operand:VI124_128 1 "register_operand" "")] + (match_operand:VI124_AVX2 1 "register_operand" "")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands, false, false); DONE;") (define_expand "vec_unpacks_hi_<mode>" [(match_operand:<sseunpackmode> 0 "register_operand" "") - (match_operand:VI124_128 1 "register_operand" "")] + (match_operand:VI124_AVX2 1 "register_operand" "")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands, false, true); DONE;") (define_expand "vec_unpacku_lo_<mode>" [(match_operand:<sseunpackmode> 0 "register_operand" "") - (match_operand:VI124_128 1 "register_operand" "")] + (match_operand:VI124_AVX2 1 "register_operand" "")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands, true, false); DONE;") (define_expand "vec_unpacku_hi_<mode>" [(match_operand:<sseunpackmode> 0 "register_operand" "") - (match_operand:VI124_128 1 "register_operand" "")] + (match_operand:VI124_AVX2 1 "register_operand" "")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands, true, true); DONE;") --- gcc/config/i386/i386.c.jj 2011-10-12 14:19:26.000000000 +0200 +++ gcc/config/i386/i386.c 2011-10-12 16:15:50.000000000 +0200 @@ -19658,9 +19658,38 @@ ix86_expand_sse_unpack (rtx operands[2], if (TARGET_SSE4_1) { rtx (*unpack)(rtx, rtx); + rtx (*extract)(rtx, rtx) = NULL; + enum machine_mode halfmode = BLKmode; switch (imode) { + case V32QImode: + if (unsigned_p) + unpack = gen_avx2_zero_extendv16qiv16hi2; + else 
+ unpack = gen_avx2_sign_extendv16qiv16hi2; + halfmode = V16QImode; + extract + = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi; + break; + case V16HImode: + if (unsigned_p) + unpack = gen_avx2_zero_extendv8hiv8si2; + else + unpack = gen_avx2_sign_extendv8hiv8si2; + halfmode = V8HImode; + extract + = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi; + break; + case V8SImode: + if (unsigned_p) + unpack = gen_avx2_zero_extendv4siv4di2; + else + unpack = gen_avx2_sign_extendv4siv4di2; + halfmode = V4SImode; + extract + = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si; + break; case V16QImode: if (unsigned_p) unpack = gen_sse4_1_zero_extendv8qiv8hi2; @@ -19683,7 +19712,12 @@ ix86_expand_sse_unpack (rtx operands[2], gcc_unreachable (); } - if (high_p) + if (GET_MODE_SIZE (imode) == 32) + { + tmp = gen_reg_rtx (halfmode); + emit_insn (extract (tmp, operands[1])); + } + else if (high_p) { /* Shift higher 8 bytes to lower 8 bytes. */ tmp = gen_reg_rtx (imode); Jakub