Hi! The AVX2 docs say that the insns will #UD if any of the mask, src and index registers are the same, but e.g. on #include <x86intrin.h>
__m256 m; float f[1024]; __m256 foo (void) { __m256i mi = (__m256i) m; return _mm256_mask_i32gather_ps (m, f, mi, m, 4); } which is IMHO valid and should for m being zero vector just return a zero vector and clear mask (in this case it was already cleared) we compile it as vmovdqa m(%rip), %ymm1 vmovaps %ymm1, %ymm0 vgatherdps %ymm1, (%rax, %ymm1, 4), %ymm0 and thus IMHO it will #UD. Also, the insns should make it clear that the mask register is modified too (the patch clobbers it, perhaps we could instead say that it zeros the register (which is true if it doesn't segfault), but then what if a segfault handler chooses to continue with the next insn and doesn't clear the mask register?). Still, the insn description is imprecise, saying that it loads from mem at the address register is wrong and perhaps some DCE might delete what shouldn't be deleted. So, either it should (use (mem (scratch))) or something similar, or in the unspec list all the memory locations that are being read (mem:<scalarssemode> (plus:SI (reg:SI) (vec_select:SI (match_operand:V4SI) (parallel [(const_int N)])))) for N 0 through something (but it is complicated by Pmode size vs. the need to do nothing/truncate/sign_extend the vec_select to the right mode). What do you think? 2011-10-08 Jakub Jelinek <ja...@redhat.com> * config/i386/sse.md (avx2_gathersi<mode>, avx2_gatherdi<mode>, avx2_gatherdi<mode>256): Add clobber of operand 4. (*avx2_gathersi<mode>, *avx2_gatherdi<mode>, *avx2_gatherdi<mode>256): Add clobber of the mask register, add earlyclobber to both output operands. --- gcc/config/i386/sse.md.jj 2011-10-07 10:03:27.000000000 +0200 +++ gcc/config/i386/sse.md 2011-10-08 17:14:50.000000000 +0200 @@ -12521,55 +12521,59 @@ (define_mode_attr VEC_GATHER_MODE (V8SI "V8SI") (V8SF "V8SI")]) (define_expand "avx2_gathersi<mode>" - [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "") - (unspec:VEC_GATHER_MODE - [(match_operand:VEC_GATHER_MODE 1 "register_operand" "") - (match_operand:<ssescalarmode> 2 "memory_operand" "") - (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "") - (match_operand:VEC_GATHER_MODE 4 "register_operand" "") - (match_operand:SI 5 "const1248_operand " "")] - UNSPEC_GATHER))] + [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "") + (unspec:VEC_GATHER_MODE + [(match_operand:VEC_GATHER_MODE 1 "register_operand" "") + (match_operand:<ssescalarmode> 2 "memory_operand" "") + (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "") + (match_operand:VEC_GATHER_MODE 4 "register_operand" "") + (match_operand:SI 5 "const1248_operand " "")] + UNSPEC_GATHER)) + (clobber (match_dup 4))])] "TARGET_AVX2") (define_insn "*avx2_gathersi<mode>" - [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=x") + [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") (unspec:VEC_GATHER_MODE - [(match_operand:VEC_GATHER_MODE 1 "register_operand" "0") + [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0") (mem:<ssescalarmode> - (match_operand:P 2 "register_operand" "r")) - (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "x") - (match_operand:VEC_GATHER_MODE 4 "register_operand" "x") - (match_operand:SI 5 "const1248_operand" "n")] - UNSPEC_GATHER))] + (match_operand:P 3 "register_operand" "r")) + (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x") + (match_operand:VEC_GATHER_MODE 5 "register_operand" "1") + (match_operand:SI 6 "const1248_operand" "n")] + UNSPEC_GATHER)) + (clobber (match_operand:VEC_GATHER_MODE 1 "register_operand" "=&x"))] "TARGET_AVX2" - "v<gthrfirstp>gatherd<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}" + "v<gthrfirstp>gatherd<gthrlastp>\t{%1, (%3, %4, %c6), %0|%0, (%3, %4, %c6), %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) (define_expand "avx2_gatherdi<mode>" - [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "") - (unspec:VEC_GATHER_MODE - [(match_operand:VEC_GATHER_MODE 1 "register_operand" "") - (match_operand:<ssescalarmode> 2 "memory_operand" "") - (match_operand:<AVXMODE48P_DI> 3 "register_operand" "") - (match_operand:VEC_GATHER_MODE 4 "register_operand" "") - (match_operand:SI 5 "const1248_operand " "")] - UNSPEC_GATHER))] + [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "") + (unspec:VEC_GATHER_MODE + [(match_operand:VEC_GATHER_MODE 1 "register_operand" "") + (match_operand:<ssescalarmode> 2 "memory_operand" "") + (match_operand:<AVXMODE48P_DI> 3 "register_operand" "") + (match_operand:VEC_GATHER_MODE 4 "register_operand" "") + (match_operand:SI 5 "const1248_operand " "")] + UNSPEC_GATHER)) + (clobber (match_dup 4))])] "TARGET_AVX2") (define_insn "*avx2_gatherdi<mode>" - [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=x") + [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x") (unspec:AVXMODE48P_DI - [(match_operand:AVXMODE48P_DI 1 "register_operand" "0") + [(match_operand:AVXMODE48P_DI 2 "register_operand" "0") (mem:<ssescalarmode> - (match_operand:P 2 "register_operand" "r")) - (match_operand:<AVXMODE48P_DI> 3 "register_operand" "x") - (match_operand:AVXMODE48P_DI 4 "register_operand" "x") - (match_operand:SI 5 "const1248_operand" "n")] - UNSPEC_GATHER))] + (match_operand:P 3 "register_operand" "r")) + (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x") + (match_operand:AVXMODE48P_DI 5 "register_operand" "1") + (match_operand:SI 6 "const1248_operand" "n")] + UNSPEC_GATHER)) + (clobber (match_operand:AVXMODE48P_DI 1 "register_operand" "=&x"))] "TARGET_AVX2" - "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}" + "v<gthrfirstp>gatherq<gthrlastp>\t{%1, (%3, %4, %c6), %0|%0, (%3, %4, %c6), %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) @@ -12577,28 +12581,30 @@ (define_insn "*avx2_gatherdi<mode>" ;; Special handling for VEX.256 with float arguments ;; since there're still xmms as operands (define_expand "avx2_gatherdi<mode>256" - [(set (match_operand:VI4F_128 0 "register_operand" "") - (unspec:VI4F_128 - [(match_operand:VI4F_128 1 "register_operand" "") - (match_operand:<ssescalarmode> 2 "memory_operand" "") - (match_operand:V4DI 3 "register_operand" "") - (match_operand:VI4F_128 4 "register_operand" "") - (match_operand:SI 5 "const1248_operand " "")] - UNSPEC_GATHER))] + [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "") + (unspec:VI4F_128 + [(match_operand:VI4F_128 1 "register_operand" "") + (match_operand:<ssescalarmode> 2 "memory_operand" "") + (match_operand:V4DI 3 "register_operand" "") + (match_operand:VI4F_128 4 "register_operand" "") + (match_operand:SI 5 "const1248_operand " "")] + UNSPEC_GATHER)) + (clobber (match_dup 4))])] "TARGET_AVX2") (define_insn "*avx2_gatherdi<mode>256" [(set (match_operand:VI4F_128 0 "register_operand" "=x") (unspec:VI4F_128 - [(match_operand:VI4F_128 1 "register_operand" "0") + [(match_operand:VI4F_128 2 "register_operand" "0") (mem:<ssescalarmode> - (match_operand:P 2 "register_operand" "r")) - (match_operand:V4DI 3 "register_operand" "x") - (match_operand:VI4F_128 4 "register_operand" "x") - (match_operand:SI 5 "const1248_operand" "n")] - UNSPEC_GATHER))] + (match_operand:P 3 "register_operand" "r")) + (match_operand:V4DI 4 "register_operand" "x") + (match_operand:VI4F_128 5 "register_operand" "1") + (match_operand:SI 6 "const1248_operand" "n")] + UNSPEC_GATHER)) + (clobber (match_operand:VI4F_128 1 "register_operand" "=&x"))] "TARGET_AVX2" - "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}" + "v<gthrfirstp>gatherq<gthrlastp>\t{%1, (%3, %4, %c6), %0|%0, (%3, %4, %c6), %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) Jakub