Hi!

The AVX2 docs say that the insns will #UD if any of the mask, src and index
registers are the same, but e.g. on
#include <x86intrin.h>

__m256 m;
float f[1024];

__m256
foo (void)
{
  __m256i mi = (__m256i) m;
  return _mm256_mask_i32gather_ps (m, f, mi, m, 4);
}

which is IMHO valid and should for m being zero vector just return a
zero vector and clear mask (in this case it was already cleared) we compile
it as
        vmovdqa m(%rip), %ymm1
        vmovaps %ymm1, %ymm0
        vgatherdps      %ymm1, (%rax, %ymm1, 4), %ymm0
and thus IMHO it will #UD.  Also, the insns should make it clear that
the mask register is modified too (the patch clobbers it, perhaps
we could instead say that it zeros the register (which is true if
it doesn't segfault), but then what if a segfault handler chooses to
continue with the next insn and doesn't clear the mask register?).
Still, the insn description is imprecise, saying that it loads from mem
at the address register is wrong and perhaps some DCE might delete
what shouldn't be deleted.  So, either it should (use (mem (scratch)))
or something similar, or in the unspec list all the memory locations
that are being read
(mem:<scalarssemode> (plus:SI (reg:SI) (vec_select:SI (match_operand:V4SI)
(parallel [(const_int N)]))))
for N 0 through something (but it is complicated by Pmode size vs.
the need to do nothing/truncate/sign_extend the vec_select to the right
mode).

What do you think?

2011-10-08  Jakub Jelinek  <ja...@redhat.com>

        * config/i386/sse.md (avx2_gathersi<mode>, avx2_gatherdi<mode>,
        avx2_gatherdi<mode>256): Add clobber of operand 4.
        (*avx2_gathersi<mode>, *avx2_gatherdi<mode>,
        *avx2_gatherdi<mode>256): Add clobber of the mask register,
        add earlyclobber to both output operands.

--- gcc/config/i386/sse.md.jj   2011-10-07 10:03:27.000000000 +0200
+++ gcc/config/i386/sse.md      2011-10-08 17:14:50.000000000 +0200
@@ -12521,55 +12521,59 @@ (define_mode_attr VEC_GATHER_MODE
                       (V8SI "V8SI") (V8SF "V8SI")])
 
 (define_expand "avx2_gathersi<mode>"
-  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
-       (unspec:VEC_GATHER_MODE
-         [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
-          (match_operand:<ssescalarmode> 2 "memory_operand" "")
-          (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
-          (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
-          (match_operand:SI 5 "const1248_operand " "")]
-         UNSPEC_GATHER))]
+  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
+                  (unspec:VEC_GATHER_MODE
+                    [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
+                     (match_operand:<ssescalarmode> 2 "memory_operand" "")
+                     (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
+                     (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
+                     (match_operand:SI 5 "const1248_operand " "")]
+                    UNSPEC_GATHER))
+             (clobber (match_dup 4))])]
   "TARGET_AVX2")
 
 (define_insn "*avx2_gathersi<mode>"
-  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=x")
+  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
-         [(match_operand:VEC_GATHER_MODE 1 "register_operand" "0")
+         [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
           (mem:<ssescalarmode>
-            (match_operand:P 2 "register_operand" "r"))
-          (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "x")
-          (match_operand:VEC_GATHER_MODE 4 "register_operand" "x")
-          (match_operand:SI 5 "const1248_operand" "n")]
-         UNSPEC_GATHER))]
+            (match_operand:P 3 "register_operand" "r"))
+          (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
+          (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")
+          (match_operand:SI 6 "const1248_operand" "n")]
+         UNSPEC_GATHER))
+   (clobber (match_operand:VEC_GATHER_MODE 1 "register_operand" "=&x"))]
   "TARGET_AVX2"
-  "v<gthrfirstp>gatherd<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), 
%4}"
+  "v<gthrfirstp>gatherd<gthrlastp>\t{%1, (%3, %4, %c6), %0|%0, (%3, %4, %c6), 
%1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "avx2_gatherdi<mode>"
-  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
-       (unspec:VEC_GATHER_MODE
-         [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
-          (match_operand:<ssescalarmode> 2 "memory_operand" "")
-          (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
-          (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
-          (match_operand:SI 5 "const1248_operand " "")]
-         UNSPEC_GATHER))]
+  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
+                  (unspec:VEC_GATHER_MODE
+                    [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
+                     (match_operand:<ssescalarmode> 2 "memory_operand" "")
+                     (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
+                     (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
+                     (match_operand:SI 5 "const1248_operand " "")]
+                    UNSPEC_GATHER))
+             (clobber (match_dup 4))])]
   "TARGET_AVX2")
 
 (define_insn "*avx2_gatherdi<mode>"
-  [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=x")
+  [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
        (unspec:AVXMODE48P_DI
-         [(match_operand:AVXMODE48P_DI 1 "register_operand" "0")
+         [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
           (mem:<ssescalarmode>
-            (match_operand:P 2 "register_operand" "r"))
-          (match_operand:<AVXMODE48P_DI> 3 "register_operand" "x")
-          (match_operand:AVXMODE48P_DI 4 "register_operand" "x")
-          (match_operand:SI 5 "const1248_operand" "n")]
-         UNSPEC_GATHER))]
+            (match_operand:P 3 "register_operand" "r"))
+          (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
+          (match_operand:AVXMODE48P_DI 5 "register_operand" "1")
+          (match_operand:SI 6 "const1248_operand" "n")]
+         UNSPEC_GATHER))
+   (clobber (match_operand:AVXMODE48P_DI 1 "register_operand" "=&x"))]
   "TARGET_AVX2"
-  "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), 
%4}"
+  "v<gthrfirstp>gatherq<gthrlastp>\t{%1, (%3, %4, %c6), %0|%0, (%3, %4, %c6), 
%1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -12577,28 +12581,30 @@ (define_insn "*avx2_gatherdi<mode>"
 ;; Special handling for VEX.256 with float arguments
 ;; since there're still xmms as operands
 (define_expand "avx2_gatherdi<mode>256"
-  [(set (match_operand:VI4F_128 0 "register_operand" "")
-       (unspec:VI4F_128
-         [(match_operand:VI4F_128 1 "register_operand" "")
-          (match_operand:<ssescalarmode> 2 "memory_operand" "")
-          (match_operand:V4DI 3 "register_operand" "")
-          (match_operand:VI4F_128 4 "register_operand" "")
-          (match_operand:SI 5 "const1248_operand " "")]
-         UNSPEC_GATHER))]
+  [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
+                  (unspec:VI4F_128
+                    [(match_operand:VI4F_128 1 "register_operand" "")
+                     (match_operand:<ssescalarmode> 2 "memory_operand" "")
+                     (match_operand:V4DI 3 "register_operand" "")
+                     (match_operand:VI4F_128 4 "register_operand" "")
+                     (match_operand:SI 5 "const1248_operand " "")]
+                    UNSPEC_GATHER))
+             (clobber (match_dup 4))])]
   "TARGET_AVX2")
 
 (define_insn "*avx2_gatherdi<mode>256"
   [(set (match_operand:VI4F_128 0 "register_operand" "=x")
        (unspec:VI4F_128
-         [(match_operand:VI4F_128 1 "register_operand" "0")
+         [(match_operand:VI4F_128 2 "register_operand" "0")
           (mem:<ssescalarmode>
-            (match_operand:P 2 "register_operand" "r"))
-          (match_operand:V4DI 3 "register_operand" "x")
-          (match_operand:VI4F_128 4 "register_operand" "x")
-          (match_operand:SI 5 "const1248_operand" "n")]
-         UNSPEC_GATHER))]
+            (match_operand:P 3 "register_operand" "r"))
+          (match_operand:V4DI 4 "register_operand" "x")
+          (match_operand:VI4F_128 5 "register_operand" "1")
+          (match_operand:SI 6 "const1248_operand" "n")]
+         UNSPEC_GATHER)) 
+   (clobber (match_operand:VI4F_128 1 "register_operand" "=&x"))]
   "TARGET_AVX2"
-  "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), 
%4}"
+  "v<gthrfirstp>gatherq<gthrlastp>\t{%1, (%3, %4, %c6), %0|%0, (%3, %4, %c6), 
%1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])

        Jakub

Reply via email to