On Thu, Jul 05, 2018 at 11:57:26PM +0300, Grazvydas Ignotas wrote:
> I think it would be more efficient if you took care of it. I won't
> have time for at least a few days anyway.

Here is what I have ATM; I still need to work on the testsuite side for it
and to bootstrap/regtest it.
In addition to the checks I've already posted, I've also done:
# Collect all _mm* intrinsics that mention an __mmask type into /tmp/11,
# one prototype per line: first the extern inline functions, then the
# #define'd ones.
echo `sed -n '/^_mm.*__mmask/,/^}/p' config/i386/*.h | sed 's/^}/@@@/'` \
  | sed 's/@@@/}\n/g' > /tmp/11
echo `sed -n '/^#define[ \t]_mm/,/)$/p' config/i386/*.h | sed 's/)$/@@@/' | sed 's/\\$//'` \
  | sed 's/@@@/)\n/g' | grep __mmask >> /tmp/11
# The builtin's prototype suffix implies the expected mask width
# (QI -> __mmask8, HI -> __mmask16, SI -> __mmask32, DI -> __mmask64,
# optionally with a U prefix and/or an _INT suffix); flag every intrinsic
# that uses such a builtin with any other __mmask type.
for t in QI:__mmask8 HI:__mmask16 SI:__mmask32 DI:__mmask64; do
  s=${t%:*} m=${t#*:}
  for p in "_$s)" "_U$s)" "_${s}_INT)" "_U${s}_INT)"; do
    for i in `grep "__builtin.*$p" config/i386/i386-builtin.def | sed 's/^[^"]*"//;s/".*$//' | sort -u`; do
      grep $i'[ \t(].*__mmask' /tmp/11 | grep -v $m
    done
  done
done
and finally manual checking (could be automated too) of:
for i in `grep '__builtin.*_INT)' config/i386/i386-builtin.def | grep -v '_U\?[QHSD]I_INT)' | sed 's/^[^"]*"//;s/".*$//' | sort -u`; do
  grep $i'[ \t(].*__mmask' /tmp/11 | grep -v __mmask32
done
For this last one, the point is to verify which
'__v\(2\|4\|8\|16\|32\|64\)[qhsd][if]' vector type is used with which
__mmask type: vectors with 2, 4 or 8 elements should be used with __mmask8,
16-element vectors with __mmask16, 32-element ones with __mmask32 and
64-element ones with __mmask64.
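
The rule is simply one mask bit per vector lane.  As a quick illustration
(a hypothetical snippet, not part of the patch; the function name is made
up):

#include <immintrin.h>

/* A __m512i viewed as __v16si has 16 lanes, so it pairs with __mmask16;
   viewed as __v8di it has 8 lanes, so it pairs with __mmask8.  */
__attribute__ ((target ("avx512f"))) __m512i
mask_width_demo (__m512i src, __m512i a, __m512i b)
{
  __mmask16 m16 = _mm512_cmpeq_epi32_mask (a, b);  /* 16 x 32-bit lanes */
  __mmask8 m8 = _mm512_cmpeq_epi64_mask (a, b);    /* 8 x 64-bit lanes */
  src = _mm512_mask_add_epi32 (src, m16, a, b);
  return _mm512_mask_add_epi64 (src, m8, a, b);
}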

Some fixes in the patch are mostly for consistency and harmless for code
generation (e.g. where an argument should have been __mmask8 but was
declared __mmask16, the extra bits are dropped again when the value is
passed to the builtin), but several changes should fix wrong-code bugs,
where a too-narrow parameter type truncated mask bits that the instruction
actually uses.
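
E.g. for the _mm512_mask_cmp_epi8_mask change below, the old __mmask32 __U
parameter silently dropped bits 32..63 of a 64-bit incoming mask through
the implicit conversion, so the upper half of the 64-lane comparison result
was forced to zero.  A minimal sketch of affected code (a hypothetical
reproducer, not taken from the patch or its testsuite):

#include <immintrin.h>

__attribute__ ((target ("avx512bw"))) __mmask64
cmp_bytes (__mmask64 m, __m512i x, __m512i y)
{
  /* Before the fix, m was implicitly converted to the __mmask32 parameter
     type, losing its upper 32 bits.  */
  return _mm512_mask_cmp_epi8_mask (m, x, y, _MM_CMPINT_EQ);
}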

--- gcc/config/i386/avx512bwintrin.h.jj 2018-01-03 10:20:06.699535804 +0100
+++ gcc/config/i386/avx512bwintrin.h    2018-07-06 09:53:44.657235040 +0200
@@ -3043,7 +3043,7 @@ _mm512_cmp_epi16_mask (__m512i __X, __m5
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epi8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
                           const int __P)
 {
   return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
@@ -3081,7 +3081,7 @@ _mm512_cmp_epu16_mask (__m512i __X, __m5
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epu8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
                           const int __P)
 {
   return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
--- gcc/config/i386/avx512bitalgintrin.h.jj     2018-01-26 12:43:26.374922539 +0100
+++ gcc/config/i386/avx512bitalgintrin.h        2018-07-06 09:51:19.536082408 +0200
@@ -107,7 +107,7 @@ _mm512_bitshuffle_epi64_mask (__m512i __
 
 extern __inline __mmask64
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B)
+_mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
 {
   return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
                                                 (__v64qi) __B,
--- gcc/config/i386/avx512fintrin.h.jj  2018-05-21 13:15:43.494581775 +0200
+++ gcc/config/i386/avx512fintrin.h     2018-07-06 11:41:19.879801396 +0200
@@ -7377,7 +7377,7 @@ _mm512_xor_epi64 (__m512i __A, __m512i _
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
@@ -7387,7 +7387,7 @@ _mm512_mask_xor_epi64 (__m512i __W, __mm
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
+_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
@@ -9615,7 +9615,7 @@ _mm512_cmpneq_epu32_mask (__m512i __X, _
 
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
+_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
                                                    (__v8di) __Y, 4,
@@ -10877,22 +10877,22 @@ _mm512_mask_insertf32x4 (__m512 __A, __m
 #define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 
 #define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (),     \
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 
 #define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
-                                            (__mmask8)(B)))
+                                            (__mmask16)(B)))
 
 #define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
-                                             (__mmask8)(B)))
+                                             (__mmask16)(B)))
 #endif
 
 extern __inline __m512i
--- gcc/config/i386/avx512vlintrin.h.jj 2018-01-03 10:20:06.152535716 +0100
+++ gcc/config/i386/avx512vlintrin.h    2018-07-06 10:13:02.209506028 +0200
@@ -466,7 +466,7 @@ _mm256_maskz_add_pd (__mmask8 __U, __m25
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
                                                (__v4sf) __B,
@@ -476,7 +476,7 @@ _mm_mask_add_ps (__m128 __W, __mmask16 _
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
                                                (__v4sf) __B,
@@ -487,7 +487,7 @@ _mm_maskz_add_ps (__mmask16 __U, __m128
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
                                                (__v8sf) __B,
@@ -497,7 +497,7 @@ _mm256_mask_add_ps (__m256 __W, __mmask1
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
                                                (__v8sf) __B,
@@ -551,7 +551,7 @@ _mm256_maskz_sub_pd (__mmask8 __U, __m25
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
                                                (__v4sf) __B,
@@ -561,7 +561,7 @@ _mm_mask_sub_ps (__m128 __W, __mmask16 _
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
                                                (__v4sf) __B,
@@ -572,7 +572,7 @@ _mm_maskz_sub_ps (__mmask16 __U, __m128
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
                                                (__v8sf) __B,
@@ -582,7 +582,7 @@ _mm256_mask_sub_ps (__m256 __W, __mmask1
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
                                                (__v8sf) __B,
@@ -1320,7 +1320,7 @@ _mm256_mask_cvtepi32_ps (__m256 __W, __m
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
+_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
 {
   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
                                                   (__v8sf)
@@ -1339,7 +1339,7 @@ _mm_mask_cvtepi32_ps (__m128 __W, __mmas
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
+_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
                                                   (__v4sf)
--- gcc/config/i386/avx512vlbwintrin.h.jj       2018-01-03 10:20:06.598535787 +0100
+++ gcc/config/i386/avx512vlbwintrin.h  2018-07-06 12:16:56.109933815 +0200
@@ -1467,7 +1467,7 @@ _mm256_cmp_epi16_mask (__m256i __X, __m2
 
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epi8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
                        const int __P)
 {
   return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
@@ -1486,7 +1486,7 @@ _mm_cmp_epi8_mask (__m128i __X, __m128i
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epi8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
                           const int __P)
 {
   return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
@@ -1543,7 +1543,7 @@ _mm256_cmp_epu16_mask (__m256i __X, __m2
 
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epu8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
                        const int __P)
 {
   return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
@@ -1562,7 +1562,7 @@ _mm_cmp_epu8_mask (__m128i __X, __m128i
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epu8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
                           const int __P)
 {
   return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
@@ -1998,7 +1998,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m1
 #define _mm_mask_cmp_epi16_mask(M, X, Y, P)                            \
   ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X),       \
                                            (__v8hi)(__m128i)(Y), (int)(P),\
-                                           (__mmask16)(M)))
+                                           (__mmask8)(M)))
 
 #define _mm_mask_cmp_epi8_mask(M, X, Y, P)                             \
   ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X),     \
@@ -2430,7 +2430,7 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
                                                    (__v16hi) __W,
@@ -2449,7 +2449,7 @@ _mm256_maskz_cvtepi8_epi16 (__mmask16 __
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
                                                    (__v8hi) __W,
@@ -2468,7 +2468,7 @@ _mm_maskz_cvtepi8_epi16 (__mmask8 __U, _
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
                                                    (__v16hi) __W,
@@ -2487,7 +2487,7 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
                                                    (__v8hi) __W,
@@ -4541,148 +4541,148 @@ _mm_mask_cmple_epi16_mask (__mmask8 __M,
                                                 (__mmask8) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-                                                 (__v32qi) __Y, 4,
-                                                 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+                                                  (__v32qi) __Y, 4,
+                                                  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-                                                 (__v32qi) __Y, 1,
-                                                 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+                                                  (__v32qi) __Y, 1,
+                                                  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-                                                 (__v32qi) __Y, 5,
-                                                 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+                                                  (__v32qi) __Y, 5,
+                                                  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-                                                 (__v32qi) __Y, 2,
-                                                 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+                                                  (__v32qi) __Y, 2,
+                                                  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-                                                 (__v16hi) __Y, 4,
-                                                 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+                                                  (__v16hi) __Y, 4,
+                                                  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-                                                 (__v16hi) __Y, 1,
-                                                 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+                                                  (__v16hi) __Y, 1,
+                                                  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-                                                 (__v16hi) __Y, 5,
-                                                 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+                                                  (__v16hi) __Y, 5,
+                                                  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-                                                 (__v16hi) __Y, 2,
-                                                 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+                                                  (__v16hi) __Y, 2,
+                                                  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-                                                (__v32qi) __Y, 4,
-                                                (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+                                                 (__v32qi) __Y, 4,
+                                                 (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-                                                (__v32qi) __Y, 1,
-                                                (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+                                                 (__v32qi) __Y, 1,
+                                                 (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-                                                (__v32qi) __Y, 5,
-                                                (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+                                                 (__v32qi) __Y, 5,
+                                                 (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-                                                (__v32qi) __Y, 2,
-                                                (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+                                                 (__v32qi) __Y, 2,
+                                                 (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-                                                (__v16hi) __Y, 4,
-                                                (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+                                                 (__v16hi) __Y, 4,
+                                                 (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-                                                (__v16hi) __Y, 1,
-                                                (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+                                                 (__v16hi) __Y, 1,
+                                                 (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-                                                (__v16hi) __Y, 5,
-                                                (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+                                                 (__v16hi) __Y, 5,
+                                                 (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-                                                (__v16hi) __Y, 2,
-                                                (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+                                                 (__v16hi) __Y, 2,
+                                                 (__mmask16) __M);
 }
 
 #ifdef __DISABLE_AVX512VLBW__
--- gcc/config/i386/avx512vbmi2vlintrin.h.jj    2018-01-03 10:20:06.085535705 +0100
+++ gcc/config/i386/avx512vbmi2vlintrin.h       2018-07-06 12:09:13.797471324 +0200
@@ -541,7 +541,7 @@ _mm_shldi_epi64 (__m128i __A, __m128i __
        (__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shrdi_epi32(A, B, C, D, E) \
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
-       (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+       (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shrdi_epi32(A, B, C, D) \
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
        (__v4si)(__m128i)(C),(int)(D), \
@@ -601,7 +601,7 @@ _mm_shldi_epi64 (__m128i __A, __m128i __
        (__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shldi_epi32(A, B, C, D, E) \
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
-       (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+       (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shldi_epi32(A, B, C, D) \
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \
        (__v4si)(__m128i)(C),(int)(D), \


        Jakub
