> From: Dipesh Sharma <[email protected]>
> Sent: Friday, June 26, 2026 8:16 PM
> 
> diff --git a/gcc/config/i386/avx10_v2_auxintrin.h
> b/gcc/config/i386/avx10_v2_auxintrin.h
> index c53ecf3f3d7..fdfc7f44ecb 100644
> --- a/gcc/config/i386/avx10_v2_auxintrin.h
> +++ b/gcc/config/i386/avx10_v2_auxintrin.h
> @@ -1467,6 +1467,134 @@ _mm512_maskz_cvthf6_hf8(__mmask64 __U,
> __m512i __A) {
>                                                (__mmask64) __U);
>  }
> 
> +// VPMOVSSDB - 128-bit
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_pmovssdb_epi8 (__m128i __A)

The naming is not consistent with the existing intrinsics. Although the
mnemonic says "move", the operation is actually a conversion. You could
refer to gcc/config/i386/avx512vlintrin.h:
For vpmovdb, the intrinsic is named _mm_cvtepi32_epi8.
For vpmovsdb, the intrinsic is named _mm_cvtsepi32_epi8.
I suppose we should use _mm_cvtss_epi32_epi8 here. The "_" between
"ss" and "epi32" is there to separate them clearly.

You may also need store intrinsics, like those for vpmovdb and vpmovsdb.

> +// VUNPACKB - 128-bit
> +#define _mm_unpackb_epi8(A, imm) \
> +     ((__m128i)
> __builtin_ia32_vunpackb128_mask((__v16qi)(__m128i)(A), \
> +     (int)(imm), (__v16qi)(__m128i)(_mm_undefined_si128 ()),
> (__mmask16)(-1)))

You will also need non-macro versions. The macro versions are for
constant propagation under -O0. You could wrap the non-macro versions
under __OPTIMIZE__ and keep the macro versions outside of __OPTIMIZE__.

> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 3db68d0b117..66d9348ec32 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -34136,3 +34138,91 @@
>    "vcvt<convertfp62hf8>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
>    [(set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> +
> +;; VUNPACKB - Sub-byte element extraction
> +
> +(define_insn "vunpackb<mode>"
> +  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
> +     (unspec:VI1_AVX512VL
> +       [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
> +        (match_operand:SI 2 "const_0_to_255_operand" "n")]
> +       UNSPEC_VUNPACKB))]
> +  "TARGET_AVX10_V2_AUX"
> +  "vunpackb\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_expand "vunpackb<mode>_mask"
> +  [(set (match_operand:VI1_AVX512VL 0 "register_operand")
> +     (vec_merge:VI1_AVX512VL
> +       (unspec:VI1_AVX512VL
> +         [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand")
> +          (match_operand:SI 2 "const_0_to_255_operand")]
> +         UNSPEC_VUNPACKB)
> +       (match_operand:VI1_AVX512VL 3 "nonimm_or_0_operand")
> +       (match_operand:<avx512fmaskmode> 4
> "register_or_constm1_operand")))]
> +  "TARGET_AVX10_V2_AUX"
> +{
> +  if (CONST_INT_P (operands[4]))
> +    {
> +      emit_insn (gen_vunpackb<mode> (operands[0], operands[1],
> operands[2]));
> +      DONE;
> +    }
> +})
> +

One more thing about <mask_name>: if you use <mask_name>, you can
omit this define_expand, since it will do this optimization
automatically for you.

> +(define_insn "*vunpackb<mode>_mask"
> +  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
> +     (vec_merge:VI1_AVX512VL
> +       (unspec:VI1_AVX512VL
> +         [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand"
> "vm")
> +          (match_operand:SI 2 "const_0_to_255_operand" "n")]
> +         UNSPEC_VUNPACKB)
> +       (match_operand:VI1_AVX512VL 3 "nonimm_or_0_operand" "0C")
> +       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
> +  "TARGET_AVX10_V2_AUX"
> +  "vunpackb\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +;; VPMOVSSDB - Symmetric signed saturation narrow (32-bit to 8-bit)
> +
> +
> +(define_insn "vpmovssdb<mode>"
> +  [(set (match_operand:V16QI 0 "register_operand" "=v")
> +     (unspec:V16QI
> +       [(match_operand:VI4_AVX512VL 1 "register_operand" "v")]
> +       UNSPEC_VPMOVSSDB))]
> +  "TARGET_AVX10_V2_AUX"
> +  "vpmovssdb\t{%1, %0|%0, %1}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_expand "vpmovssdb<mode>_mask"
> +  [(set (match_operand:V16QI 0 "register_operand")
> +     (vec_merge:V16QI
> +       (unspec:V16QI
> +         [(match_operand:VI4_AVX512VL 1 "register_operand")]
> +         UNSPEC_VPMOVSSDB)
> +       (match_operand:V16QI 2 "nonimm_or_0_operand")
> +       (match_operand:<avx512fmaskmode> 3
> "register_or_constm1_operand")))]
> +  "TARGET_AVX10_V2_AUX"
> +{
> +  if (CONST_INT_P (operands[3]))
> +    {
> +      emit_insn (gen_vpmovssdb<mode> (operands[0], operands[1]));
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*vpmovssdb<mode>_mask"
> +  [(set (match_operand:V16QI 0 "register_operand" "=v")
> +     (vec_merge:V16QI
> +       (unspec:V16QI
> +         [(match_operand:VI4_AVX512VL 1 "register_operand" "v")]
> +         UNSPEC_VPMOVSSDB)
> +       (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
> +       (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
> +  "TARGET_AVX10_V2_AUX"
> +  "vpmovssdb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])

vpmovssdb has both reg-to-reg and reg-to-mem forms; you only have the
reg-to-reg part here. You could refer to the existing similar patterns,
such as the AVX512VL ones.

> diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-v2-aux-convert-10.c
> b/gcc/testsuite/gcc.target/i386/avx10_2-v2-aux-convert-10.c

Typically, we put all compile tests in -1.c and leave runtime tests in
-2.c. You could combine them all into a single -1.c, or you could split
them into -1a, -1b, -1c, etc. That is also clear (and maybe better than a
single -1.c, I think).

Thx,
Haochen

Reply via email to