https://gcc.gnu.org/g:e5f8a39941f6f0f25dac88bd71fd368fb284a10f
commit r15-1678-ge5f8a39941f6f0f25dac88bd71fd368fb284a10f Author: Hu, Lin1 <lin1...@intel.com> Date: Wed Feb 28 18:11:55 2024 +0800 vect: Support v4hi -> v4qi. gcc/ChangeLog: PR target/107432 * config/i386/mmx.md (VI2_32_64): New mode iterator. (mmxhalfmode): New mode attribute. (mmxhalfmodelower): Ditto. (truncv2hiv2qi2): Extend to mode v4hi and rename from truncv2hiv2qi2 to trunc<mode><mmxhalfmodelower>2. gcc/testsuite/ChangeLog: PR target/107432 * gcc.target/i386/pr107432-1.c: Modify test. * gcc.target/i386/pr107432-6.c: Add test. * gcc.target/i386/pr108938-3.c: Supporting truncv4hiv4qi in this patch affects the bswap optimization, so add the -mno-avx option for now; a bugzilla was opened to track it. Diff: --- gcc/config/i386/mmx.md | 17 +++++++++++++---- gcc/testsuite/gcc.target/i386/pr107432-1.c | 13 ++++++++++++- gcc/testsuite/gcc.target/i386/pr107432-6.c | 29 ++++++++++++++++++----------- gcc/testsuite/gcc.target/i386/pr108938-3.c | 2 +- 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index ea53f516cbb..24c0516726c 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -67,6 +67,9 @@ ;; 4-byte integer vector modes (define_mode_iterator VI_32 [V4QI V2HI]) +;; 8-byte and 4-byte HImode vector modes +(define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI]) + ;; 4-byte and 2-byte integer vector modes (define_mode_iterator VI_16_32 [V4QI V2QI V2HI]) @@ -106,6 +109,12 @@ (define_mode_attr mmxdoublemode [(V8QI "V8HI") (V4HI "V4SI")]) +(define_mode_attr mmxhalfmode + [(V4HI "V4QI") (V2HI "V2QI")]) + +(define_mode_attr mmxhalfmodelower + [(V4HI "v4qi") (V2HI "v2qi")]) + ;; Mapping of vector float modes to an integer mode of the same size (define_mode_attr mmxintvecmode [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI") @@ -4880,10 +4889,10 @@ DONE; }) -(define_insn "truncv2hiv2qi2" - [(set (match_operand:V2QI 0 "register_operand" "=v") - (truncate:V2QI - (match_operand:V2HI 1 "register_operand" 
"v")))] +(define_insn "trunc<mode><mmxhalfmodelower>2" + [(set (match_operand:<mmxhalfmode> 0 "register_operand" "=v") + (truncate:<mmxhalfmode> + (match_operand:VI2_32_64 1 "register_operand" "v")))] "TARGET_AVX512VL && TARGET_AVX512BW" "vpmovwb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c index a4f37447eb4..afdf367afe2 100644 --- a/gcc/testsuite/gcc.target/i386/pr107432-1.c +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c @@ -7,7 +7,8 @@ /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */ /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */ #include <x86intrin.h> @@ -113,6 +114,11 @@ __v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a) return __builtin_convertvector((__v2hi)a, __v2qi); } +__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a) +{ + return __builtin_convertvector((__v4hi)a, __v4qi); +} + __v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a) { return __builtin_convertvector((__v8hi)a, __v8qi); @@ -218,6 +224,11 @@ __v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a) return __builtin_convertvector((__v2hu)a, __v2qu); } +__v4qu mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a) +{ + return __builtin_convertvector((__v4hu)a, __v4qu); +} + __v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a) { return __builtin_convertvector((__v8hu)a, __v8qu); diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c index 4a68a10b089..dd585b2a351 100644 --- a/gcc/testsuite/gcc.target/i386/pr107432-6.c +++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c @@ -1,18 +1,15 @@ /* { 
dg-do compile } */ /* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */ -/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */ -/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2dq" 4 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttpd2dq" 6 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttps2dq" 6 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttps2dq" 8 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttph2w" 8 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttph2w" 10 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */ /* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! 
ia32 } } } } */ #include <x86intrin.h> @@ -103,6 +100,11 @@ __v2qi mm32_cvtph_epi8_builtin_convertvector(__v2hf a) return __builtin_convertvector((__v2hf)a, __v2qi); } +__v4qi mm64_cvtph_epi8_builtin_convertvector(__v4hf a) +{ + return __builtin_convertvector((__v4hf)a, __v4qi); +} + __v8qi mm128_cvtph_epi8_builtin_convertvector(__v8hf a) { return __builtin_convertvector((__v8hf)a, __v8qi); @@ -123,6 +125,11 @@ __v2qu mm32_cvtph_epu8_builtin_convertvector(__v2hf a) return __builtin_convertvector((__v2hf)a, __v2qu); } +__v4qu mm64_cvtph_epu8_builtin_convertvector(__v4hf a) +{ + return __builtin_convertvector((__v4hf)a, __v4qu); +} + __v8qu mm128_cvtph_epu8_builtin_convertvector(__v8hf a) { return __builtin_convertvector((__v8hf)a, __v8qu); diff --git a/gcc/testsuite/gcc.target/i386/pr108938-3.c b/gcc/testsuite/gcc.target/i386/pr108938-3.c index 32ac544c7ed..757a0c456bc 100644 --- a/gcc/testsuite/gcc.target/i386/pr108938-3.c +++ b/gcc/testsuite/gcc.target/i386/pr108938-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -mno-movbe" } */ +/* { dg-options "-O2 -ftree-vectorize -mno-movbe -mno-avx" } */ /* { dg-final { scan-assembler-times "bswap\[\t ]+" 2 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "bswap\[\t ]+" 3 { target ia32 } } } */