On Tue, 25 Jun 2024, Hu, Lin1 wrote:
> Hi,
>
> This is the current version.
>
> I haven't made any major changes to the original code, so I think it will
> have less impact on your code. I also think the current API is sufficient to
> support the mode selection you mentioned; if you have any concerns, please
> mention them and I can tweak it further.
>
> BRs,
> Lin
>
> gcc/ChangeLog:
>
> PR target/107432
> * tree-vect-generic.cc
> (expand_vector_conversion): Support convert for int -> int,
> float -> float and int <-> float.
> * tree-vect-stmts.cc (vectorizable_conversion): Wrap the
> indirect convert part.
> (supportable_indirect_convert_operation): New function.
> * tree-vectorizer.h (supportable_indirect_convert_operation):
> Declare the new function.
>
> gcc/testsuite/ChangeLog:
>
> PR target/107432
> * gcc.target/i386/pr107432-1.c: New test.
> * gcc.target/i386/pr107432-2.c: Ditto.
> * gcc.target/i386/pr107432-3.c: Ditto.
> * gcc.target/i386/pr107432-4.c: Ditto.
> * gcc.target/i386/pr107432-5.c: Ditto.
> * gcc.target/i386/pr107432-6.c: Ditto.
> * gcc.target/i386/pr107432-7.c: Ditto.
> ---
> gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 +++++++++++++++++++
> gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +++++++++
> gcc/testsuite/gcc.target/i386/pr107432-3.c | 55 +++++
> gcc/testsuite/gcc.target/i386/pr107432-4.c | 56 +++++
> gcc/testsuite/gcc.target/i386/pr107432-5.c | 72 ++++++
> gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 +++++++++++
> gcc/testsuite/gcc.target/i386/pr107432-7.c | 150 ++++++++++++
> gcc/tree-vect-generic.cc | 34 ++-
> gcc/tree-vect-stmts.cc | 259 ++++++++++++++-------
> gcc/tree-vectorizer.h | 4 +
> 10 files changed, 1013 insertions(+), 95 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> new file mode 100644
> index 00000000000..a4f37447eb4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> @@ -0,0 +1,234 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef short __v2hi __attribute__ ((__vector_size__ (4)));
> +typedef char __v2qi __attribute__ ((__vector_size__ (2)));
> +typedef char __v4qi __attribute__ ((__vector_size__ (4)));
> +typedef char __v8qi __attribute__ ((__vector_size__ (8)));
> +
> +typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4)));
> +typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8)));
> +typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
> +typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4)));
> +typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8)));
> +typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
> +
> +__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v2di)a, __v2si);
> +}
> +
> +__m128i mm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
> +{
> + return (__m128i)__builtin_convertvector((__v4di)a, __v4si);
> +}
> +
> +__m256i mm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
> +{
> + return (__m256i)__builtin_convertvector((__v8di)a, __v8si);
> +}
> +
> +__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v2di)a, __v2hi);
> +}
> +
> +__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
> +{
> + return __builtin_convertvector((__v4di)a, __v4hi);
> +}
> +
> +__m128i mm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
> +{
> + return (__m128i)__builtin_convertvector((__v8di)a, __v8hi);
> +}
> +
> +__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v2di)a, __v2qi);
> +}
> +
> +__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
> +{
> + return __builtin_convertvector((__v4di)a, __v4qi);
> +}
> +
> +__v8qi mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
> +{
> + return __builtin_convertvector((__v8di)a, __v8qi);
> +}
> +
> +__v2hi mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
> +{
> + return __builtin_convertvector((__v2si)a, __v2hi);
> +}
> +
> +__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v4si)a, __v4hi);
> +}
> +
> +__m128i mm256_cvtepi32_epi16_builtin_convertvector(__m256i a)
> +{
> + return (__m128i)__builtin_convertvector((__v8si)a, __v8hi);
> +}
> +
> +__m256i mm512_cvtepi32_epi16_builtin_convertvector(__m512i a)
> +{
> + return (__m256i)__builtin_convertvector((__v16si)a, __v16hi);
> +}
> +
> +__v2qi mm64_cvtepi32_epi8_builtin_convertvector(__v2si a)
> +{
> + return __builtin_convertvector((__v2si)a, __v2qi);
> +}
> +
> +__v4qi mm_cvtepi32_epi8_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v4si)a, __v4qi);
> +}
> +
> +__v8qi mm256_cvtepi32_epi8_builtin_convertvector(__m256i a)
> +{
> + return __builtin_convertvector((__v8si)a, __v8qi);
> +}
> +
> +__m128i mm512_cvtepi32_epi8_builtin_convertvector(__m512i a)
> +{
> + return (__m128i)__builtin_convertvector((__v16si)a, __v16qi);
> +}
> +
> +__v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
> +{
> + return __builtin_convertvector((__v2hi)a, __v2qi);
> +}
> +
> +__v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v8hi)a, __v8qi);
> +}
> +
> +__m128i mm256_cvtepi16_epi8_builtin_convertvector(__m256i a)
> +{
> + return (__m128i)__builtin_convertvector((__v16hi)a, __v16qi);
> +}
> +
> +__m256i mm512_cvtepi16_epi8_builtin_convertvector(__m512i a)
> +{
> + return (__m256i)__builtin_convertvector((__v32hi)a, __v32qi);
> +}
> +
> +__v2su mm_cvtepu64_epu32_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v2du)a, __v2su);
> +}
> +
> +__m128i mm256_cvtepu64_epu32_builtin_convertvector(__m256i a)
> +{
> + return (__m128i)__builtin_convertvector((__v4du)a, __v4su);
> +}
> +
> +__m256i mm512_cvtepu64_epu32_builtin_convertvector(__m512i a)
> +{
> + return (__m256i)__builtin_convertvector((__v8du)a, __v8su);
> +}
> +
> +__v2hu mm_cvtepu64_epu16_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v2du)a, __v2hu);
> +}
> +
> +__v4hu mm256_cvtepu64_epu16_builtin_convertvector(__m256i a)
> +{
> + return __builtin_convertvector((__v4du)a, __v4hu);
> +}
> +
> +__m128i mm512_cvtepu64_epu16_builtin_convertvector(__m512i a)
> +{
> + return (__m128i)__builtin_convertvector((__v8du)a, __v8hu);
> +}
> +
> +__v2qu mm_cvtepu64_epu8_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v2du)a, __v2qu);
> +}
> +
> +__v4qu mm256_cvtepu64_epu8_builtin_convertvector(__m256i a)
> +{
> + return __builtin_convertvector((__v4du)a, __v4qu);
> +}
> +
> +__v8qu mm512_cvtepu64_epu8_builtin_convertvector(__m512i a)
> +{
> + return __builtin_convertvector((__v8du)a, __v8qu);
> +}
> +
> +__v2hu mm32_cvtepu32_epu16_builtin_convertvector(__v2su a)
> +{
> + return __builtin_convertvector((__v2su)a, __v2hu);
> +}
> +
> +__v4hu mm_cvtepu32_epu16_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v4su)a, __v4hu);
> +}
> +
> +__m128i mm256_cvtepu32_epu16_builtin_convertvector(__m256i a)
> +{
> + return (__m128i)__builtin_convertvector((__v8su)a, __v8hu);
> +}
> +
> +__m256i mm512_cvtepu32_epu16_builtin_convertvector(__m512i a)
> +{
> + return (__m256i)__builtin_convertvector((__v16su)a, __v16hu);
> +}
> +
> +__v2qu mm32_cvtepu32_epu8_builtin_convertvector(__v2su a)
> +{
> + return __builtin_convertvector((__v2su)a, __v2qu);
> +}
> +
> +__v4qu mm_cvtepu2_epu8_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v4su)a, __v4qu);
> +}
> +
> +__v8qu mm256_cvtepu32_epu8_builtin_convertvector(__m256i a)
> +{
> + return __builtin_convertvector((__v8su)a, __v8qu);
> +}
> +
> +__m128i mm512_cvtepu32_epu8_builtin_convertvector(__m512i a)
> +{
> + return (__m128i)__builtin_convertvector((__v16su)a, __v16qu);
> +}
> +
> +__v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
> +{
> + return __builtin_convertvector((__v2hu)a, __v2qu);
> +}
> +
> +__v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
> +{
> + return __builtin_convertvector((__v8hu)a, __v8qu);
> +}
> +
> +__m128i mm256_cvtepu16_epu8_builtin_convertvector(__m256i a)
> +{
> + return (__m128i)__builtin_convertvector((__v16hu)a, __v16qu);
> +}
> +
> +__m256i mm512_cvtepu16_epu8_builtin_convertvector(__m512i a)
> +{
> + return (__m256i)__builtin_convertvector((__v32hu)a, __v32qu);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-2.c b/gcc/testsuite/gcc.target/i386/pr107432-2.c
> new file mode 100644
> index 00000000000..02ffd811cb4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-2.c
> @@ -0,0 +1,105 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vpmovsxdq" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxwq" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbq" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxwd" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 3 } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbw" 3 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef short __v2hi __attribute__ ((__vector_size__ (4)));
> +typedef char __v2qi __attribute__ ((__vector_size__ (2)));
> +typedef char __v4qi __attribute__ ((__vector_size__ (4)));
> +typedef char __v8qi __attribute__ ((__vector_size__ (8)));
> +
> +__m128i mm_cvtepi32_epi64_builtin_convertvector(__v2si a)
> +{
> + return __builtin_convertvector(a, __v2di);
> +}
> +
> +__m256i mm256_cvtepi32_epi64_builtin_convertvector(__v4si a)
> +{
> + return (__m256i)__builtin_convertvector(a, __v4di);
> +}
> +
> +__m512i mm512_cvtepi32_epi64_builtin_convertvector(__v8si a)
> +{
> + return (__m512i)__builtin_convertvector(a, __v8di);
> +}
> +
> +__m128i mm_cvtepi16_epi64_builtin_convertvector(__v2hi a)
> +{
> + return __builtin_convertvector(a, __v2di);
> +}
> +
> +__m256i mm256_cvtepi16_epi64_builtin_convertvector(__v4hi a)
> +{
> + return (__m256i)__builtin_convertvector(a, __v4di);
> +}
> +
> +__m512i mm512_cvtepi16_epi64_builtin_convertvector(__v8hi a)
> +{
> + return (__m512i)__builtin_convertvector(a, __v8di);
> +}
> +
> +__m128i mm_cvtepi8_epi64_builtin_convertvector(__v2qi a)
> +{
> + return __builtin_convertvector(a, __v2di);
> +}
> +
> +__m256i mm256_cvtepi8_epi64_builtin_convertvector(__v4qi a)
> +{
> + return (__m256i)__builtin_convertvector(a, __v4di);
> +}
> +
> +__m512i mm512_cvtepi8_epi64_builtin_convertvector(__v8qi a)
> +{
> + return (__m512i)__builtin_convertvector(a, __v8di);
> +}
> +
> +__m128i mm_cvtepi16_epi32_builtin_convertvector(__v4hi a)
> +{
> + return (__m128i)__builtin_convertvector(a, __v4si);
> +}
> +
> +__m256i mm256_cvtepi16_epi32_builtin_convertvector(__v8hi a)
> +{
> + return (__m256i)__builtin_convertvector(a, __v8si);
> +}
> +
> +__m512i mm512_cvtepi16_epi32_builtin_convertvector(__v16hi a)
> +{
> + return (__m512i)__builtin_convertvector(a, __v16si);
> +}
> +
> +__m128i mm_cvtepi8_epi32_builtin_convertvector(__v4qi a)
> +{
> + return (__m128i)__builtin_convertvector(a, __v4si);
> +}
> +
> +__m256i mm256_cvtepi8_epi32_builtin_convertvector(__v8qi a)
> +{
> + return (__m256i)__builtin_convertvector(a, __v8si);
> +}
> +
> +__m512i mm512_cvtepi8_epi32_builtin_convertvector(__v16qi a)
> +{
> + return (__m512i)__builtin_convertvector(a, __v16si);
> +}
> +
> +__m128i mm_cvtepi8_epi16_builtin_convertvector(__v8qi a)
> +{
> + return (__m128i)__builtin_convertvector(a, __v8hi);
> +}
> +
> +__m256i mm256_cvtepi8_epi16_builtin_convertvector(__v16qi a)
> +{
> + return (__m256i)__builtin_convertvector(a, __v16hi);
> +}
> +
> +__v32hi mm512_cvtepi8_epi16_builtin_convertvector(__v32qi a)
> +{
> + return __builtin_convertvector(a, __v32hi);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-3.c b/gcc/testsuite/gcc.target/i386/pr107432-3.c
> new file mode 100644
> index 00000000000..30dc947b6dd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-3.c
> @@ -0,0 +1,55 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vcvtpd2ps" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvtpd2ph" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph" 3 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +
> +__v2sf mm_cvtpd_ps_builtin_convertvector(__v2df a)
> +{
> + return __builtin_convertvector(a, __v2sf);
> +}
> +
> +__v4sf mm256_cvtpd_ps_builtin_convertvector(__v4df a)
> +{
> + return __builtin_convertvector(a, __v4sf);
> +}
> +
> +__v8sf mm512_cvtpd_ps_builtin_convertvector(__v8df a)
> +{
> + return __builtin_convertvector(a, __v8sf);
> +}
> +
> +__v2hf mm_cvtpd_ph_builtin_convertvector(__v2df a)
> +{
> + return __builtin_convertvector(a, __v2hf);
> +}
> +
> +__v4hf mm256_cvtpd_ph_builtin_convertvector(__v4df a)
> +{
> + return __builtin_convertvector(a, __v4hf);
> +}
> +
> +__v8hf mm512_cvtpd_ph_builtin_convertvector(__v8df a)
> +{
> + return __builtin_convertvector(a, __v8hf);
> +}
> +
> +__v4hf mm_cvtps_ph_builtin_convertvector(__v4sf a)
> +{
> + return __builtin_convertvector(a, __v4hf);
> +}
> +
> +__v8hf mm256_cvtps_ph_builtin_convertvector(__v8sf a)
> +{
> + return __builtin_convertvector(a, __v8hf);
> +}
> +
> +__v16hf mm512_cvtps_ph_builtin_convertvector(__v16sf a)
> +{
> + return __builtin_convertvector(a, __v16hf);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-4.c b/gcc/testsuite/gcc.target/i386/pr107432-4.c
> new file mode 100644
> index 00000000000..e537e7349e4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-4.c
> @@ -0,0 +1,56 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vcvtps2pd" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtps2pd" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtph2pd" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps" 3 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +
> +__v2df mm_cvtps_pd_builtin_convertvector(__v2sf a)
> +{
> + return __builtin_convertvector(a, __v2df);
> +}
> +
> +__v4df mm256_cvtps_pd_builtin_convertvector(__v4sf a)
> +{
> + return __builtin_convertvector(a, __v4df);
> +}
> +
> +__v8df mm512_cvtps_pd_builtin_convertvector(__v8sf a)
> +{
> + return __builtin_convertvector(a, __v8df);
> +}
> +
> +__v2df mm_cvtph_pd_builtin_convertvector(__v2hf a)
> +{
> + return __builtin_convertvector(a, __v2df);
> +}
> +
> +__v4df mm256_cvtph_pd_builtin_convertvector(__v4hf a)
> +{
> + return __builtin_convertvector(a, __v4df);
> +}
> +
> +__v8df mm512_cvtph_pd_builtin_convertvector(__v8hf a)
> +{
> + return __builtin_convertvector(a, __v8df);
> +}
> +
> +__v4sf mm_cvtph_ps_builtin_convertvector(__v4hf a)
> +{
> + return __builtin_convertvector(a, __v4sf);
> +}
> +
> +__v8sf mm256_cvtph_ps_builtin_convertvector(__v8hf a)
> +{
> + return __builtin_convertvector(a, __v8sf);
> +}
> +
> +__v16sf mm512_cvtph_ps_builtin_convertvector(__v16hf a)
> +{
> + return __builtin_convertvector(a, __v16sf);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-5.c b/gcc/testsuite/gcc.target/i386/pr107432-5.c
> new file mode 100644
> index 00000000000..5a44ef9f3b9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-5.c
> @@ -0,0 +1,72 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512dq -mavx512fp16 -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvttps2qq" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2qq" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2dq" 3 } } */
> +/* { dg-final { scan-assembler-times "vcvttph2qq" 3 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +
> +__v2si mm_cvtpd_epi32_builtin_convertvector(__v2df a)
> +{
> + return __builtin_convertvector(a, __v2si);
> +}
> +
> +__v4si mm256_cvtpd_epi32_builtin_convertvector(__v4df a)
> +{
> + return __builtin_convertvector(a, __v4si);
> +}
> +
> +__v8si mm512_cvtpd_epi32_builtin_convertvector(__v8df a)
> +{
> + return __builtin_convertvector(a, __v8si);
> +}
> +
> +__v2di mm_cvtps_epi64_builtin_convertvector(__v2sf a)
> +{
> + return __builtin_convertvector(a, __v2di);
> +}
> +
> +__v4di mm256_cvtps_epi64_builtin_convertvector(__v4sf a)
> +{
> + return __builtin_convertvector(a, __v4di);
> +}
> +
> +__v8di mm512_cvtps_epi64_builtin_convertvector(__v8sf a)
> +{
> + return __builtin_convertvector(a, __v8di);
> +}
> +
> +__v4si mm_cvtph_epi32_builtin_convertvector(__v4hf a)
> +{
> + return __builtin_convertvector(a, __v4si);
> +}
> +
> +__v8si mm256_cvtph_epi32_builtin_convertvector(__v8hf a)
> +{
> + return __builtin_convertvector(a, __v8si);
> +}
> +
> +__v16si mm512_cvtph_epi32_builtin_convertvector(__v16hf a)
> +{
> + return __builtin_convertvector(a, __v16si);
> +}
> +
> +__v2di mm_cvtph_epi64_builtin_convertvector(__v2hf a)
> +{
> + return __builtin_convertvector(a, __v2di);
> +}
> +
> +__v4di mm256_cvtph_epi64_builtin_convertvector(__v4hf a)
> +{
> + return __builtin_convertvector(a, __v4di);
> +}
> +
> +__v8di mm512_cvtph_epi64_builtin_convertvector(__v8hf a)
> +{
> + return __builtin_convertvector(a, __v8di);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
> new file mode 100644
> index 00000000000..4a68a10b089
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
> @@ -0,0 +1,139 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */
> +/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
> +/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +
> +#include <x86intrin.h>
> +
> +typedef char __v2qi __attribute__ ((__vector_size__ (2)));
> +typedef char __v4qi __attribute__ ((__vector_size__ (4)));
> +typedef char __v8qi __attribute__ ((__vector_size__ (8)));
> +typedef char __v16qi __attribute__ ((__vector_size__ (16)));
> +typedef unsigned char __v2qu __attribute__ ((vector_size (2)));
> +typedef unsigned char __v4qu __attribute__ ((vector_size (4)));
> +typedef unsigned char __v8qu __attribute__ ((vector_size (8)));
> +typedef unsigned char __v16qu __attribute__ ((vector_size (16)));
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
> +
> +__v2qi mm_cvtpd_epi8_builtin_convertvector(__v2df a)
> +{
> + return __builtin_convertvector((__v2df)a, __v2qi);
> +}
> +
> +__v4qi mm256_cvtpd_epi8_builtin_convertvector(__v4df a)
> +{
> + return __builtin_convertvector((__v4df)a, __v4qi);
> +}
> +
> +__v8qi mm512_cvtpd_epi8_builtin_convertvector(__v8df a)
> +{
> + return __builtin_convertvector((__v8df)a, __v8qi);
> +}
> +
> +__v2qu mm_cvtpd_epu8_builtin_convertvector(__v2df a)
> +{
> + return __builtin_convertvector((__v2df)a, __v2qu);
> +}
> +
> +__v4qu mm256_cvtpd_epu8_builtin_convertvector(__v4df a)
> +{
> + return __builtin_convertvector((__v4df)a, __v4qu);
> +}
> +
> +__v8qu mm512_cvtpd_epu8_builtin_convertvector(__v8df a)
> +{
> + return __builtin_convertvector((__v8df)a, __v8qu);
> +}
> +
> +__v2qi mm64_cvtps_epi8_builtin_convertvector(__v2sf a)
> +{
> + return __builtin_convertvector((__v2sf)a, __v2qi);
> +}
> +
> +__v4qi mm128_cvtps_epi8_builtin_convertvector(__v4sf a)
> +{
> + return __builtin_convertvector((__v4sf)a, __v4qi);
> +}
> +
> +__v8qi mm256_cvtps_epi8_builtin_convertvector(__v8sf a)
> +{
> + return __builtin_convertvector((__v8sf)a, __v8qi);
> +}
> +
> +__v16qi mm512_cvtps_epi8_builtin_convertvector(__v16sf a)
> +{
> + return __builtin_convertvector((__v16sf)a, __v16qi);
> +}
> +
> +__v2qu mm64_cvtps_epu8_builtin_convertvector(__v2sf a)
> +{
> + return __builtin_convertvector((__v2sf)a, __v2qu);
> +}
> +
> +__v4qu mm128_cvtps_epu8_builtin_convertvector(__v4sf a)
> +{
> + return __builtin_convertvector((__v4sf)a, __v4qu);
> +}
> +
> +__v8qu mm256_cvtps_epu8_builtin_convertvector(__v8sf a)
> +{
> + return __builtin_convertvector((__v8sf)a, __v8qu);
> +}
> +
> +__v16qu mm512_cvtps_epu8_builtin_convertvector(__v16sf a)
> +{
> + return __builtin_convertvector((__v16sf)a, __v16qu);
> +}
> +
> +__v2qi mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
> +{
> + return __builtin_convertvector((__v2hf)a, __v2qi);
> +}
> +
> +__v8qi mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
> +{
> + return __builtin_convertvector((__v8hf)a, __v8qi);
> +}
> +
> +__v16qi mm256_cvtph_epi8_builtin_convertvector(__v16hf a)
> +{
> + return __builtin_convertvector((__v16hf)a, __v16qi);
> +}
> +
> +__v32qi mm512_cvtph_epi8_builtin_convertvector(__v32hf a)
> +{
> + return __builtin_convertvector((__v32hf)a, __v32qi);
> +}
> +
> +__v2qu mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
> +{
> + return __builtin_convertvector((__v2hf)a, __v2qu);
> +}
> +
> +__v8qu mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
> +{
> + return __builtin_convertvector((__v8hf)a, __v8qu);
> +}
> +
> +__v16qu mm256_cvtph_epu8_builtin_convertvector(__v16hf a)
> +{
> + return __builtin_convertvector((__v16hf)a, __v16qu);
> +}
> +
> +__v32qu mm512_cvtph_epu8_builtin_convertvector(__v32hf a)
> +{
> + return __builtin_convertvector((__v32hf)a, __v32qu);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-7.c b/gcc/testsuite/gcc.target/i386/pr107432-7.c
> new file mode 100644
> index 00000000000..1b33e9a9508
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-7.c
> @@ -0,0 +1,150 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */
> +/* { dg-final { scan-assembler-times "vcvtdq2pd" 4 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtdq2pd" 6 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtdq2ps" 6 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtdq2ps" 8 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtw2ph" 8 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvtw2ph" 10 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } } } } */
> +
> +#include <x86intrin.h>
> +
> +typedef char __v2qi __attribute__ ((__vector_size__ (2)));
> +typedef char __v4qi __attribute__ ((__vector_size__ (4)));
> +typedef char __v8qi __attribute__ ((__vector_size__ (8)));
> +typedef char __v16qi __attribute__ ((__vector_size__ (16)));
> +typedef unsigned char __v2qu __attribute__ ((vector_size (2)));
> +typedef unsigned char __v4qu __attribute__ ((vector_size (4)));
> +typedef unsigned char __v8qu __attribute__ ((vector_size (8)));
> +typedef unsigned char __v16qu __attribute__ ((vector_size (16)));
> +typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
> +typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
> +typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
> +
> +__v2df mm_cvtepi8_pd_builtin_convertvector(__v2qi a)
> +{
> + return __builtin_convertvector((__v2qi)a, __v2df);
> +}
> +
> +__v4df mm256_cvtepi8_pd_builtin_convertvector(__v4qi a)
> +{
> + return __builtin_convertvector((__v4qi)a, __v4df);
> +}
> +
> +__v8df mm512_cvtepi8_pd_builtin_convertvector(__v8qi a)
> +{
> + return __builtin_convertvector((__v8qi)a, __v8df);
> +}
> +
> +__v2df mm_cvtepu8_pd_builtin_convertvector(__v2qu a)
> +{
> + return __builtin_convertvector((__v2qu)a, __v2df);
> +}
> +
> +__v4df mm256_cvtepu8_pd_builtin_convertvector(__v4qu a)
> +{
> + return __builtin_convertvector((__v4qu)a, __v4df);
> +}
> +
> +__v8df mm512_cvtepu8_pd_builtin_convertvector(__v8qu a)
> +{
> + return __builtin_convertvector((__v8qu)a, __v8df);
> +}
> +
> +__v2sf mm64_cvtepi8_ps_builtin_convertvector(__v2qi a)
> +{
> + return __builtin_convertvector((__v2qi)a, __v2sf);
> +}
> +
> +__v4sf mm128_cvtepi8_ps_builtin_convertvector(__v4qi a)
> +{
> + return __builtin_convertvector((__v4qi)a, __v4sf);
> +}
> +
> +__v8sf mm256_cvtepi8_ps_builtin_convertvector(__v8qi a)
> +{
> + return __builtin_convertvector((__v8qi)a, __v8sf);
> +}
> +
> +__v16sf mm512_cvtepi8_ps_builtin_convertvector(__v16qi a)
> +{
> + return __builtin_convertvector((__v16qi)a, __v16sf);
> +}
> +
> +__v2sf mm64_cvtepu8_ps_builtin_convertvector(__v2qu a)
> +{
> + return __builtin_convertvector((__v2qu)a, __v2sf);
> +}
> +
> +__v4sf mm128_cvtepu8_ps_builtin_convertvector(__v4qu a)
> +{
> + return __builtin_convertvector((__v4qu)a, __v4sf);
> +}
> +
> +__v8sf mm256_cvtepu8_ps_builtin_convertvector(__v8qu a)
> +{
> + return __builtin_convertvector((__v8qu)a, __v8sf);
> +}
> +
> +__v16sf mm512_cvtepu8_ps_builtin_convertvector(__v16qu a)
> +{
> + return __builtin_convertvector((__v16qu)a, __v16sf);
> +}
> +
> +__v2hf mm32_cvtepi8_ph_builtin_convertvector(__v2qi a)
> +{
> + return __builtin_convertvector((__v2qi)a, __v2hf);
> +}
> +
> +__v4hf mm64_cvtepi8_ph_builtin_convertvector(__v4qi a)
> +{
> + return __builtin_convertvector((__v4qi)a, __v4hf);
> +}
> +
> +__v8hf mm128_cvtepi8_ph_builtin_convertvector(__v8qi a)
> +{
> + return __builtin_convertvector((__v8qi)a, __v8hf);
> +}
> +
> +__v16hf mm256_cvtepi8_ph_builtin_convertvector(__v16qi a)
> +{
> + return __builtin_convertvector((__v16qi)a, __v16hf);
> +}
> +
> +__v32hf mm512_cvtepi8_ph_builtin_convertvector(__v32qi a)
> +{
> + return __builtin_convertvector((__v32qi)a, __v32hf);
> +}
> +
> +__v2hf mm32_cvtepu8_ph_builtin_convertvector(__v2qu a)
> +{
> + return __builtin_convertvector((__v2qu)a, __v2hf);
> +}
> +
> +__v4hf mm64_cvtepu8_ph_builtin_convertvector(__v4qu a)
> +{
> + return __builtin_convertvector((__v4qu)a, __v4hf);
> +}
> +
> +__v8hf mm128_cvtepu8_ph_builtin_convertvector(__v8qu a)
> +{
> + return __builtin_convertvector((__v8qu)a, __v8hf);
> +}
> +
> +__v16hf mm256_cvtepu8_ph_builtin_convertvector(__v16qu a)
> +{
> + return __builtin_convertvector((__v16qu)a, __v16hf);
> +}
> +
> +__v32hf mm512_cvtepu8_ph_builtin_convertvector(__v32qu a)
> +{
> + return __builtin_convertvector((__v32qu)a, __v32hf);
> +}
> diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> index ea0069f7a67..b5c87bb0e13 100644
> --- a/gcc/tree-vect-generic.cc
> +++ b/gcc/tree-vect-generic.cc
> @@ -45,6 +45,8 @@ along with GCC; see the file COPYING3. If not see
> #include "gimple-match.h"
> #include "recog.h" /* FIXME: for insn_data */
> #include "optabs-libfuncs.h"
> +#include "cfgloop.h"
> +#include "tree-vectorizer.h"
>
>
> /* Build a ternary operation and gimplify it. Emit code before GSI.
> @@ -1870,14 +1872,36 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
> else if (ret_elt_bits > arg_elt_bits)
> modifier = WIDEN;
>
> - if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
> + /*
> + if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> {
> - if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> + g = gimple_build_assign (lhs, code1, arg);
> + gsi_replace (gsi, g, false);
> + return;
> + }
> + */
> +
> + vec<std::pair<tree, tree_code> > converts = vNULL;
This leaks: 'converts' is never released before returning.  Please simply use
auto_vec<std::pair<tree, tree_code> > converts; instead.
> + if (supportable_indirect_convert_operation (code,
> + ret_type, arg_type,
> + &converts,
> + arg))
> + {
> + if (TYPE_MODE (converts[0].first) == TYPE_MODE (ret_type))
> + g = gimple_build_assign (lhs, converts[0].second, arg);
> + else
> {
> - g = gimple_build_assign (lhs, code1, arg);
> - gsi_replace (gsi, g, false);
> - return;
> + new_rhs = make_ssa_name (converts[0].first);
> + g = gimple_build_assign (new_rhs, converts[0].second, arg);
> + gsi_insert_before (gsi, g, GSI_SAME_STMT);
> + g = gimple_build_assign (lhs, converts[1].second, new_rhs);
> }
> + gsi_replace (gsi, g, false);
I would like to see a loop over 'converts' here, rather than hard-coding the
one-step and two-step cases via converts[0] and converts[1].
> + return;
> + }
> +
> + if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
> + {
> /* Can't use get_compute_type here, as supportable_convert_operation
> doesn't necessarily use an optab and needs two arguments. */
> tree vec_compute_type
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 05a169ecb2d..f4c829ff6af 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
> tree scalar_dest;
> tree op0, op1 = NULL_TREE;
> loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> - tree_code tc1, tc2;
> + tree_code tc1;
> code_helper code, code1, code2;
> code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
> tree new_temp;
> @@ -5367,6 +5367,7 @@ vectorizable_conversion (vec_info *vinfo,
> scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
> scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
> opt_scalar_mode rhs_mode_iter;
> + vec<std::pair<tree, tree_code> > converts = vNULL;
>
> /* Supportable by target? */
> switch (modifier)
> @@ -5377,99 +5378,25 @@ vectorizable_conversion (vec_info *vinfo,
> && !CONVERT_EXPR_CODE_P (code))
> return false;
> gcc_assert (code.is_tree_code ());
> - if (supportable_convert_operation ((tree_code) code, vectype_out,
> - vectype_in, &tc1))
> - {
> - code1 = tc1;
> - break;
> - }
> -
> - /* For conversions between float and integer types try whether
> - we can use intermediate signed integer types to support the
> - conversion. */
> - if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> - && (code == FLOAT_EXPR ||
> - (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> - {
> - bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
> - bool float_expr_p = code == FLOAT_EXPR;
> - unsigned short target_size;
> - scalar_mode intermediate_mode;
> - if (demotion)
> - {
> - intermediate_mode = lhs_mode;
> - target_size = GET_MODE_SIZE (rhs_mode);
> - }
> + if (supportable_indirect_convert_operation (code,
> + vectype_out,
> + vectype_in,
> + &converts,
> + op0))
> + {
> + if (converts.length () == 1)
> + code1 = converts[0].second;
> else
> {
> - target_size = GET_MODE_SIZE (lhs_mode);
> - if (!int_mode_for_size
> - (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
> - goto unsupported;
> - }
> - code1 = float_expr_p ? code : NOP_EXPR;
> - codecvt1 = float_expr_p ? NOP_EXPR : code;
> - opt_scalar_mode mode_iter;
> - FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> - {
> - intermediate_mode = mode_iter.require ();
> -
> - if (GET_MODE_SIZE (intermediate_mode) > target_size)
> - break;
> -
> - scalar_mode cvt_mode;
> - if (!int_mode_for_size
> - (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
> - break;
> -
> - cvt_type = build_nonstandard_integer_type
> - (GET_MODE_BITSIZE (cvt_mode), 0);
> -
> - /* Check if the intermediate type can hold OP0's range.
> - When converting from float to integer this is not necessary
> - because values that do not fit the (smaller) target type are
> - unspecified anyway. */
> - if (demotion && float_expr_p)
> - {
> - wide_int op_min_value, op_max_value;
> - if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
> - break;
> -
> - if (cvt_type == NULL_TREE
> - || (wi::min_precision (op_max_value, SIGNED)
> - > TYPE_PRECISION (cvt_type))
> - || (wi::min_precision (op_min_value, SIGNED)
> - > TYPE_PRECISION (cvt_type)))
> - continue;
> - }
> -
> - cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type,
> slp_node);
> - /* This should only happened for SLP as long as loop vectorizer
> - only supports same-sized vector. */
> - if (cvt_type == NULL_TREE
> - || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
> - || !supportable_convert_operation ((tree_code) code1,
> - vectype_out,
> - cvt_type, &tc1)
> - || !supportable_convert_operation ((tree_code) codecvt1,
> - cvt_type,
> - vectype_in, &tc2))
> - continue;
> -
> - found_mode = true;
> - break;
> - }
> -
> - if (found_mode)
> - {
> - multi_step_cvt++;
> - interm_types.safe_push (cvt_type);
> - cvt_type = NULL_TREE;
> - code1 = tc1;
> - codecvt1 = tc2;
> - break;
> + gcc_assert (converts.length () == 2);
I'd rather see an '&& converts.length () <= 2' check after the
supportable_indirect_convert_operation call (as part of the condition),
instead of this assert.
> + multi_step_cvt = converts.length () - 1;
> + codecvt1 = converts[0].second;
> + code1 = converts[1].second;
> + interm_types.safe_push (converts[0].first);
cvt_type is no longer assigned from NULL_TREE, not sure if that matters.
OK with those changes.
Thanks,
Richard.
> }
> + break;
> }
> +
> /* FALLTHRU */
> unsupported:
> if (dump_enabled_p ())
> @@ -14626,6 +14553,158 @@ supportable_narrowing_operation (code_helper code,
> return false;
> }
>
> +/* Function supportable_indirect_convert_operation
> +
> + Check whether an operation represented by the code CODE is a single
> + convert operation, or a sequence of two convert operations, that is
> + supported by the target platform in vector form (i.e., when operating
> + on arguments of type VECTYPE_IN producing a result of type VECTYPE_OUT).
> +
> + Convert operations we currently support directly are FIX_TRUNC and FLOAT.
> + This function first checks if the operation is supported by the target
> + platform directly (via vector tree-codes); otherwise, for FLOAT_EXPR
> + (and FIX_TRUNC_EXPR without -ftrapping-math) between differently sized
> + element modes, it tries to go through an intermediate signed integer
> + vector type.
> +
> + Output:
> + - CONVERTS contains, for each step of the conversion in order, a pair
> + of the vector type produced by that step and the tree code to use
> + for it. A directly supported conversion pushes a single pair; an
> + indirect conversion pushes the intermediate step, followed by the
> + final step unless the intermediate type already has the mode of
> + VECTYPE_OUT.
> +
> + OP0 is the operand being converted; for a demoting FLOAT_EXPR its
> + range info is used to check that the intermediate type can hold its
> + values. */
> +bool
> +supportable_indirect_convert_operation (code_helper code,
> + tree vectype_out,
> + tree vectype_in,
> + vec<std::pair<tree, tree_code> >
> *converts,
> + tree op0)
> +{
> + bool found_mode = false;
> + scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
> + scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
> + opt_scalar_mode mode_iter;
> + tree_code tc1, tc2, code1, code2;
> +
> + tree cvt_type = NULL_TREE;
> + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
> +
> + if (supportable_convert_operation ((tree_code) code,
> + vectype_out,
> + vectype_in,
> + &tc1))
> + {
> + converts->safe_push (std::make_pair (vectype_out, tc1));
> + return true;
> + }
> +
> + /* For conversions between float and integer types try whether
> + we can use intermediate signed integer types to support the
> + conversion. */
> + if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> + && (code == FLOAT_EXPR
> + || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> + {
> + bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
> + bool float_expr_p = code == FLOAT_EXPR;
> + unsigned short target_size;
> + scalar_mode intermediate_mode;
> + if (demotion)
> + {
> + intermediate_mode = lhs_mode;
> + target_size = GET_MODE_SIZE (rhs_mode);
> + }
> + else
> + {
> + target_size = GET_MODE_SIZE (lhs_mode);
> + if (!int_mode_for_size
> + (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
> + return false;
> + }
> + code1 = float_expr_p ? (tree_code) code : NOP_EXPR;
> + code2 = float_expr_p ? NOP_EXPR : (tree_code) code;
> + opt_scalar_mode mode_iter;
> + FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> + {
> + intermediate_mode = mode_iter.require ();
> +
> + if (GET_MODE_SIZE (intermediate_mode) > target_size)
> + break;
> +
> + scalar_mode cvt_mode;
> + if (!int_mode_for_size
> + (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
> + break;
> +
> + cvt_type = build_nonstandard_integer_type
> + (GET_MODE_BITSIZE (cvt_mode), 0);
> +
> + /* Check if the intermediate type can hold OP0's range.
> + When converting from float to integer this is not necessary
> + because values that do not fit the (smaller) target type are
> + unspecified anyway. */
> + if (demotion && float_expr_p)
> + {
> + wide_int op_min_value, op_max_value;
> + /* For vector form, it looks like op0 doesn't have RANGE_INFO.
> + In the future, if it is supported, changes may need to be made
> + to this part, such as checking the RANGE of each element
> + in the vector. */
> + if (!SSA_NAME_RANGE_INFO (op0)
> + || !vect_get_range_info (op0, &op_min_value, &op_max_value))
> + break;
> +
> + if (cvt_type == NULL_TREE
> + || (wi::min_precision (op_max_value, SIGNED)
> + > TYPE_PRECISION (cvt_type))
> + || (wi::min_precision (op_min_value, SIGNED)
> + > TYPE_PRECISION (cvt_type)))
> + continue;
> + }
> +
> + /*
> + if (vinfo != NULL && slp_node != NULL)
> + cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
> + else
> + {
> + bool uns = TYPE_UNSIGNED (TREE_TYPE (vectype_out))
> + || TYPE_UNSIGNED (TREE_TYPE (vectype_in));
> + cvt_type = build_nonstandard_integer_type
> + (GET_MODE_BITSIZE (cvt_mode), uns);
> + cvt_type = build_vector_type (cvt_type, nelts);
> + }
> + */
> + cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE
> (vectype_in),
> + cvt_type,
> + nelts);
> + /* This should only happen for SLP as long as loop vectorizer
> + only supports same-sized vector. */
> + if (cvt_type == NULL_TREE
> + || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
> + || !supportable_convert_operation ((tree_code) code1,
> + vectype_out,
> + cvt_type, &tc1)
> + || !supportable_convert_operation ((tree_code) code2,
> + cvt_type,
> + vectype_in, &tc2))
> + continue;
> +
> + found_mode = true;
> + break;
> + }
> +
> + if (found_mode)
> + {
> + converts->safe_push (std::make_pair (cvt_type, tc2));
> + if (TYPE_MODE (cvt_type) != TYPE_MODE (vectype_out))
> + converts->safe_push (std::make_pair (vectype_out, tc1));
> + return true;
> + }
> + }
> + return false;
> +}
> +
> /* Generate and return a vector mask of MASK_TYPE such that
> mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
> Add the statements to SEQ. */
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 97ec9c341e7..53b3f24cb2e 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2265,6 +2265,10 @@ extern bool supportable_widening_operation (vec_info*,
> code_helper,
> extern bool supportable_narrowing_operation (code_helper, tree, tree,
> code_helper *, int *,
> vec<tree> *);
> +extern bool supportable_indirect_convert_operation (code_helper,
> + tree, tree,
> + vec<std::pair<tree,
> tree_code> > *,
> + tree = NULL_TREE);
>
> extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
> enum vect_cost_for_stmt, stmt_vec_info,
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)