On Sun, Mar 03, 2019 at 10:34:29PM +0100, Uros Bizjak wrote: > On Sun, Mar 3, 2019 at 10:18 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Sun, Mar 3, 2019 at 9:27 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > > > 32-bit indices in VSIB address are sign-extended to 64 bits. In x32, > > > > when 32-bit indices are used as addresses, like in > > > > > > > > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6 > > > > > > > > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which > > > > is invalid address. Add addr32 prefix to UNSPEC_VSIBADDR instructions > > > > for x32 if there is no base register nor symbol. > > > > > > > > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with > > > > > > > > -Ofast -funroll-loops -march=haswell > > > > > > 1. Testcases 2 to 9 fail on fedora-29 with: > > > > > > In file included from /usr/include/features.h:452, > > > from /usr/include/bits/libc-header-start.h:33, > > > from /usr/include/stdlib.h:25, > > > from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27, > > > from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34, > > > from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29, > > > from > > > /home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7: > > > /usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such > > > file or directory > > > > I will update tests to remove "#include immintrin.h" > > > > > 2. Does the patch work with -maddress-mode={short,long}? > > > > Yes. > > > > > 3. The implementation is wrong. You should use operand substitution > > > with VSIB address as operand, not substitution without operand. > > > > How can I add an addr32 prefix with operand substitution? This is > > very similar to "%^". My updated patch will use "%^". > > Yes, using %^ is what I think would be the optimal solution. Other > than that, in your proposed patch, operand-less %_ scans the entire > current_output_insn to dig to the UNSPEC_VSIBADDR. You can just use > operand substitution, and do e.g. "%X2vgatherpf0..." where 'X' > processes operand 2 (vsib_address_operand) and conditionally outputs > addr32. > > BTW: In a new version of the patch, please specify what is changed > from the previous version. Otherwise, review of a new version is more > or less a guesswork what changed. >
Here is the updated patch. The change is return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; instead of return "%^vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; We can't use the %X5 since %X5 is used on operands. I also added a test for -maddress-mode=long. H.J. --- 32-bit indices in VSIB address are sign-extended to 64 bits. In x32, when 32-bit indices are used as addresses, like in vgatherdps %ymm7, 0(,%ymm9,1), %ymm6 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which is invalid address. Add addr32 prefix to UNSPEC_VSIBADDR instructions for x32 if there is no base register nor symbol. This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with -Ofast -funroll-loops -march=haswell gcc/ PR target/89523 * config/i386/i386.c (ix86_print_operand): Handle UNSPEC_VSIBADDR instructions for '%P' to add addr32 prefix if required. * config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend "%P5" to opcode. (*avx512pf_gatherpf<mode>df_mask): Likewise. (*avx512pf_scatterpf<mode>sf_mask): Likewise. (*avx512pf_scatterpf<mode>df_mask): Likewise. (*avx2_gathersi<mode>): Prepend "%P7" to opcode. (*avx2_gathersi<mode>_2): Prepend "%P6" to opcode. (*avx2_gatherdi<mode>): Prepend "%P7" to opcode. (*avx2_gatherdi<mode>_2): Prepend "%P6" to opcode. (*avx2_gatherdi<mode>_3): Prepend "%P7" to opcode. (*avx2_gatherdi<mode>_4): Prepend "%P6" to opcode.` (*avx512f_gathersi<mode>): Prepend "%P5" to opcode. (*avx512f_gathersi<mode>_2): Prepend "%P6" to opcode. (*avx512f_gatherdi<mode>): Prepend "%P5" to opcode. (*avx512f_gatherdi<mode>_2): Likewise. (*avx512f_scattersi<mode>): Likewise. (*avx512f_scatterdi<mode>): Likewise. gcc/testsuite/ PR target/89523 * gcc.target/i386/pr89523-1a.c: New test. * gcc.target/i386/pr89523-1b.c: Likewise. * gcc.target/i386/pr89523-2.c: Likewise. * gcc.target/i386/pr89523-3.c: Likewise. * gcc.target/i386/pr89523-4.c: Likewise. * gcc.target/i386/pr89523-5.c: Likewise. * gcc.target/i386/pr89523-6.c: Likewise. * gcc.target/i386/pr89523-7.c: Likewise. * gcc.target/i386/pr89523-8.c: Likewise. * gcc.target/i386/pr89523-9.c: Likewise. --- gcc/config/i386/i386.c | 35 +++++++++++++++- gcc/config/i386/sse.md | 46 +++++++++++----------- gcc/testsuite/gcc.target/i386/pr89523-1a.c | 24 +++++++++++ gcc/testsuite/gcc.target/i386/pr89523-1b.c | 7 ++++ gcc/testsuite/gcc.target/i386/pr89523-2.c | 37 +++++++++++++++++ gcc/testsuite/gcc.target/i386/pr89523-3.c | 36 +++++++++++++++++ gcc/testsuite/gcc.target/i386/pr89523-4.c | 36 +++++++++++++++++ gcc/testsuite/gcc.target/i386/pr89523-5.c | 39 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr89523-6.c | 38 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr89523-7.c | 42 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr89523-8.c | 41 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr89523-9.c | 30 ++++++++++++++ 12 files changed, 386 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c8f9957163b..ae9befb638d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17793,7 +17793,8 @@ print_reg (rtx x, int code, FILE *file) y -- print "st(0)" instead of "st" as a register. d -- print duplicated register operand for AVX instruction. D -- print condition for SSE cmp instruction. - P -- if PIC, print an @PLT suffix. + P -- if PIC, print an @PLT suffix or print addr32 prefix for + TARGET_X32 with UNSPEC_VSIBADDR operand. p -- print raw symbol name. X -- don't print any sort of PIC '@' suffix for a symbol. & -- print some in-use local-dynamic symbol name. @@ -18010,6 +18011,37 @@ ix86_print_operand (FILE *file, rtx x, int code) output_operand_lossage ("invalid operand size for operand code 'Z'"); return; + case 'P': + if (MEM_P (x)) + { + x = XEXP (x, 0); + if (GET_CODE (x) == UNSPEC + || XINT (x, 1) == UNSPEC_VSIBADDR) + { + if (TARGET_X32) + { + /* NB: 32-bit indices in VSIB address are + sign-extended to 64 bits. In x32, if 32-bit + address 0xf7fa3010 is sign-extended to + 0xfffffffff7fa3010 which is invalid address. + Add addr32 prefix if there is no base register + nor symbol. */ + bool ok; + struct ix86_address parts; + ok = ix86_decompose_address (XVECEXP (x, 0, 0), + &parts); + gcc_assert (ok && parts.index == NULL_RTX); + if (parts.base == NULL_RTX + && (parts.disp == NULL_RTX + || !symbolic_operand (parts.disp, + GET_MODE (parts.disp)))) + fputs ("addr32 ", file); + } + return; + } + } + break; + case 'd': case 'b': case 'w': @@ -18021,7 +18053,6 @@ ix86_print_operand (FILE *file, rtx x, int code) case 'y': case 'x': case 'X': - case 'P': case 'p': case 'V': break; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ac299495b2c..ac500f9cc63 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17401,9 +17401,9 @@ case 3: /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as gas changed what it requires incompatibly. */ - return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; + return "%P5vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; case 2: - return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; + return "%P5vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; default: gcc_unreachable (); } @@ -17448,9 +17448,9 @@ case 3: /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as gas changed what it requires incompatibly. */ - return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; + return "%P5vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; case 2: - return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; + return "%P5vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; default: gcc_unreachable (); } @@ -17496,10 +17496,10 @@ case 7: /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as gas changed what it requires incompatibly. */ - return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; + return "%P5vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; case 2: case 6: - return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; + return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; default: gcc_unreachable (); } @@ -17545,10 +17545,10 @@ case 7: /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as gas changed what it requires incompatibly. */ - return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; + return "%P5vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; case 2: case 6: - return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; + return "%P5vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; default: gcc_unreachable (); } @@ -20292,7 +20292,7 @@ UNSPEC_GATHER)) (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] "TARGET_AVX2" - "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}" + "%P7v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) @@ -20312,7 +20312,7 @@ UNSPEC_GATHER)) (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] "TARGET_AVX2" - "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}" + "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) @@ -20353,7 +20353,7 @@ UNSPEC_GATHER)) (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] "TARGET_AVX2" - "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}" + "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) @@ -20375,8 +20375,8 @@ "TARGET_AVX2" { if (<MODE>mode != <VEC_GATHER_SRCDI>mode) - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}"; - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"; + return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}"; + return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"; } [(set_attr "type" "ssemov") (set_attr "prefix" "vex") @@ -20400,7 +20400,7 @@ (const_int 2) (const_int 3)]))) (clobber (match_scratch:VI4F_256 1 "=&x"))] "TARGET_AVX2" - "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}" + "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) @@ -20423,7 +20423,7 @@ (const_int 2) (const_int 3)]))) (clobber (match_scratch:VI4F_256 1 "=&x"))] "TARGET_AVX2" - "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}" + "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}" [(set_attr "type" "ssemov") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) @@ -20463,7 +20463,7 @@ "TARGET_AVX512F" ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}" + "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -20484,7 +20484,7 @@ "TARGET_AVX512F" ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}" + "%P5v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -20525,7 +20525,7 @@ "TARGET_AVX512F" ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}" + "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -20550,11 +20550,11 @@ if (<MODE>mode != <VEC_GATHER_SRCDI>mode) { if (<MODE_SIZE> != 64) - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}"; + return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}"; else - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}"; + return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}"; } - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"; + return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"; } [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -20593,7 +20593,7 @@ "TARGET_AVX512F" ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}" + "%P5v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -20631,7 +20631,7 @@ "TARGET_AVX512F" ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as ;; gas changed what it requires incompatibly. - "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}" + "%P5v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1a.c b/gcc/testsuite/gcc.target/i386/pr89523-1a.c new file mode 100644 index 00000000000..0d0edab0363 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-maddress-mode=short -mx32 -Ofast -funroll-loops -march=haswell" } */ +/* { dg-final { scan-assembler-not "\tvgather" } } */ +/* { dg-final { scan-assembler "addr32 vgather" } } */ + +void foo (void); + +extern float *ncost; + +float +bar (int type, int num) +{ + int i; + float cost; + + cost = 0; + for (i = 0; i < num; i++) + if (type) + cost += ncost[i]; + else + foo (); + return (cost); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1b.c b/gcc/testsuite/gcc.target/i386/pr89523-1b.c new file mode 100644 index 00000000000..6a5c1d43625 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-1b.c @@ -0,0 +1,7 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-maddress-mode=long -mx32 -Ofast -funroll-loops -march=haswell" } */ +/* { dg-final { scan-assembler-not "\tvgather" } } */ +/* { dg-final { scan-assembler "addr32 vgather" } } */ + +#include "pr89523-1a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c new file mode 100644 index 00000000000..2ffbffe5e40 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c @@ -0,0 +1,37 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -march=haswell" } */ +/* { dg-final { scan-assembler "\tvgather" } } */ +/* { dg-final { scan-assembler-not "addr32 vgather" } } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef int __v4si __attribute__ ((__vector_size__ (16))); +typedef long long __v2di __attribute__ ((__vector_size__ (16))); + +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale) +{ + __v2df __zero = { 0.0, 0.0 }; + __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero); + __v2df x = x; + + return (__m128d) __builtin_ia32_gathersiv2df (x, + __base, + (__v4si)__index, + __mask, + __scale); +} + +__m128d x; +double *base; +__m128i idx; + +void extern +avx2_test (void) +{ + x = _mm_i32gather_pd (base, idx, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c new file mode 100644 index 00000000000..fc3631b694b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c @@ -0,0 +1,36 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -march=haswell" } */ +/* { dg-final { scan-assembler "\tvgather" } } */ +/* { dg-final { scan-assembler-not "addr32 vgather" } } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef int __v4si __attribute__ ((__vector_size__ (16))); +typedef long long __v2di __attribute__ ((__vector_size__ (16))); + +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale) +{ + __v2df __zero = { 0.0, 0.0 }; + __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero); + + return (__m128d) __builtin_ia32_gatherdiv2df (__zero, + __base, + (__v2di)__index, + __mask, + __scale); +} + +__m128d x; +double *base; +__m128i idx; + +void extern +avx2_test (void) +{ + x = _mm_i64gather_pd (base, idx, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c new file mode 100644 index 00000000000..3436e5dcae3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c @@ -0,0 +1,36 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -march=haswell" } */ +/* { dg-final { scan-assembler-not "\tvgather" } } */ +/* { dg-final { scan-assembler "addr32 vgather" } } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef int __v4si __attribute__ ((__vector_size__ (16))); +typedef long long __v2di __attribute__ ((__vector_size__ (16))); + +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale) +{ + __v2df __zero = { 0.0, 0.0 }; + __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero); + __v2df x = x; + + return (__m128d) __builtin_ia32_gathersiv2df (x, + __base, + (__v4si)__index, + __mask, + __scale); +} + +__m128d x; +__m128i idx; + +void extern +avx2_test (void) +{ + x = _mm_i32gather_pd ((void *) 0, idx, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c new file mode 100644 index 00000000000..6a769c7a249 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c @@ -0,0 +1,39 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512pf" } */ +/* { dg-final { scan-assembler "\tvgather" } } */ +/* { dg-final { scan-assembler-not "addr32 vgather" } } */ + +typedef int __v8si __attribute__ ((__vector_size__ (32))); +typedef long long __m256i __attribute__ ((__vector_size__ (32), + __may_alias__)); +typedef unsigned char __mmask8; + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr, + int __scale, int __hint) +{ + __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr, + __scale, __hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask, + void const *__addr, int __scale, int __hint) +{ + __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale, + __hint); +} + +volatile __m256i idx; +volatile __mmask8 m8; +void *base; + +void extern +avx512pf_test (void) +{ + _mm512_prefetch_i32gather_pd (idx, base, 8, 3); + _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 3); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c new file mode 100644 index 00000000000..82f795e085c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c @@ -0,0 +1,38 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512pf" } */ +/* { dg-final { scan-assembler-not "\tvgather" } } */ +/* { dg-final { scan-assembler "addr32 vgather" } } */ + +typedef int __v8si __attribute__ ((__vector_size__ (32))); +typedef long long __m256i __attribute__ ((__vector_size__ (32), + __may_alias__)); +typedef unsigned char __mmask8; + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr, + int __scale, int __hint) +{ + __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr, + __scale, __hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask, + void const *__addr, int __scale, int __hint) +{ + __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale, + __hint); +} + +volatile __m256i idx; +volatile __mmask8 m8; + +void extern +avx512pf_test (void) +{ + _mm512_prefetch_i32gather_pd (idx, (void *) 0, 8, 3); + _mm512_mask_prefetch_i32gather_pd (idx, m8, (void *) 0, 8, 3); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c new file mode 100644 index 00000000000..030b00d268a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c @@ -0,0 +1,42 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512f" } */ +/* { dg-final { scan-assembler "\tvscatter" } } */ +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */ + +typedef int __v8si __attribute__ ((__vector_size__ (32))); +typedef double __v8df __attribute__ ((__vector_size__ (64))); +typedef long long __m256i __attribute__ ((__vector_size__ (32), + __may_alias__)); +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef unsigned char __mmask8; + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1, + int __scale) +{ + __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, + (__v8si) __index, (__v8df) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask, + __m256i __index, __m512d __v1, int __scale) +{ + __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index, + (__v8df) __v1, __scale); +} + +volatile __m512d src; +volatile __m256i idx; +volatile __mmask8 m8; +double *addr; + +void extern +avx512f_test (void) +{ + _mm512_i32scatter_pd (addr, idx, src, 8); + _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c new file mode 100644 index 00000000000..465c985c2b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c @@ -0,0 +1,41 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512f" } */ +/* { dg-final { scan-assembler "\tvscatter" } } */ +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */ + +typedef long long __v8di __attribute__ ((__vector_size__ (64))); +typedef double __v8df __attribute__ ((__vector_size__ (64))); +typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef unsigned char __mmask8; + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1, + int __scale) +{ + __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF, + (__v8di) __index, (__v8df) __v1, __scale); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask, + __m512i __index, __m512d __v1, int __scale) +{ + __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index, + (__v8df) __v1, __scale); +} + +volatile __m512d src; +volatile __m512i idx; +volatile __mmask8 m8; +double *addr; + +void extern +avx512f_test (void) +{ + _mm512_i64scatter_pd (addr, idx, src, 8); + _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c new file mode 100644 index 00000000000..e9323126bd6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-options "-mx32 -O2 -mavx512f" } */ +/* { dg-final { scan-assembler-not "\tvscatter" } } */ +/* { dg-final { scan-assembler "addr32 vscatter" } } */ + +typedef int __v8si __attribute__ ((__vector_size__ (32))); +typedef double __v8df __attribute__ ((__vector_size__ (64))); +typedef long long __m256i __attribute__ ((__vector_size__ (32), + __may_alias__)); +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef unsigned char __mmask8; + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1, + int __scale) +{ + __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, + (__v8si) __index, (__v8df) __v1, __scale); +} + +volatile __m512d src; +volatile __m256i idx; + +void extern +avx512f_test (void) +{ + _mm512_i32scatter_pd ((void *) 0, idx, src, 8); +} -- 2.20.1