On Mon, Mar 4, 2019 at 2:54 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > On Sun, Mar 03, 2019 at 10:34:29PM +0100, Uros Bizjak wrote: > > On Sun, Mar 3, 2019 at 10:18 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > On Sun, Mar 3, 2019 at 9:27 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > > > On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > > > > > 32-bit indices in VSIB address are sign-extended to 64 bits. In x32, > > > > > when 32-bit indices are used as addresses, like in > > > > > > > > > > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6 > > > > > > > > > > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 > > > > > which > > > > > is invalid address. Add addr32 prefix to UNSPEC_VSIBADDR instructions > > > > > for x32 if there is no base register nor symbol. > > > > > > > > > > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with > > > > > > > > > > -Ofast -funroll-loops -march=haswell > > > > > > > > 1. Testcases 2 to 9 fail on fedora-29 with: > > > > > > > > In file included from /usr/include/features.h:452, > > > > from /usr/include/bits/libc-header-start.h:33, > > > > from /usr/include/stdlib.h:25, > > > > from > > > > /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27, > > > > from > > > > /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34, > > > > from > > > > /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29, > > > > from > > > > /home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7: > > > > /usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such > > > > file or directory > > > > > > I will update tests to remove "#include immintrin.h" > > > > > > > 2. Does the patch work with -maddress-mode={short,long}? > > > > > > Yes. > > > > > > > 3. The implementation is wrong. You should use operand substitution > > > > with VSIB address as operand, not substitution without operand. > > > > > > How can I add an addr32 prefix with operand substitution? 
This is > > > very similar to "%^". My updated patch will use "%^". > > > > Yes, using %^ is what I think would be the optimal solution. Other > > than that, in your proposed patch, operand-less %_ scans the entire > > current_output_insn to dig to the UNSPEC_VSIBADDR. You can just use > > operand substitution, and do e.g. "%X2vgatherpf0..." where 'X' > > processes operand 2 (vsib_address_operand) and conditionally outputs > > addr32. > > > > BTW: In a new version of the patch, please specify what is changed > > from the previous version. Otherwise, review of a new version is more > > or less a guesswork what changed. > > > > Here is the updated patch. The change is > > return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; > > instead of > > return "%^vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
Did I miss some version of the patch that introduced %^? You used %_ in your previous patch. Did you try with %^? > We can't use the %X5 since %X5 is used on operands. So, please introduce some other modifier ("X" was not to be taken literally, but *some* letter). Why are you overloading 'P'? I don't know why you are using operand 5 here; you can use operand 2 directly. Uros. > I also added a test for -maddress-mode=long. > > > H.J. > --- > 32-bit indices in VSIB address are sign-extended to 64 bits. In x32, > when 32-bit indices are used as addresses, like in > > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6 > > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which > is invalid address. Add addr32 prefix to UNSPEC_VSIBADDR instructions > for x32 if there is no base register nor symbol. > > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with > > -Ofast -funroll-loops -march=haswell > > gcc/ > > PR target/89523 > * config/i386/i386.c (ix86_print_operand): Handle UNSPEC_VSIBADDR > instructions for '%P' to add addr32 prefix if required. > * config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend > "%P5" to opcode. > (*avx512pf_gatherpf<mode>df_mask): Likewise. > (*avx512pf_scatterpf<mode>sf_mask): Likewise. > (*avx512pf_scatterpf<mode>df_mask): Likewise. > (*avx2_gathersi<mode>): Prepend "%P7" to opcode. > (*avx2_gathersi<mode>_2): Prepend "%P6" to opcode. > (*avx2_gatherdi<mode>): Prepend "%P7" to opcode. > (*avx2_gatherdi<mode>_2): Prepend "%P6" to opcode. > (*avx2_gatherdi<mode>_3): Prepend "%P7" to opcode. > (*avx2_gatherdi<mode>_4): Prepend "%P6" to opcode. > (*avx512f_gathersi<mode>): Prepend "%P5" to opcode. > (*avx512f_gathersi<mode>_2): Prepend "%P6" to opcode. > (*avx512f_gatherdi<mode>): Prepend "%P5" to opcode. > (*avx512f_gatherdi<mode>_2): Likewise. > (*avx512f_scattersi<mode>): Likewise. > (*avx512f_scatterdi<mode>): Likewise. > > gcc/testsuite/ > > PR target/89523 > * gcc.target/i386/pr89523-1a.c: New test. 
> * gcc.target/i386/pr89523-1b.c: Likewise. > * gcc.target/i386/pr89523-2.c: Likewise. > * gcc.target/i386/pr89523-3.c: Likewise. > * gcc.target/i386/pr89523-4.c: Likewise. > * gcc.target/i386/pr89523-5.c: Likewise. > * gcc.target/i386/pr89523-6.c: Likewise. > * gcc.target/i386/pr89523-7.c: Likewise. > * gcc.target/i386/pr89523-8.c: Likewise. > * gcc.target/i386/pr89523-9.c: Likewise. > --- > gcc/config/i386/i386.c | 35 +++++++++++++++- > gcc/config/i386/sse.md | 46 +++++++++++----------- > gcc/testsuite/gcc.target/i386/pr89523-1a.c | 24 +++++++++++ > gcc/testsuite/gcc.target/i386/pr89523-1b.c | 7 ++++ > gcc/testsuite/gcc.target/i386/pr89523-2.c | 37 +++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr89523-3.c | 36 +++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr89523-4.c | 36 +++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr89523-5.c | 39 ++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr89523-6.c | 38 ++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr89523-7.c | 42 ++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr89523-8.c | 41 +++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr89523-9.c | 30 ++++++++++++++ > 12 files changed, 386 insertions(+), 25 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1a.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1b.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index c8f9957163b..ae9befb638d 100644 > --- a/gcc/config/i386/i386.c > +++ 
b/gcc/config/i386/i386.c > @@ -17793,7 +17793,8 @@ print_reg (rtx x, int code, FILE *file) > y -- print "st(0)" instead of "st" as a register. > d -- print duplicated register operand for AVX instruction. > D -- print condition for SSE cmp instruction. > - P -- if PIC, print an @PLT suffix. > + P -- if PIC, print an @PLT suffix or print addr32 prefix for > + TARGET_X32 with UNSPEC_VSIBADDR operand. > p -- print raw symbol name. > X -- don't print any sort of PIC '@' suffix for a symbol. > & -- print some in-use local-dynamic symbol name. > @@ -18010,6 +18011,37 @@ ix86_print_operand (FILE *file, rtx x, int code) > output_operand_lossage ("invalid operand size for operand code > 'Z'"); > return; > > + case 'P': > + if (MEM_P (x)) > + { > + x = XEXP (x, 0); > + if (GET_CODE (x) == UNSPEC > + || XINT (x, 1) == UNSPEC_VSIBADDR) > + { > + if (TARGET_X32) > + { > + /* NB: 32-bit indices in VSIB address are > + sign-extended to 64 bits. In x32, if 32-bit > + address 0xf7fa3010 is sign-extended to > + 0xfffffffff7fa3010 which is invalid address. > + Add addr32 prefix if there is no base register > + nor symbol. */ > + bool ok; > + struct ix86_address parts; > + ok = ix86_decompose_address (XVECEXP (x, 0, 0), > + &parts); > + gcc_assert (ok && parts.index == NULL_RTX); > + if (parts.base == NULL_RTX > + && (parts.disp == NULL_RTX > + || !symbolic_operand (parts.disp, > + GET_MODE (parts.disp)))) > + fputs ("addr32 ", file); > + } > + return; > + } > + } > + break; > + > case 'd': > case 'b': > case 'w': > @@ -18021,7 +18053,6 @@ ix86_print_operand (FILE *file, rtx x, int code) > case 'y': > case 'x': > case 'X': > - case 'P': > case 'p': > case 'V': > break; > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index ac299495b2c..ac500f9cc63 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -17401,9 +17401,9 @@ > case 3: > /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as > gas changed what it requires incompatibly. 
*/ > - return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; > + return "%P5vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; > case 2: > - return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; > + return "%P5vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; > default: > gcc_unreachable (); > } > @@ -17448,9 +17448,9 @@ > case 3: > /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as > gas changed what it requires incompatibly. */ > - return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; > + return "%P5vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; > case 2: > - return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; > + return "%P5vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; > default: > gcc_unreachable (); > } > @@ -17496,10 +17496,10 @@ > case 7: > /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as > gas changed what it requires incompatibly. */ > - return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; > + return "%P5vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; > case 2: > case 6: > - return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; > + return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}"; > default: > gcc_unreachable (); > } > @@ -17545,10 +17545,10 @@ > case 7: > /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as > gas changed what it requires incompatibly. 
*/ > - return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; > + return "%P5vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; > case 2: > case 6: > - return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; > + return "%P5vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}"; > default: > gcc_unreachable (); > } > @@ -20292,7 +20292,7 @@ > UNSPEC_GATHER)) > (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] > "TARGET_AVX2" > - "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}" > + "%P7v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "vex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -20312,7 +20312,7 @@ > UNSPEC_GATHER)) > (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] > "TARGET_AVX2" > - "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}" > + "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "vex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -20353,7 +20353,7 @@ > UNSPEC_GATHER)) > (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] > "TARGET_AVX2" > - "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}" > + "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "vex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -20375,8 +20375,8 @@ > "TARGET_AVX2" > { > if (<MODE>mode != <VEC_GATHER_SRCDI>mode) > - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, > %4}"; > - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"; > + return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, > %4}"; > + return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"; > } > [(set_attr "type" "ssemov") > (set_attr "prefix" "vex") > @@ -20400,7 +20400,7 @@ > (const_int 2) (const_int 3)]))) > (clobber (match_scratch:VI4F_256 1 "=&x"))] > "TARGET_AVX2" > - 
"v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}" > + "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "vex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -20423,7 +20423,7 @@ > (const_int 2) (const_int 3)]))) > (clobber (match_scratch:VI4F_256 1 "=&x"))] > "TARGET_AVX2" > - "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}" > + "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "vex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -20463,7 +20463,7 @@ > "TARGET_AVX512F" > ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as > ;; gas changed what it requires incompatibly. > - "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}" > + "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -20484,7 +20484,7 @@ > "TARGET_AVX512F" > ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as > ;; gas changed what it requires incompatibly. > - "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}" > + "%P5v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -20525,7 +20525,7 @@ > "TARGET_AVX512F" > ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as > ;; gas changed what it requires incompatibly. 
> - "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}" > + "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -20550,11 +20550,11 @@ > if (<MODE>mode != <VEC_GATHER_SRCDI>mode) > { > if (<MODE_SIZE> != 64) > - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, > %x0%{%1%}|%x0%{%1%}, %X5}"; > + return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, > %x0%{%1%}|%x0%{%1%}, %X5}"; > else > - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, > %t0%{%1%}|%t0%{%1%}, %X5}"; > + return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, > %t0%{%1%}|%t0%{%1%}, %X5}"; > } > - return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, > %X5}"; > + return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, > %X5}"; > } > [(set_attr "type" "ssemov") > (set_attr "prefix" "evex") > @@ -20593,7 +20593,7 @@ > "TARGET_AVX512F" > ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as > ;; gas changed what it requires incompatibly. > - "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}" > + "%P5v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -20631,7 +20631,7 @@ > "TARGET_AVX512F" > ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as > ;; gas changed what it requires incompatibly. 
> - "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}" > + "%P5v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}" > [(set_attr "type" "ssemov") > (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1a.c > b/gcc/testsuite/gcc.target/i386/pr89523-1a.c > new file mode 100644 > index 00000000000..0d0edab0363 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-1a.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-maddress-mode=short -mx32 -Ofast -funroll-loops > -march=haswell" } */ > +/* { dg-final { scan-assembler-not "\tvgather" } } */ > +/* { dg-final { scan-assembler "addr32 vgather" } } */ > + > +void foo (void); > + > +extern float *ncost; > + > +float > +bar (int type, int num) > +{ > + int i; > + float cost; > + > + cost = 0; > + for (i = 0; i < num; i++) > + if (type) > + cost += ncost[i]; > + else > + foo (); > + return (cost); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1b.c > b/gcc/testsuite/gcc.target/i386/pr89523-1b.c > new file mode 100644 > index 00000000000..6a5c1d43625 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-1b.c > @@ -0,0 +1,7 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-maddress-mode=long -mx32 -Ofast -funroll-loops > -march=haswell" } */ > +/* { dg-final { scan-assembler-not "\tvgather" } } */ > +/* { dg-final { scan-assembler "addr32 vgather" } } */ > + > +#include "pr89523-1a.c" > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c > b/gcc/testsuite/gcc.target/i386/pr89523-2.c > new file mode 100644 > index 00000000000..2ffbffe5e40 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c > @@ -0,0 +1,37 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-mx32 -O2 -march=haswell" } */ > +/* { dg-final { scan-assembler "\tvgather" } } */ > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */ > + > +typedef double __v2df __attribute__ ((__vector_size__ (16))); > +typedef int __v4si __attribute__ ((__vector_size__ (16))); > +typedef long long __v2di __attribute__ ((__vector_size__ (16))); > + > +typedef long long __m128i __attribute__ ((__vector_size__ (16), > __may_alias__)); > +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale) > +{ > + __v2df __zero = { 0.0, 0.0 }; > + __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero); > + __v2df x = x; > + > + return (__m128d) __builtin_ia32_gathersiv2df (x, > + __base, > + (__v4si)__index, > + __mask, > + __scale); > +} > + > +__m128d x; > +double *base; > +__m128i idx; > + > +void extern > +avx2_test (void) > +{ > + x = _mm_i32gather_pd (base, idx, 1); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c > b/gcc/testsuite/gcc.target/i386/pr89523-3.c > new file mode 100644 > index 00000000000..fc3631b694b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-mx32 -O2 -march=haswell" } */ > +/* { dg-final { scan-assembler "\tvgather" } } */ > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */ > + > +typedef double __v2df __attribute__ ((__vector_size__ (16))); > +typedef int __v4si __attribute__ ((__vector_size__ (16))); > +typedef long long __v2di __attribute__ ((__vector_size__ (16))); > + > +typedef long long __m128i __attribute__ ((__vector_size__ (16), > __may_alias__)); > +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale) > +{ > + __v2df __zero = { 0.0, 0.0 }; > + __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero); > + > + return (__m128d) __builtin_ia32_gatherdiv2df (__zero, > + __base, > + (__v2di)__index, > + __mask, > + __scale); > +} > + > +__m128d x; > +double *base; > +__m128i idx; > + > +void extern > +avx2_test (void) > +{ > + x = _mm_i64gather_pd (base, idx, 1); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c > b/gcc/testsuite/gcc.target/i386/pr89523-4.c > new file mode 100644 > index 00000000000..3436e5dcae3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-mx32 -O2 -march=haswell" } */ > +/* { dg-final { scan-assembler-not "\tvgather" } } */ > +/* { dg-final { scan-assembler "addr32 vgather" } } */ > + > +typedef double __v2df __attribute__ ((__vector_size__ (16))); > +typedef int __v4si __attribute__ ((__vector_size__ (16))); > +typedef long long __v2di __attribute__ ((__vector_size__ (16))); > + > +typedef long long __m128i __attribute__ ((__vector_size__ (16), > __may_alias__)); > +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale) > +{ > + __v2df __zero = { 0.0, 0.0 }; > + __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero); > + __v2df x = x; > + > + return (__m128d) __builtin_ia32_gathersiv2df (x, > + __base, > + (__v4si)__index, > + __mask, > + __scale); > +} > + > +__m128d x; > +__m128i idx; > + > +void extern > +avx2_test (void) > +{ > + x = _mm_i32gather_pd ((void *) 0, idx, 1); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c > b/gcc/testsuite/gcc.target/i386/pr89523-5.c > new file mode 100644 > index 00000000000..6a769c7a249 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c > @@ -0,0 +1,39 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-mx32 -O2 -mavx512pf" } */ > +/* { dg-final { scan-assembler "\tvgather" } } */ > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */ > + > +typedef int __v8si __attribute__ ((__vector_size__ (32))); > +typedef long long __m256i __attribute__ ((__vector_size__ (32), > + __may_alias__)); > +typedef unsigned char __mmask8; > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr, > + int __scale, int __hint) > +{ > + __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr, > + __scale, __hint); > +} > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask, > + void const *__addr, int __scale, int > __hint) > +{ > + __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale, > + __hint); > +} > + > +volatile __m256i idx; > +volatile __mmask8 m8; > +void *base; > + > +void extern > +avx512pf_test (void) > +{ > + _mm512_prefetch_i32gather_pd (idx, base, 8, 3); > + _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 3); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c > b/gcc/testsuite/gcc.target/i386/pr89523-6.c > new file mode 100644 > index 00000000000..82f795e085c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c > @@ -0,0 +1,38 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-mx32 -O2 -mavx512pf" } */ > +/* { dg-final { scan-assembler-not "\tvgather" } } */ > +/* { dg-final { scan-assembler "addr32 vgather" } } */ > + > +typedef int __v8si __attribute__ ((__vector_size__ (32))); > +typedef long long __m256i __attribute__ ((__vector_size__ (32), > + __may_alias__)); > +typedef unsigned char __mmask8; > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr, > + int __scale, int __hint) > +{ > + __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr, > + __scale, __hint); > +} > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask, > + void const *__addr, int __scale, int > __hint) > +{ > + __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale, > + __hint); > +} > + > +volatile __m256i idx; > +volatile __mmask8 m8; > + > +void extern > +avx512pf_test (void) > +{ > + _mm512_prefetch_i32gather_pd (idx, (void *) 0, 8, 3); > + _mm512_mask_prefetch_i32gather_pd (idx, m8, (void *) 0, 8, 3); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c > b/gcc/testsuite/gcc.target/i386/pr89523-7.c > new file mode 100644 > index 00000000000..030b00d268a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-mx32 -O2 -mavx512f" } */ > +/* { dg-final { scan-assembler "\tvscatter" } } */ > +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */ > + > +typedef int __v8si __attribute__ ((__vector_size__ (32))); > +typedef double __v8df __attribute__ ((__vector_size__ (64))); > +typedef long long __m256i __attribute__ ((__vector_size__ (32), > + __may_alias__)); > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); > +typedef unsigned char __mmask8; > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1, > + int __scale) > +{ > + __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, > + (__v8si) __index, (__v8df) __v1, __scale); > +} > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask, > + __m256i __index, __m512d __v1, int __scale) > +{ > + __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index, > + (__v8df) __v1, __scale); > +} > + > +volatile __m512d src; > +volatile __m256i idx; > +volatile __mmask8 m8; > +double *addr; > + > +void extern > +avx512f_test (void) > +{ > + _mm512_i32scatter_pd (addr, idx, src, 8); > + _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c > b/gcc/testsuite/gcc.target/i386/pr89523-8.c > new file mode 100644 > index 00000000000..465c985c2b7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c > @@ -0,0 +1,41 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-mx32 -O2 -mavx512f" } */ > +/* { dg-final { scan-assembler "\tvscatter" } } */ > +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */ > + > +typedef long long __v8di __attribute__ ((__vector_size__ (64))); > +typedef double __v8df __attribute__ ((__vector_size__ (64))); > +typedef long long __m512i __attribute__ ((__vector_size__ (64), > __may_alias__)); > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); > +typedef unsigned char __mmask8; > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1, > + int __scale) > +{ > + __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF, > + (__v8di) __index, (__v8df) __v1, __scale); > +} > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask, > + __m512i __index, __m512d __v1, int __scale) > +{ > + __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index, > + (__v8df) __v1, __scale); > +} > + > +volatile __m512d src; > +volatile __m512i idx; > +volatile __mmask8 m8; > +double *addr; > + > +void extern > +avx512f_test (void) > +{ > + _mm512_i64scatter_pd (addr, idx, src, 8); > + _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c > b/gcc/testsuite/gcc.target/i386/pr89523-9.c > new file mode 100644 > index 00000000000..e9323126bd6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c > @@ -0,0 +1,30 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-require-effective-target maybe_x32 } */ > +/* { dg-options "-mx32 -O2 -mavx512f" } */ > +/* { dg-final { scan-assembler-not "\tvscatter" } } */ > +/* { dg-final { scan-assembler "addr32 vscatter" } } */ > + > +typedef int __v8si __attribute__ ((__vector_size__ (32))); > +typedef double __v8df __attribute__ ((__vector_size__ (64))); > +typedef long long __m256i __attribute__ ((__vector_size__ (32), > + __may_alias__)); > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); > +typedef unsigned char __mmask8; > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1, > + int __scale) > +{ > + __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, > + (__v8si) __index, (__v8df) __v1, __scale); > +} > + > +volatile __m512d src; > +volatile __m256i idx; > + > +void extern > +avx512f_test (void) > +{ > + _mm512_i32scatter_pd ((void *) 0, idx, src, 8); > +} > -- > 2.20.1 >