On Mon, Nov 10, 2025 at 3:35 PM Hu, Lin1 <[email protected]> wrote: > > Hi, > > The AMX intrinsics previously used string concatenation with the '#' > operator to construct register names, which prevented their use with > C++ template non-type parameters. This patch converts all AMX intrinsics > to use inline assembly constraints with the %c format specifier. > > The Intel-syntax alternatives also carried a % prefix on register names; update them to use plain > register names without the % prefix. > > Bootstrapped and regtested on x86_64-linux-pc, ok for trunk? Ok. Please also backport to the release branch (for the related code, and with bootstrap and regtest on the release branch). > > BRs, > Lin > > gcc/ChangeLog: > > PR target/122446 > * config/i386/amxavx512intrin.h (_tile_cvtrowps2bf16hi_internal): > Input register name by inline asm %c[...], and remove %% before tmm > from intel side. > (_tile_cvtrowps2bf16li_internal): Ditto. > * config/i386/amxbf16intrin.h (_tile_dpbf16ps_internal): Ditto > * config/i386/amxcomplexintrin.h (_tile_cmmimfp16ps_internal): Ditto > (_tile_cmmrlfp16ps_internal): Ditto > (_tile_cmmimfp16ps): Ditto > (_tile_cmmrlfp16ps): Ditto > * config/i386/amxfp16intrin.h (_tile_dpfp16ps_internal): Ditto > (_tile_dpfp16ps): Ditto > * config/i386/amxfp8intrin.h (_tile_dpbf8ps_internal): Ditto > (_tile_dpbhf8ps_internal): Ditto > (_tile_dphbf8ps_internal): Ditto > (_tile_dphf8ps_internal): Ditto > (_tile_dpbf8ps): Ditto > (_tile_dpbhf8ps): Ditto > (_tile_dphbf8ps): Ditto > (_tile_dphf8ps): Ditto > * config/i386/amxint8intrin.h (_tile_int8_dp_internal): Ditto > * config/i386/amxmovrsintrin.h (_tile_loaddrs_internal): Ditto > (_tile_loaddrst1_internal): Ditto > (_tile_loaddrs): Ditto > (_tile_loaddrst1): Ditto > * config/i386/amxtf32intrin.h (_tile_mmultf32ps_internal): Ditto > * config/i386/amxtileintrin.h (_tile_loadd): Ditto > (_tile_loadd_internal): Ditto > (_tile_stream_loadd): Ditto > (_tile_stream_loadd_internal): Ditto > (_tile_stored): Ditto > 
(_tile_stored_internal): Ditto > (_tile_zero): Ditto > (_tile_zero_internal): Ditto > > gcc/testsuite/ChangeLog: > > PR target/122446 > * gcc.target/i386/amxbf16-asmintel-1.c: Modify dg-final to check intel > form. > * gcc.target/i386/amxcomplex-asmintel-1.c: Ditto. > * gcc.target/i386/amxfp16-asmintel-1.c: Ditto. > * gcc.target/i386/amxfp8-asmintel-1.c: Ditto. > * gcc.target/i386/amxint8-asmintel-1.c: Ditto. > * gcc.target/i386/amxmovrs-asmintel-1.c: Ditto. > * gcc.target/i386/amxtf32-asmintel-1.c: Ditto. > * gcc.target/i386/amxtile-asmintel-1.c: Ditto. > * g++.target/i386/pr122446-1.C: New test. > * g++.target/i386/pr122446-amxavx512.C: Ditto. > * g++.target/i386/pr122446-amxbf16.C: Ditto. > * g++.target/i386/pr122446-amxcomplex.C: Ditto. > * g++.target/i386/pr122446-amxfp16.C: Ditto. > * g++.target/i386/pr122446-amxfp8.C: Ditto. > * g++.target/i386/pr122446-amxint8.C: Ditto. > * g++.target/i386/pr122446-amxmovrs.C: Ditto. > * g++.target/i386/pr122446-amxtf32.C: Ditto. > * g++.target/i386/pr122446-amxtile.C: Ditto. 
> --- > gcc/config/i386/amxavx512intrin.h | 57 ++++++---- > gcc/config/i386/amxbf16intrin.h | 6 +- > gcc/config/i386/amxcomplexintrin.h | 18 +-- > gcc/config/i386/amxfp16intrin.h | 8 +- > gcc/config/i386/amxfp8intrin.h | 38 ++++--- > gcc/config/i386/amxint8intrin.h | 4 +- > gcc/config/i386/amxmovrsintrin.h | 20 ++-- > gcc/config/i386/amxtf32intrin.h | 6 +- > gcc/config/i386/amxtileintrin.h | 20 ++-- > gcc/testsuite/g++.target/i386/pr122446-1.C | 17 +++ > .../g++.target/i386/pr122446-amxavx512.C | 104 ++++++++++++++++++ > .../g++.target/i386/pr122446-amxbf16.C | 16 +++ > .../g++.target/i386/pr122446-amxcomplex.C | 24 ++++ > .../g++.target/i386/pr122446-amxfp16.C | 16 +++ > .../g++.target/i386/pr122446-amxfp8.C | 40 +++++++ > .../g++.target/i386/pr122446-amxint8.C | 40 +++++++ > .../g++.target/i386/pr122446-amxmovrs.C | 31 ++++++ > .../g++.target/i386/pr122446-amxtf32.C | 16 +++ > .../g++.target/i386/pr122446-amxtile.C | 50 +++++++++ > .../gcc.target/i386/amxbf16-asmintel-1.c | 2 +- > .../gcc.target/i386/amxcomplex-asmintel-1.c | 4 +- > .../gcc.target/i386/amxfp16-asmintel-1.c | 2 +- > .../gcc.target/i386/amxfp8-asmintel-1.c | 8 +- > .../gcc.target/i386/amxint8-asmintel-1.c | 8 +- > .../gcc.target/i386/amxmovrs-asmintel-1.c | 4 +- > .../gcc.target/i386/amxtf32-asmintel-1.c | 2 +- > .../gcc.target/i386/amxtile-asmintel-1.c | 8 +- > 27 files changed, 476 insertions(+), 93 deletions(-) > create mode 100644 gcc/testsuite/g++.target/i386/pr122446-1.C > create mode 100644 gcc/testsuite/g++.target/i386/pr122446-amxavx512.C > create mode 100644 gcc/testsuite/g++.target/i386/pr122446-amxbf16.C > create mode 100644 gcc/testsuite/g++.target/i386/pr122446-amxcomplex.C > create mode 100644 gcc/testsuite/g++.target/i386/pr122446-amxfp16.C > create mode 100644 gcc/testsuite/g++.target/i386/pr122446-amxfp8.C > create mode 100644 gcc/testsuite/g++.target/i386/pr122446-amxint8.C > create mode 100644 gcc/testsuite/g++.target/i386/pr122446-amxmovrs.C > create mode 100644 
gcc/testsuite/g++.target/i386/pr122446-amxtf32.C > create mode 100644 gcc/testsuite/g++.target/i386/pr122446-amxtile.C > > diff --git a/gcc/config/i386/amxavx512intrin.h > b/gcc/config/i386/amxavx512intrin.h > index ab5362571d1..1e284607572 100644 > --- a/gcc/config/i386/amxavx512intrin.h > +++ b/gcc/config/i386/amxavx512intrin.h > @@ -39,8 +39,9 @@ > ({ \ > __m512 dst; \ > __asm__ volatile \ > - ("{tcvtrowd2ps\t%1, %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", %1}" > \ > - : "=v" (dst) : "r" ((unsigned) (A))); \ > + ("{tcvtrowd2ps\t%1, %%tmm%c[_src], %0 > \ > + |tcvtrowd2ps\t%0, tmm%c[_src], %1}" > \ > + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ > dst; \ > }) > > @@ -48,8 +49,9 @@ > ({ \ > __m512 dst; \ > __asm__ volatile \ > - ("{tcvtrowd2ps\t$"#imm", %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", > "#imm"}" \ > - : "=v" (dst) :); \ > + ("{tcvtrowd2ps\t%[_imm], %%tmm%c[_src], %0 \ > + |tcvtrowd2ps\t%0, tmm%c[_src], %[_imm]}" \ > + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ > dst; \ > }) > > @@ -57,8 +59,9 @@ > ({ \ > __m512bh dst; > \ > __asm__ volatile \ > - ("{tcvtrowps2bf16h\t%1, %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tmm"#src", > %1}" \ > - : "=v" (dst) : "r" ((unsigned) (A))); \ > + ("{tcvtrowps2bf16h\t%1, %%tmm%c[_src], %0 \ > + |tcvtrowps2bf16h\t%0, tmm%c[_src], %1}" \ > + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ > dst; \ > }) > > @@ -66,8 +69,9 @@ > ({ \ > __m512bh dst; > \ > __asm__ volatile \ > - ("{tcvtrowps2bf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16h\t%0, > %%tmm"#src", "#imm"}" \ > - : "=v" (dst) :); \ > + ("{tcvtrowps2bf16h\t%[_imm], %%tmm%c[_src], %0 \ > + |tcvtrowps2bf16h\t%0, tmm%c[_src], %[_imm]}" \ > + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ > dst; \ > }) > > @@ -75,8 +79,9 @@ > ({ \ > __m512bh dst; > \ > __asm__ volatile \ > - ("{tcvtrowps2bf16l\t%1, %%tmm"#src", %0|tcvtrowps2bf16l\t%0, %%tmm"#src", > %1}" \ > - : "=v" (dst) : "r" ((unsigned) (A))); \ > + ("{tcvtrowps2bf16l\t%1, %%tmm%c[_src], 
%0 \ > + |tcvtrowps2bf16l\t%0, tmm%c[_src], %1}" \ > + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ > dst; \ > }) > > @@ -84,8 +89,9 @@ > ({ \ > __m512bh dst; > \ > __asm__ volatile \ > - ("{tcvtrowps2bf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16l\t%0, > %%tmm"#src", "#imm"}" \ > - : "=v" (dst) :); \ > + ("{tcvtrowps2bf16l\t%[_imm], %%tmm%c[_src], %0 \ > + |tcvtrowps2bf16l\t%0, tmm%c[_src], "#imm"}" > \ > + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ > dst; \ > }) > > @@ -93,8 +99,8 @@ > ({ \ > __m512h dst; \ > __asm__ volatile \ > - ("{tcvtrowps2phh\t%1, %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", %1}" > \ > - : "=v" (dst) : "r" ((unsigned) (A))); \ > + ("{tcvtrowps2phh\t%1, %%tmm%c[_src], %0|tcvtrowps2phh\t%0, tmm%c[_src], > %1}" \ > + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ > dst; \ > }) > > @@ -102,8 +108,9 @@ > ({ \ > __m512h dst; \ > __asm__ volatile \ > - ("{tcvtrowps2phh\t$"#imm", %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", > "#imm"}" \ > - : "=v" (dst) :); \ > + ("{tcvtrowps2phh\t%[_imm], %%tmm%c[_src], %0 \ > + |tcvtrowps2phh\t%0, tmm%c[_src], "#imm"}" \ > + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ > dst; \ > }) > > @@ -111,8 +118,8 @@ > ({ \ > __m512h dst; \ > __asm__ volatile \ > - ("{tcvtrowps2phl\t%1, %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", %1}" > \ > - : "=v" (dst) : "r" ((unsigned) (A))); \ > + ("{tcvtrowps2phl\t%1, %%tmm%c[_src], %0|tcvtrowps2phl\t%0, tmm%c[_src], > %1}" \ > + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ > dst; \ > }) > > @@ -120,8 +127,9 @@ > ({ \ > __m512h dst; \ > __asm__ volatile \ > - ("{tcvtrowps2phl\t$"#imm", %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", > "#imm"}" \ > - : "=v" (dst) :); \ > + ("{tcvtrowps2phl\t%[_imm], %%tmm%c[_src], %0 \ > + |tcvtrowps2phl\t%0, tmm%c[_src], "#imm"}" \ > + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ > dst; \ > }) > > @@ -129,8 +137,8 @@ > ({ \ > __m512 dst; \ > __asm__ volatile \ > - ("{tilemovrow\t%1, 
%%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", %1}" \ > - : "=v" (dst) : "r" ((unsigned) (A))); \ > + ("{tilemovrow\t%1, %%tmm%c[_src], %0|tilemovrow\t%0, tmm%c[_src], %1}" \ > + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ > dst; \ > }) > > @@ -138,8 +146,9 @@ > ({ \ > __m512 dst; \ > __asm__ volatile \ > - ("{tilemovrow\t$"#imm", %%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", > "#imm"}" \ > - : "=v" (dst) :); \ > + ("{tilemovrow\t%[_imm], %%tmm%c[_src], %0 \ > + |tilemovrow\t%0, tmm%c[_src], "#imm"}" \ > + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ > dst; \ > }) > > diff --git a/gcc/config/i386/amxbf16intrin.h b/gcc/config/i386/amxbf16intrin.h > index 9f4a9d1fa3e..b2792bb53e3 100644 > --- a/gcc/config/i386/amxbf16intrin.h > +++ b/gcc/config/i386/amxbf16intrin.h > @@ -36,8 +36,10 @@ > > #if defined(__x86_64__) > #define _tile_dpbf16ps_internal(dst,src1,src2) > \ > - __asm__ volatile\ > - ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", > %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) > + __asm__ volatile > \ > + ("{tdpbf16ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] > \ > + |tdpbf16ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" > \ > + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) > > #define _tile_dpbf16ps(dst,src1,src2) \ > _tile_dpbf16ps_internal (dst, src1, src2) > diff --git a/gcc/config/i386/amxcomplexintrin.h > b/gcc/config/i386/amxcomplexintrin.h > index fc5964f4af5..55b7d532557 100644 > --- a/gcc/config/i386/amxcomplexintrin.h > +++ b/gcc/config/i386/amxcomplexintrin.h > @@ -35,13 +35,17 @@ > #endif /* __AMX_COMPLEX__ */ > > #if defined(__x86_64__) > -#define _tile_cmmimfp16ps_internal(src1_dst,src2,src3) > \ > - __asm__ volatile\ > - ("{tcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", > %%tmm"#src1_dst"|tcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" > ::) > - > -#define _tile_cmmrlfp16ps_internal(src1_dst,src2,src3) > \ > - __asm__ volatile\ > - ("{tcmmrlfp16ps\t%%tmm"#src3", %%tmm"#src2", > 
%%tmm"#src1_dst"|tcmmrlfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" > ::) > +#define _tile_cmmimfp16ps_internal(src1_dst,src2,src3) \ > + __asm__ volatile \ > + ("{tcmmimfp16ps\t%%tmm%c[_src3], %%tmm%c[_src2], %%tmm%c[_src1_dst] \ > + |tcmmimfp16ps\ttmm%c[_src1_dst], tmm%c[_src2], tmm%c[_src3]}" \ > + :: [_src1_dst]"i"(src1_dst), [_src2]"i"(src2), [_src3]"i"(src3)) > + > +#define _tile_cmmrlfp16ps_internal(src1_dst,src2,src3) \ > + __asm__ volatile \ > + ("{tcmmrlfp16ps\t%%tmm%c[_src3], %%tmm%c[_src2], %%tmm%c[_src1_dst] \ > + |tcmmrlfp16ps\ttmm%c[_src1_dst], tmm%c[_src2], tmm%c[_src3]}" \ > + :: [_src1_dst]"i"(src1_dst), [_src2]"i"(src2), [_src3]"i"(src3)) > > #define _tile_cmmimfp16ps(src1_dst,src2,src3) > \ > _tile_cmmimfp16ps_internal (src1_dst, src2, src3) > diff --git a/gcc/config/i386/amxfp16intrin.h b/gcc/config/i386/amxfp16intrin.h > index 02fd031cddd..1e0ef278d60 100644 > --- a/gcc/config/i386/amxfp16intrin.h > +++ b/gcc/config/i386/amxfp16intrin.h > @@ -29,9 +29,11 @@ > #define _AMXFP16INTRIN_H_INCLUDED > > #if defined(__x86_64__) > -#define _tile_dpfp16ps_internal(dst,src1,src2) \ > - __asm__ volatile \ > - ("{tdpfp16ps\t%%tmm"#src2", %%tmm"#src1", > %%tmm"#dst"|tdpfp16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) > +#define _tile_dpfp16ps_internal(dst,src1,src2) \ > + __asm__ volatile \ > + ("{tdpfp16ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ > + |tdpfp16ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ > + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) > > #define _tile_dpfp16ps(dst,src1,src2) \ > _tile_dpfp16ps_internal (dst,src1,src2) > diff --git a/gcc/config/i386/amxfp8intrin.h b/gcc/config/i386/amxfp8intrin.h > index 8952be96dec..9467f53b1ba 100644 > --- a/gcc/config/i386/amxfp8intrin.h > +++ b/gcc/config/i386/amxfp8intrin.h > @@ -29,21 +29,29 @@ > #define _AMXFP8INTRIN_H_INCLUDED > > #if defined(__x86_64__) > -#define _tile_dpbf8ps_internal(dst,src1,src2) \ > - __asm__ volatile \ > - ("{tdpbf8ps\t%%tmm"#src2", 
%%tmm"#src1", > %%tmm"#dst"|tdpbf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) > - > -#define _tile_dpbhf8ps_internal(dst,src1,src2) \ > - __asm__ volatile \ > - ("{tdpbhf8ps\t%%tmm"#src2", %%tmm"#src1", > %%tmm"#dst"|tdpbhf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) > - > -#define _tile_dphbf8ps_internal(dst,src1,src2) \ > - __asm__ volatile \ > - ("{tdphbf8ps\t%%tmm"#src2", %%tmm"#src1", > %%tmm"#dst"|tdphbf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) > - > -#define _tile_dphf8ps_internal(dst,src1,src2) \ > - __asm__ volatile \ > - ("{tdphf8ps\t%%tmm"#src2", %%tmm"#src1", > %%tmm"#dst"|tdphf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) > +#define _tile_dpbf8ps_internal(dst,src1,src2) \ > + __asm__ volatile \ > + ("{tdpbf8ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ > + |tdpbf8ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ > + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) > + > +#define _tile_dpbhf8ps_internal(dst,src1,src2) \ > + __asm__ volatile \ > + ("{tdpbhf8ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ > + |tdpbhf8ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ > + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) > + > +#define _tile_dphbf8ps_internal(dst,src1,src2) \ > + __asm__ volatile \ > + ("{tdphbf8ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ > + |tdphbf8ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ > + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) > + > +#define _tile_dphf8ps_internal(dst,src1,src2) \ > + __asm__ volatile \ > + ("{tdphf8ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ > + |tdphf8ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ > + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) > > #define _tile_dpbf8ps(dst,src1,src2) \ > _tile_dpbf8ps_internal (dst,src1,src2) > diff --git a/gcc/config/i386/amxint8intrin.h b/gcc/config/i386/amxint8intrin.h > index 332c8db9153..f7cb36cfce9 100644 > --- a/gcc/config/i386/amxint8intrin.h > +++ b/gcc/config/i386/amxint8intrin.h 
> @@ -37,7 +37,9 @@ > #if defined(__x86_64__) > #define _tile_int8_dp_internal(name,dst,src1,src2) > \ > __asm__ volatile \ > - ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", > %%tmm"#src1", %%tmm"#src2"}" ::) > + ("{"#name"\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ > + |"#name"\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ > + ::[_dst]"i"(dst),[_src1]"i"(src1),[_src2]"i"(src2)) > > #define _tile_dpbssd(dst,src1,src2) \ > _tile_int8_dp_internal (tdpbssd, dst, src1, src2) > diff --git a/gcc/config/i386/amxmovrsintrin.h > b/gcc/config/i386/amxmovrsintrin.h > index 93a2dbf2c3a..9f5d317abc3 100644 > --- a/gcc/config/i386/amxmovrsintrin.h > +++ b/gcc/config/i386/amxmovrsintrin.h > @@ -36,17 +36,17 @@ > #define __DISABLE_AMX_MOVRS__ > #endif /* __AMX_MOVRS__ */ > > -#define _tile_loaddrs_internal(tdst, base, stride) \ > -__asm__ volatile \ > - ("{tileloaddrs\t(%0,%1,1), %%tmm"#tdst \ > - "|tileloaddrs\t%%tmm"#tdst", [%0+%1*1]}" \ > - :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride))) > +#define _tile_loaddrs_internal(tdst, base, stride) \ > +__asm__ volatile \ > + ("{tileloaddrs\t(%0,%1,1), %%tmm%c[_tdst] \ > + |tileloaddrs\ttmm%c[_tdst], [%0+%1*1]}" \ > + :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), > [_tdst]"i"(tdst)) > > -#define _tile_loaddrst1_internal(tdst, base, stride) \ > -__asm__ volatile \ > - ("{tileloaddrst1\t(%0,%1,1), %%tmm"#tdst \ > - "|tileloaddrst1\t%%tmm"#tdst", [%0+%1*1]}" \ > - :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride))) > +#define _tile_loaddrst1_internal(tdst, base, stride) \ > +__asm__ volatile \ > + ("{tileloaddrst1\t(%0,%1,1), %%tmm%c[_tdst] \ > + |tileloaddrst1\ttmm%c[_tdst], [%0+%1*1]}" \ > + :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), > [_tdst]"i"(tdst)) > > #define _tile_loaddrs(tdst, base, stride) \ > _tile_loaddrs_internal(tdst, base, stride) > diff --git a/gcc/config/i386/amxtf32intrin.h b/gcc/config/i386/amxtf32intrin.h > 
index 8ed910d94fa..a7a1f4ff08a 100644 > --- a/gcc/config/i386/amxtf32intrin.h > +++ b/gcc/config/i386/amxtf32intrin.h > @@ -31,8 +31,10 @@ > > #if defined(__x86_64__) > #define _tile_mmultf32ps_internal(src1_dst,src2,src3) \ > - __asm__ volatile\ > - ("{tmmultf32ps\t%%tmm"#src3", %%tmm"#src2", > %%tmm"#src1_dst"|tmmultf32ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" > ::) > + __asm__ volatile \ > + ("{tmmultf32ps\t%%tmm%c[_src3], %%tmm%c[_src2], %%tmm%c[_src1_dst] \ > + |tmmultf32ps\ttmm%c[_src1_dst], tmm%c[_src2], tmm%c[_src3]}" \ > + :: [_src1_dst]"i"(src1_dst), [_src2]"i"(src2), [_src3]"i"(src3)) > > #define _tile_mmultf32ps(src1_dst,src2,src3) \ > _tile_mmultf32ps_internal (src1_dst, src2, src3) > diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h > index 8c8e2cd9046..67c6b5391e4 100644 > --- a/gcc/config/i386/amxtileintrin.h > +++ b/gcc/config/i386/amxtileintrin.h > @@ -61,32 +61,32 @@ _tile_release (void) > > #define _tile_loadd_internal(dst,base,stride) \ > __asm__ volatile \ > - ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" \ > - :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride))) > + ("{tileloadd\t(%0,%1,1), %%tmm%c[_dst]|tileloadd\ttmm%c[_dst], [%0+%1*1]}" > \ > + :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), > [_dst]"i"(dst)) > > #define _tile_stream_loadd(dst,base,stride) \ > _tile_stream_loadd_internal (dst, base, stride) > > #define _tile_stream_loadd_internal(dst,base,stride) \ > __asm__ volatile \ > - ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", > [%0+%1*1]}" \ > - :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride))) > + ("{tileloaddt1\t(%0,%1,1), %%tmm%c[_dst]|tileloaddt1\ttmm%c[_dst], > [%0+%1*1]}" \ > + :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), > [_dst]"i"(dst)) > > #define _tile_stored(dst,base,stride) \ > _tile_stored_internal (dst, base, stride) > > #define _tile_stored_internal(src,base,stride) 
\ > __asm__ volatile \ > - ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" > \ > - :: "r" ((void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)) \ > - : "memory") > + ("{tilestored\t%%tmm%c[_src], (%0,%1,1)|tilestored\t[%0+%1*1], > tmm%c[_src]}" \ > + :: "r" ((void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), [_src]"i"(src) > \ > + : "memory") > > #define _tile_zero(dst) \ > _tile_zero_internal (dst) > > -#define _tile_zero_internal(dst) \ > - __asm__ volatile \ > - ("tilezero\t%%tmm"#dst ::) > +#define _tile_zero_internal(dst) \ > + __asm__ volatile \ > + ("{tilezero\t%%tmm%c[_dst]|tilezero\ttmm%c[_dst]}" :: [_dst]"i"(dst)) > > #endif > > diff --git a/gcc/testsuite/g++.target/i386/pr122446-1.C > b/gcc/testsuite/g++.target/i386/pr122446-1.C > new file mode 100644 > index 00000000000..39e594f6936 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr122446-1.C > @@ -0,0 +1,17 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mamx-tile -mamx-int8 -O0" } */ > +/* { dg-final { scan-assembler "tdpbssd\[ > \\t]+\[^\n\]*%tmm2+\[^\n\]*%tmm1+\[^\n\]*%tmm0" } } */ > + > +#include <immintrin.h> > + > +template <int hello, int crazy, int gcc> > +struct dpbssd > +{ > + void operator()() { _tile_dpbssd(hello, crazy, gcc); } > +}; > + > +void f() > +{ > + dpbssd<0, 1, 2>()(); > +} > + > diff --git a/gcc/testsuite/g++.target/i386/pr122446-amxavx512.C > b/gcc/testsuite/g++.target/i386/pr122446-amxavx512.C > new file mode 100644 > index 00000000000..bf11a3616b6 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr122446-amxavx512.C > @@ -0,0 +1,104 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-mamx-tile -mamx-avx512 -O0" } */ > +/* { dg-final { scan-assembler "tcvtrowd2ps\[ \\t]+%e.x,\[ \\t\]*%tmm1,\[ > \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tcvtrowd2ps\[ \\t]+\\\$5,\[ \\t\]*%tmm2,\[ > \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tcvtrowps2bf16h\[ \\t]+%e.x,\[ > \\t\]*%tmm1,\[ \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tcvtrowps2bf16h\[ \\t]+\\\$7,\[ > \\t\]*%tmm3,\[ \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tcvtrowps2bf16l\[ \\t]+%e.x,\[ > \\t\]*%tmm2,\[ \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tcvtrowps2bf16l\[ \\t]+\\\$3,\[ > \\t\]*%tmm4,\[ \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tcvtrowps2phh\[ \\t]+%e.x,\[ \\t\]*%tmm1,\[ > \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tcvtrowps2phh\[ \\t]+\\\$6,\[ \\t\]*%tmm2,\[ > \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tcvtrowps2phl\[ \\t]+%e.x,\[ \\t\]*%tmm3,\[ > \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tcvtrowps2phl\[ \\t]+\\\$2,\[ \\t\]*%tmm4,\[ > \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tilemovrow\[ \\t]+%e.x,\[ \\t\]*%tmm5,\[ > \\t\]*%zmm\[0-9\]+" } } */ > +/* { dg-final { scan-assembler "tilemovrow\[ \\t]+\\\$4,\[ \\t\]*%tmm6,\[ > \\t\]*%zmm\[0-9\]+" } } */ > + > +#include <immintrin.h> > + > +template <int tmm_num> > +struct tile_cvtrowd2ps_test > +{ > + __m512 operator()() { return _tile_cvtrowd2ps(tmm_num, 0); } > +}; > + > +template <int tmm_num, int imm> > +struct tile_cvtrowd2psi_test > +{ > + __m512 operator()() { return _tile_cvtrowd2psi(tmm_num, imm); } > +}; > + > +template <int tmm_num> > +struct tile_cvtrowps2bf16h_test > +{ > + __m512bh operator()() { return _tile_cvtrowps2bf16h(tmm_num, 0); } > +}; > + > +template <int tmm_num, int imm> > +struct tile_cvtrowps2bf16hi_test > +{ > + __m512bh operator()() { return _tile_cvtrowps2bf16hi(tmm_num, imm); } > +}; > + > 
+template <int tmm_num> > +struct tile_cvtrowps2bf16l_test > +{ > + __m512bh operator()() { return _tile_cvtrowps2bf16l(tmm_num, 0); } > +}; > + > +template <int tmm_num, int imm> > +struct tile_cvtrowps2bf16li_test > +{ > + __m512bh operator()() { return _tile_cvtrowps2bf16li(tmm_num, imm); } > +}; > + > +template <int tmm_num> > +struct tile_cvtrowps2phh_test > +{ > + __m512h operator()() { return _tile_cvtrowps2phh(tmm_num, 0); } > +}; > + > +template <int tmm_num, int imm> > +struct tile_cvtrowps2phhi_test > +{ > + __m512h operator()() { return _tile_cvtrowps2phhi(tmm_num, imm); } > +}; > + > +template <int tmm_num> > +struct tile_cvtrowps2phl_test > +{ > + __m512h operator()() { return _tile_cvtrowps2phl(tmm_num, 0); } > +}; > + > +template <int tmm_num, int imm> > +struct tile_cvtrowps2phli_test > +{ > + __m512h operator()() { return _tile_cvtrowps2phli(tmm_num, imm); } > +}; > + > +template <int tmm_num> > +struct tile_movrow_test > +{ > + __m512 operator()() { return _tile_movrow(tmm_num, 0); } > +}; > + > +template <int tmm_num, int imm> > +struct tile_movrowi_test > +{ > + __m512 operator()() { return _tile_movrowi(tmm_num, imm); } > +}; > + > +void test_amx_avx512() > +{ > + __m512 r1 = tile_cvtrowd2ps_test<1>()(); > + __m512 r2 = tile_cvtrowd2psi_test<2, 5>()(); > + __m512bh r3 = tile_cvtrowps2bf16h_test<1>()(); > + __m512bh r4 = tile_cvtrowps2bf16hi_test<3, 7>()(); > + __m512bh r5 = tile_cvtrowps2bf16l_test<2>()(); > + __m512bh r6 = tile_cvtrowps2bf16li_test<4, 3>()(); > + __m512h r7 = tile_cvtrowps2phh_test<1>()(); > + __m512h r8 = tile_cvtrowps2phhi_test<2, 6>()(); > + __m512h r9 = tile_cvtrowps2phl_test<3>()(); > + __m512h r10 = tile_cvtrowps2phli_test<4, 2>()(); > + __m512 r11 = tile_movrow_test<5>()(); > + __m512 r12 = tile_movrowi_test<6, 4>()(); > +} > diff --git a/gcc/testsuite/g++.target/i386/pr122446-amxbf16.C > b/gcc/testsuite/g++.target/i386/pr122446-amxbf16.C > new file mode 100644 > index 00000000000..8ee03911d2b > --- /dev/null > +++ 
b/gcc/testsuite/g++.target/i386/pr122446-amxbf16.C > @@ -0,0 +1,16 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mamx-tile -mamx-bf16 -O0" } */ > +/* { dg-final { scan-assembler "tdpbf16ps\[ \\t]+%tmm2,\[ \\t\]*%tmm1,\[ > \\t\]*%tmm0" } } */ > + > +#include <immintrin.h> > + > +template <int dst, int src1, int src2> > +struct dpbf16ps > +{ > + void operator()() { _tile_dpbf16ps(dst, src1, src2); } > +}; > + > +void test_amx_bf16() > +{ > + dpbf16ps<0, 1, 2>()(); > +} > diff --git a/gcc/testsuite/g++.target/i386/pr122446-amxcomplex.C > b/gcc/testsuite/g++.target/i386/pr122446-amxcomplex.C > new file mode 100644 > index 00000000000..3224b486f23 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr122446-amxcomplex.C > @@ -0,0 +1,24 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mamx-tile -mamx-complex -O0" } */ > +/* { dg-final { scan-assembler "tcmmimfp16ps\[ \\t]+%tmm2,\[ \\t\]*%tmm1,\[ > \\t\]*%tmm0" } } */ > +/* { dg-final { scan-assembler "tcmmrlfp16ps\[ \\t]+%tmm5,\[ \\t\]*%tmm4,\[ > \\t\]*%tmm3" } } */ > + > +#include <immintrin.h> > + > +template <int dst, int src1, int src2> > +struct cmmimfp16ps > +{ > + void operator()() { _tile_cmmimfp16ps(dst, src1, src2); } > +}; > + > +template <int dst, int src1, int src2> > +struct cmmrlfp16ps > +{ > + void operator()() { _tile_cmmrlfp16ps(dst, src1, src2); } > +}; > + > +void test_amx_complex() > +{ > + cmmimfp16ps<0, 1, 2>()(); > + cmmrlfp16ps<3, 4, 5>()(); > +} > diff --git a/gcc/testsuite/g++.target/i386/pr122446-amxfp16.C > b/gcc/testsuite/g++.target/i386/pr122446-amxfp16.C > new file mode 100644 > index 00000000000..7467cd9546d > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr122446-amxfp16.C > @@ -0,0 +1,16 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-mamx-tile -mamx-fp16 -O0" } */ > +/* { dg-final { scan-assembler "tdpfp16ps\[ \\t]+%tmm2,\[ \\t\]*%tmm1,\[ > \\t\]*%tmm0" } } */ > + > +#include <immintrin.h> > + > +template <int dst, int src1, int src2> > +struct dpfp16ps > +{ > + void operator()() { _tile_dpfp16ps(dst, src1, src2); } > +}; > + > +void test_amx_fp16() > +{ > + dpfp16ps<0, 1, 2>()(); > +} > diff --git a/gcc/testsuite/g++.target/i386/pr122446-amxfp8.C > b/gcc/testsuite/g++.target/i386/pr122446-amxfp8.C > new file mode 100644 > index 00000000000..8e5af6c1e46 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr122446-amxfp8.C > @@ -0,0 +1,40 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mamx-tile -mamx-fp8 -O0" } */ > +/* { dg-final { scan-assembler "tdpbf8ps\[ \\t]+%tmm2,\[ \\t\]*%tmm1,\[ > \\t\]*%tmm0" } } */ > +/* { dg-final { scan-assembler "tdpbhf8ps\[ \\t]+%tmm5,\[ \\t\]*%tmm4,\[ > \\t\]*%tmm3" } } */ > +/* { dg-final { scan-assembler "tdphbf8ps\[ \\t]+%tmm2,\[ \\t\]*%tmm1,\[ > \\t\]*%tmm6" } } */ > +/* { dg-final { scan-assembler "tdphf8ps\[ \\t]+%tmm1,\[ \\t\]*%tmm0,\[ > \\t\]*%tmm7" } } */ > + > +#include <immintrin.h> > + > +template <int dst, int src1, int src2> > +struct dpbf8ps > +{ > + void operator()() { _tile_dpbf8ps(dst, src1, src2); } > +}; > + > +template <int dst, int src1, int src2> > +struct dpbhf8ps > +{ > + void operator()() { _tile_dpbhf8ps(dst, src1, src2); } > +}; > + > +template <int dst, int src1, int src2> > +struct dphbf8ps > +{ > + void operator()() { _tile_dphbf8ps(dst, src1, src2); } > +}; > + > +template <int dst, int src1, int src2> > +struct dphf8ps > +{ > + void operator()() { _tile_dphf8ps(dst, src1, src2); } > +}; > + > +void test_amx_fp8() > +{ > + dpbf8ps<0, 1, 2>()(); > + dpbhf8ps<3, 4, 5>()(); > + dphbf8ps<6, 1, 2>()(); > + dphf8ps<7, 0, 1>()(); > +} > diff --git a/gcc/testsuite/g++.target/i386/pr122446-amxint8.C > b/gcc/testsuite/g++.target/i386/pr122446-amxint8.C > new file mode 100644 > 
index 00000000000..039379222a5 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr122446-amxint8.C > @@ -0,0 +1,40 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mamx-tile -mamx-int8 -O0" } */ > +/* { dg-final { scan-assembler "tdpbssd\[ \\t]+%tmm2,\[ \\t\]*%tmm1,\[ > \\t\]*%tmm0" } } */ > +/* { dg-final { scan-assembler "tdpbsud\[ \\t]+%tmm5,\[ \\t\]*%tmm4,\[ > \\t\]*%tmm3" } } */ > +/* { dg-final { scan-assembler "tdpbusd\[ \\t]+%tmm2,\[ \\t\]*%tmm1,\[ > \\t\]*%tmm6" } } */ > +/* { dg-final { scan-assembler "tdpbuud\[ \\t]+%tmm1,\[ \\t\]*%tmm0,\[ > \\t\]*%tmm7" } } */ > + > +#include <immintrin.h> > + > +template <int dst, int src1, int src2> > +struct dpbssd > +{ > + void operator()() { _tile_dpbssd(dst, src1, src2); } > +}; > + > +template <int dst, int src1, int src2> > +struct dpbsud > +{ > + void operator()() { _tile_dpbsud(dst, src1, src2); } > +}; > + > +template <int dst, int src1, int src2> > +struct dpbusd > +{ > + void operator()() { _tile_dpbusd(dst, src1, src2); } > +}; > + > +template <int dst, int src1, int src2> > +struct dpbuud > +{ > + void operator()() { _tile_dpbuud(dst, src1, src2); } > +}; > + > +void test_amx_int8() > +{ > + dpbssd<0, 1, 2>()(); > + dpbsud<3, 4, 5>()(); > + dpbusd<6, 1, 2>()(); > + dpbuud<7, 0, 1>()(); > +} > diff --git a/gcc/testsuite/g++.target/i386/pr122446-amxmovrs.C > b/gcc/testsuite/g++.target/i386/pr122446-amxmovrs.C > new file mode 100644 > index 00000000000..09f651d57f3 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr122446-amxmovrs.C > @@ -0,0 +1,31 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-mamx-tile -mamx-movrs -O0" } */ > +/* { dg-final { scan-assembler "tileloaddrs\[ \\t]+\[^\n\]*,\[ \\t\]*%tmm0" > } } */ > +/* { dg-final { scan-assembler "tileloaddrst1\[ \\t]+\[^\n\]*,\[ > \\t\]*%tmm1" } } */ > + > +#include <immintrin.h> > + > +template <int tmm_num> > +struct tile_loaddrs_test > +{ > + void operator()(const void* base, int stride) > + { > + _tile_loaddrs(tmm_num, base, stride); > + } > +}; > + > +template <int tmm_num> > +struct tile_loaddrst1_test > +{ > + void operator()(const void* base, int stride) > + { > + _tile_loaddrst1(tmm_num, base, stride); > + } > +}; > + > +void test_amx_movrs() > +{ > + char buf[1024]; > + tile_loaddrs_test<0>()(buf, 64); > + tile_loaddrst1_test<1>()(buf, 64); > +} > diff --git a/gcc/testsuite/g++.target/i386/pr122446-amxtf32.C > b/gcc/testsuite/g++.target/i386/pr122446-amxtf32.C > new file mode 100644 > index 00000000000..0d1220a4c91 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr122446-amxtf32.C > @@ -0,0 +1,16 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mamx-tile -mamx-tf32 -O0" } */ > +/* { dg-final { scan-assembler "tmmultf32ps\[ \\t]+%tmm2,\[ \\t\]*%tmm1,\[ > \\t\]*%tmm0" } } */ > + > +#include <immintrin.h> > + > +template <int dst, int src1, int src2> > +struct mmultf32ps > +{ > + void operator()() { _tile_mmultf32ps(dst, src1, src2); } > +}; > + > +void test_amx_tf32() > +{ > + mmultf32ps<0, 1, 2>()(); > +} > diff --git a/gcc/testsuite/g++.target/i386/pr122446-amxtile.C > b/gcc/testsuite/g++.target/i386/pr122446-amxtile.C > new file mode 100644 > index 00000000000..6836ba492d0 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/pr122446-amxtile.C > @@ -0,0 +1,50 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-mamx-tile -O0" } */ > +/* { dg-final { scan-assembler "tileloadd\[ \\t]+\[^\n\]*,\[ \\t\]*%tmm0" } > } */ > +/* { dg-final { scan-assembler "tilestored\[ \\t]+%tmm1," } } */ > +/* { dg-final { scan-assembler "tilezero\[ \\t]+%tmm3" } } */ > +/* { dg-final { scan-assembler "tileloaddt1\[ \\t]+\[^\n\]*,\[ \\t\]*%tmm2" > } } */ > + > +#include <immintrin.h> > + > +template <int tmm_num> > +struct tile_loadd_test > +{ > + void operator()(const void* base, int stride) > + { > + _tile_loadd(tmm_num, base, stride); > + } > +}; > + > +template <int tmm_num> > +struct tile_stored_test > +{ > + void operator()(void* base, int stride) > + { > + _tile_stored(tmm_num, base, stride); > + } > +}; > + > +template <int tmm_num> > +struct tile_zero_test > +{ > + void operator()() { _tile_zero(tmm_num); } > +}; > + > +template <int tmm_num> > +struct tile_stream_loadd_test > +{ > + void operator()(const void* base, int stride) > + { > + _tile_stream_loadd(tmm_num, base, stride); > + } > +}; > + > +void test_amx_tile() > +{ > + char buf[1024]; > + tile_loadd_test<0>()(buf, 64); > + tile_stored_test<1>()(buf, 64); > + tile_stream_loadd_test<2>()(buf, 64); > + tile_zero_test<3>()(); > +} > diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c > b/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c > index 54194e1c5b0..80d129e77ed 100644 > --- a/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c > +++ b/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c > @@ -1,7 +1,7 @@ > /* { dg-do compile { target { ! 
ia32 } } } */ > /* { dg-require-effective-target masm_intel } */ > /* { dg-options "-O2 -mamx-bf16 -masm=intel" } */ > -/* { dg-final { scan-assembler "tdpbf16ps\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > +/* { dg-final { scan-assembler "tdpbf16ps\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > #include <immintrin.h> > > void TEST () > diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c > b/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c > index 305465e8860..51997b73e02 100644 > --- a/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c > +++ b/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c > @@ -1,8 +1,8 @@ > /* { dg-do compile { target { ! ia32 } } } */ > /* { dg-require-effective-target masm_intel } */ > /* { dg-options "-O2 -mamx-complex -masm=intel" } */ > -/* { dg-final { scan-assembler "tcmmimfp16ps\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > -/* { dg-final { scan-assembler "tcmmrlfp16ps\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > +/* { dg-final { scan-assembler "tcmmimfp16ps\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > +/* { dg-final { scan-assembler "tcmmrlfp16ps\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > #include <immintrin.h> > > void TEST() > diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c > b/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c > index a8dff945f23..8895da9aa8d 100644 > --- a/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c > +++ b/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c > @@ -1,7 +1,7 @@ > /* { dg-do compile { target { ! 
ia32 } } } */ > /* { dg-require-effective-target masm_intel } */ > /* { dg-options "-O2 -mamx-fp16 -masm=intel" } */ > -/* { dg-final { scan-assembler "tdpfp16ps\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > +/* { dg-final { scan-assembler "tdpfp16ps\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > #include <immintrin.h> > > void TEST () > diff --git a/gcc/testsuite/gcc.target/i386/amxfp8-asmintel-1.c > b/gcc/testsuite/gcc.target/i386/amxfp8-asmintel-1.c > index f83b7a9a287..eecd2f574c9 100644 > --- a/gcc/testsuite/gcc.target/i386/amxfp8-asmintel-1.c > +++ b/gcc/testsuite/gcc.target/i386/amxfp8-asmintel-1.c > @@ -1,10 +1,10 @@ > /* { dg-do compile { target { ! ia32 } } } */ > /* { dg-require-effective-target masm_intel } */ > /* { dg-options "-O2 -mamx-fp16 -masm=intel" } */ > -/* { dg-final { scan-assembler "tdpbf8ps\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > -/* { dg-final { scan-assembler "tdpbhf8ps\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > -/* { dg-final { scan-assembler "tdphbf8ps\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > -/* { dg-final { scan-assembler "tdphf8ps\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > +/* { dg-final { scan-assembler "tdpbf8ps\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > +/* { dg-final { scan-assembler "tdpbhf8ps\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > +/* { dg-final { scan-assembler "tdphbf8ps\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > +/* { dg-final { scan-assembler "tdphf8ps\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > > #include <immintrin.h> > > diff --git a/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c > b/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c > index abc98745a76..46c1fa06fa1 100644 > --- a/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c > +++ b/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c > @@ -1,10 +1,10 @@ > /* { dg-do compile { target 
{ ! ia32 } } } */ > /* { dg-require-effective-target masm_intel } */ > /* { dg-options "-O2 -mamx-int8 -masm=intel" } */ > -/* { dg-final { scan-assembler "tdpbssd\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > -/* { dg-final { scan-assembler "tdpbsud\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > -/* { dg-final { scan-assembler "tdpbusd\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > -/* { dg-final { scan-assembler "tdpbuud\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > +/* { dg-final { scan-assembler "tdpbssd\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > +/* { dg-final { scan-assembler "tdpbsud\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > +/* { dg-final { scan-assembler "tdpbusd\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > +/* { dg-final { scan-assembler "tdpbuud\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > #include <immintrin.h> > > void TEST () > diff --git a/gcc/testsuite/gcc.target/i386/amxmovrs-asmintel-1.c > b/gcc/testsuite/gcc.target/i386/amxmovrs-asmintel-1.c > index 6a522b5c795..bb7a36026bf 100644 > --- a/gcc/testsuite/gcc.target/i386/amxmovrs-asmintel-1.c > +++ b/gcc/testsuite/gcc.target/i386/amxmovrs-asmintel-1.c > @@ -1,8 +1,8 @@ > /* { dg-do compile { target { ! 
ia32 } } } */ > /* { dg-require-effective-target masm_intel } */ > /* { dg-options "-O2 -mamx-movrs -masm=intel" } */ > -/* { dg-final { scan-assembler-times "tileloaddrs\[ \\t]%tmm\[0-9\]" 1 } } */ > -/* { dg-final { scan-assembler-times "tileloaddrst1\[ \\t]%tmm\[0-9\]" 1 } } > */ > +/* { dg-final { scan-assembler-times "tileloaddrs\[ \\t]tmm\[0-9\]" 1 } } */ > +/* { dg-final { scan-assembler-times "tileloaddrst1\[ \\t]tmm\[0-9\]" 1 } } > */ > #include <immintrin.h> > > extern const void* base; > diff --git a/gcc/testsuite/gcc.target/i386/amxtf32-asmintel-1.c > b/gcc/testsuite/gcc.target/i386/amxtf32-asmintel-1.c > index 01887887df3..03ae3075078 100644 > --- a/gcc/testsuite/gcc.target/i386/amxtf32-asmintel-1.c > +++ b/gcc/testsuite/gcc.target/i386/amxtf32-asmintel-1.c > @@ -1,7 +1,7 @@ > /* { dg-do compile { target { ! ia32 } } } */ > /* { dg-require-effective-target masm_intel } */ > /* { dg-options "-O2 -mamx-tf32 -masm=intel" } */ > -/* { dg-final { scan-assembler "tmmultf32ps\[ > \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ > +/* { dg-final { scan-assembler "tmmultf32ps\[ > \\t]+\[^\n%\]*tmm1+\[^\n%\]*tmm2+\[^\n%\]*tmm3" } } */ > #include <immintrin.h> > > void TEST() > diff --git a/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c > b/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c > index 6c08fec516c..661645d09d5 100644 > --- a/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c > +++ b/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c > @@ -4,10 +4,10 @@ > /* { dg-final { scan-assembler "ldtilecfg\[ \\t]" } } */ > /* { dg-final { scan-assembler "sttilecfg\[ \\t]" } } */ > /* { dg-final { scan-assembler "tilerelease" } } */ > -/* { dg-final { scan-assembler "tileloadd\[ \\t]%tmm\[0-9\]" } } */ > -/* { dg-final { scan-assembler "tileloaddt1\[ \\t]%tmm\[0-9\]" } } */ > -/* { dg-final { scan-assembler "tilestored\[ > \\t]\[^\n\]+\[^\n\]*%tmm\[0-9\]" } } */ > -/* { dg-final { scan-assembler "tilezero\[ \\t]+\[^\n\]*%tmm\[0-9\]" } } */ > +/* { 
dg-final { scan-assembler "tileloadd\[ \\t]tmm\[0-9\]" } } */ > +/* { dg-final { scan-assembler "tileloaddt1\[ \\t]tmm\[0-9\]" } } */ > +/* { dg-final { scan-assembler "tilestored\[ \\t]\[^\n\]+\[^\n\]*tmm\[0-9\]" > } } */ > +/* { dg-final { scan-assembler "tilezero\[ \\t]+\[^\n%\]*tmm\[0-9\]" } } */ > #include <immintrin.h> > > extern int a[]; > -- > 2.31.1 >
-- BR, Hongtao
