On Sun, Feb 25, 2024 at 8:25 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> On Sun, Feb 25, 2024 at 7:03 PM Hongtao Liu <crazy...@gmail.com> wrote:
> >
> > On Mon, Feb 26, 2024 at 10:37 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> > >
> > > On Sun, Feb 25, 2024 at 6:03 PM Hongtao Liu <crazy...@gmail.com> wrote:
> > > >
> > > > On Mon, Feb 26, 2024 at 5:11 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> > > > >
> > > > > > ldtilecfg and sttilecfg take a 512-bit (64-byte) memory block.  With
> > > > > _tile_loadconfig implemented as
> > > > >
> > > > > extern __inline void
> > > > > __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > > _tile_loadconfig (const void *__config)
> > > > > {
> > > > >   __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > > **)__config)));
> > > > > }
> > > > >
> > > > > GCC sees:
> > > > >
> > > > > (parallel [
> > > > >   (asm_operands/v ("ldtilecfg   %X0") ("") 0
> > > > >    [(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
> > > > >                          (const_int -64 [0xffffffffffffffc0])) [1 
> > > > > MEM[(const void * *)&tile_data]+0 S8 A128])]
> > > > >    [(asm_input:DI ("m"))]
> > > > >    (clobber (reg:CC 17 flags))])
> > > > >
> > > > > > and the memory operand size is 8 bytes (DImode).  As a result, the
> > > > > > remaining 56 bytes of the 64-byte block are ignored by GCC.  Implement
> > > > > > ldtilecfg and sttilecfg intrinsics with a pointer to BLKmode to honor
> > > > > > the full 64-byte (512-bit) memory block.
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > >         PR target/114098
> > > > >         * config/i386/amxtileintrin.h (_tile_loadconfig): Use
> > > > >         __builtin_ia32_ldtilecfg.
> > > > >         (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
> > > > >         * config/i386/i386-builtin.def (BDESC): Add
> > > > >         __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
> > > > >         * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
> > > > >         IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
> > > > >         * config/i386/i386.md (ldtilecfg): New pattern.
> > > > >         (sttilecfg): Likewise.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > >         PR target/114098
> > > > >         * gcc.target/i386/amxtile-4.c: New test.
> > > > > ---
> > > > >  gcc/config/i386/amxtileintrin.h           |  4 +-
> > > > >  gcc/config/i386/i386-builtin.def          |  4 ++
> > > > >  gcc/config/i386/i386-expand.cc            | 19 ++++++++
> > > > >  gcc/config/i386/i386.md                   | 24 ++++++++++
> > > > >  gcc/testsuite/gcc.target/i386/amxtile-4.c | 55 
> > > > > +++++++++++++++++++++++
> > > > >  5 files changed, 104 insertions(+), 2 deletions(-)
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-4.c
> > > > >
> > > > > diff --git a/gcc/config/i386/amxtileintrin.h 
> > > > > b/gcc/config/i386/amxtileintrin.h
> > > > > index d1a26e0fea5..5081b326498 100644
> > > > > --- a/gcc/config/i386/amxtileintrin.h
> > > > > +++ b/gcc/config/i386/amxtileintrin.h
> > > > > @@ -39,14 +39,14 @@ extern __inline void
> > > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > >  _tile_loadconfig (const void *__config)
> > > > >  {
> > > > > -  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void 
> > > > > **)__config)));
> > > > > +  __builtin_ia32_ldtilecfg (__config);
> > > > >  }
> > > > >
> > > > >  extern __inline void
> > > > >  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > > > >  _tile_storeconfig (void *__config)
> > > > >  {
> > > > > -  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
> > > > > +  __builtin_ia32_sttilecfg (__config);
> > > > >  }
> > > > >
> > > > >  extern __inline void
> > > > > diff --git a/gcc/config/i386/i386-builtin.def 
> > > > > b/gcc/config/i386/i386-builtin.def
> > > > > index 729355230b8..88dd7f8857f 100644
> > > > > --- a/gcc/config/i386/i386-builtin.def
> > > > > +++ b/gcc/config/i386/i386-builtin.def
> > > > > @@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | 
> > > > > OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b
> > > > >  BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, 
> > > > > CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, 
> > > > > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > > > >  BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, 
> > > > > CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, 
> > > > > UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
> > > > >
> > > > > > +/* LDTILECFG and STTILECFG.  */
> > > > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > > > > CODE_FOR_ldtilecfg, "__builtin_ia32_ldtilecfg", 
> > > > > IX86_BUILTIN_LDTILECFG, UNKNOWN, (int) VOID_FTYPE_PCVOID)
> > > > > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, 
> > > > > CODE_FOR_ldtilecfg, "__builtin_ia32_sttilecfg", 
> > > > > IX86_BUILTIN_STTILECFG, UNKNOWN, (int) VOID_FTYPE_PVOID)
> > > > CODE_FOR_sttilecfg.
> > >
> > > It is unused.  I changed both to CODE_FOR_nothing.
> > >
> > > > > +
> > > > >  /* SSE */
> > > > >  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, 
> > > > > "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) 
> > > > > VOID_FTYPE_PFLOAT_V4SF)
> > > > >  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, 
> > > > > "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) 
> > > > > VOID_FTYPE_PFLOAT_V4SF)
> > > > > diff --git a/gcc/config/i386/i386-expand.cc 
> > > > > b/gcc/config/i386/i386-expand.cc
> > > > > index a4d3369f01b..17993eb837f 100644
> > > > > --- a/gcc/config/i386/i386-expand.cc
> > > > > +++ b/gcc/config/i386/i386-expand.cc
> > > > > @@ -14152,6 +14152,25 @@ ix86_expand_builtin (tree exp, rtx target, 
> > > > > rtx subtarget,
> > > > >         emit_insn (pat);
> > > > >        return 0;
> > > > >
> > > > > +    case IX86_BUILTIN_LDTILECFG:
> > > > > +    case IX86_BUILTIN_STTILECFG:
> > > > > +      arg0 = CALL_EXPR_ARG (exp, 0);
> > > > > +      op0 = expand_normal (arg0);
> > > > > +
> > > > > +      if (!address_operand (op0, VOIDmode))
> > > > > +       {
> > > > > +         op0 = convert_memory_address (Pmode, op0);
> > > > > +         op0 = copy_addr_to_reg (op0);
> > > > > +       }
> > > > > +      op0 = gen_rtx_MEM (BLKmode, op0);
> > > > maybe we can just use XImode, and adjust the patterns with XI.
> > >
> > > Changed.
> > >
> > > > > +      if (fcode == IX86_BUILTIN_LDTILECFG)
> > > > > +       icode = CODE_FOR_ldtilecfg;
> > > > > +      else
> > > > > +       icode = CODE_FOR_sttilecfg;
> > > > > +      pat = GEN_FCN (icode) (op0);
> > > > > +      emit_insn (pat);
> > > > > +      return 0;
> > > > > +
> > > > >      case IX86_BUILTIN_LLWPCB:
> > > > >        arg0 = CALL_EXPR_ARG (exp, 0);
> > > > >        op0 = expand_normal (arg0);
> > > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > > > > index 6a26d966a0e..0ede6adac2f 100644
> > > > > --- a/gcc/config/i386/i386.md
> > > > > +++ b/gcc/config/i386/i386.md
> > > > > @@ -353,6 +353,10 @@ (define_c_enum "unspecv" [
> > > > >    ;; For USER_MSR support
> > > > >    UNSPECV_URDMSR
> > > > >    UNSPECV_UWRMSR
> > > > > +
> > > > > +  ;; For AMX-TILE
> > > > > +  UNSPECV_LDTILECFG
> > > > > +  UNSPECV_STTILECFG
> > > > >  ])
> > > > >
> > > > >  ;; Constants to represent rounding modes in the ROUND instruction
> > > > > @@ -28152,6 +28156,26 @@ (define_insn "uwrmsr"
> > > > >    [(set_attr "prefix" "vex")
> > > > >     (set_attr "type" "other")])
> > > > >
> > > > > +
> > > > > +(define_insn "ldtilecfg"
> > > > > +  [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "jm")]
> > > > ldtilecfg/sttilecfg can be extended with gpr32, so just use m instead 
> > > > of jm.
> > > > > +                   UNSPECV_LDTILECFG)]
> > > > > +  "TARGET_AMX_TILE"
> > > > > +  "ldtilecfg\t%0"
> > > >
> > > > > +  [(set_attr "type" "other")
> > > > > +   (set_attr "addr" "gpr16")
> > > > Remove this.
> > >
> > > Done.
> > >
> > > > > +   (set_attr "prefix" "vex")
> > > > Possibly better with maybe_evex.
> > >
> > > Done.
> > >
> > > > > +   (set_attr "memory" "load")])
> > > > > +
> > > > > +(define_insn "sttilecfg"
> > > > > +  [(set (match_operand:BLK 0 "memory_operand" "=jm")
> > > > > +       (unspec_volatile:BLK [(const_int 0)] UNSPECV_STTILECFG))]
> > > > > +  "TARGET_AMX_TILE"
> > > > > +  "sttilecfg\t%0"
> > > > > +  [(set_attr "type" "other")
> > > > > +   (set_attr "addr" "gpr16")
> > > > > +   (set_attr "prefix" "vex")
> > > > > +   (set_attr "memory" "store")])
> > > > >  (include "mmx.md")
> > > > >  (include "sse.md")
> > > > >  (include "sync.md")
> > > > > diff --git a/gcc/testsuite/gcc.target/i386/amxtile-4.c 
> > > > > b/gcc/testsuite/gcc.target/i386/amxtile-4.c
> > > > > new file mode 100644
> > > > > index 00000000000..1255af2594e
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/i386/amxtile-4.c
> > > > > @@ -0,0 +1,55 @@
> > > > > +/* PR target/114098 */
> > > > > +/* { dg-do compile { target { ! ia32 } } } */
> > > > > +/* { dg-options "-O2 -mamx-tile" } */
> > > > > +
> > > > > +#include <stdint.h>
> > > > > +#include <x86intrin.h>
> > > > > +
> > > > > +#define MAX_ROWS 16
> > > > > +#define MAX_COLS 64
> > > > > +#define MAX 1024
> > > > > +#define STRIDE 64
> > > > > +
> > > > > +typedef struct __tile_config
> > > > > +{
> > > > > +  uint8_t palette_id;
> > > > > +  uint8_t start_row;
> > > > > +  uint8_t reserved_0[14];
> > > > > +  uint16_t colsb[16];
> > > > > +  uint8_t rows[16];
> > > > > +} __tilecfg;
> > > > > +
> > > > > +
> > > > > +extern void bar (__tilecfg *tileinfo);
> > > > > +
> > > > > +/* Initialize tile config */
> > > > > +static void
> > > > > +init_tile_config (__tilecfg *tileinfo)
> > > > > +{
> > > > > +  int i;
> > > > > +  tileinfo->palette_id = 1;
> > > > > +  tileinfo->start_row = 0;
> > > > > +
> > > > > +  for (i = 0; i < 1; ++i)
> > > > > +  {
> > > > > +    tileinfo->colsb[i] = MAX_ROWS;
> > > > > +    tileinfo->rows[i] = MAX_ROWS;
> > > > > +  }
> > > > > +
> > > > > +  for (i = 1; i < 4; ++i)
> > > > > +  {
> > > > > +    tileinfo->colsb[i] = MAX_COLS;
> > > > > +    tileinfo->rows[i] = MAX_ROWS;
> > > > > +  }
> > > > > +
> > > > > +  _tile_loadconfig (tileinfo);
> > > > > +}
> > > > > +
> > > > > +void
> > > > > +enable_amx (void)
> > > > > +{
> > > > > +  __tilecfg tile_data = {0};
> > > > > +  init_tile_config (&tile_data);
> > > > > +}
> > > > > +
> > > > > +/* { dg-final { scan-assembler-times "pxor\[^\n\]*%xmm" 1 } } */
> > > > > --
> > > > > 2.43.2
> > > > >
> > > >
> > >
> > > I am testing this patch now.
> > Ok if it passes the regression test.
>
> Test passed.  I am checking it in.
>
> Thanks.
>

OK to backport to release branches?


-- 
H.J.

Reply via email to