Re: [PING^2] [PATCH v2] rs6000: Adding missed ISA 3.0 atomic memory operation instructions.

Surya Kumari Jangala Mon, 28 Apr 2025 04:37:18 -0700

Hi Jeevitha,
Looks like Peter's review comments have not been incorporated.
Please update the patch and, as Peter suggested, send the next version of the 
patch as its own email thread.


Regards,
Surya

On 03/04/25 8:17 pm, jeevitha wrote:
> 
> Ping!
> 
> please review.
> 
> Thanks & Regards
> Jeevitha
> 
> On 20/02/25 7:41 pm, jeevitha wrote:
>> Hi All,
>>
>> The following patch has been bootstrapped and regtested on powerpc64le-linux.
>>
>> Changes to amo.h include the addition of the following load atomic 
>> operations:
>> Compare and Swap Not Equal, Fetch and Increment Bounded, Fetch and Increment
>> Equal, and Fetch and Decrement Bounded. Additionally, Store Twin is added for
>> store atomic operations.
>>
>> 2025-02-20 Peter Bergner <berg...@linux.ibm.com>
>>
>> gcc/:
>>      * config/rs6000/amo.h: Add missing atomic memory operations.
>>      * doc/extend.texi (PowerPC Atomic Memory Operation Functions):
>>         Document new functions.
>>
>> gcc/testsuite/:
>>      * gcc.target/powerpc/amo3.c: New test.
>>      * gcc.target/powerpc/amo4.c: Likewise.
>>      * gcc.target/powerpc/amo5.c: Likewise.
>>      * gcc.target/powerpc/amo6.c: Likewise.
>>      * gcc.target/powerpc/amo7.c: Likewise.
>>
>> Co-authored-by: Jeevitha Palanisamy  <jeevi...@linux.ibm.com>
>>
>>
>> diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
>> index 25ab1c7b4c4..10960208d31 100644
>> --- a/gcc/config/rs6000/amo.h
>> +++ b/gcc/config/rs6000/amo.h
>> @@ -71,6 +71,64 @@ NAME (TYPE *_PTR, TYPE _VALUE)                            
>>                 \
>>    return _RET;                                                              
>> \
>>  }
>>  
>> +/* Implementation of the LWAT/LDAT operations that take two input registers
>> +   and modify one word or double-word of memory and return the value that was
>> +   previously in the memory location.  The destination and two source
>> +   registers are encoded with only one register number, so we need three
>> +   consecutive GPR registers and there is no C/C++ type that will give
>> +   us that, so we have to use register asm variables to achieve that.
>> +
>> +   The LWAT/LDAT opcode requires the address to be a single register,
>> +   and that points to a suitably aligned memory location.  Asm volatile
>> +   is used to prevent the optimizer from moving the operation.  */
>> +
>> +#define _AMO_LD_CMPSWP(NAME, TYPE, OPCODE, FC)                              
>> \
>> +static __inline__ TYPE                                                      
>> \
>> +NAME (TYPE *_PTR, TYPE _COND, TYPE _VALUE)                          \
>> +{                                                                   \
>> +  register TYPE _ret asm ("r8");                                    \
>> +  register TYPE _cond asm ("r9") = _COND;                           \
>> +  register TYPE _value asm ("r10") = _VALUE;                                
>> \
>> +  __asm__ __volatile__ (OPCODE " %[ret],%P[addr],%[code]"           \
>> +                    : [addr] "+Q" (_PTR[0]), [ret] "=r" (_ret)      \
>> +                    : "r" (_cond), "r" (_value), [code] "n" (FC));  \
>> +  return _ret;                                                              
>> \
>> +}
>> +
>> +/* Implementation of the LWAT/LDAT fetch and increment operations.
>> +
>> +   The LWAT/LDAT opcode requires the address to be a single register that
>> +   points to a suitably aligned memory location.  Asm volatile is used to
>> +   prevent the optimizer from moving the operation.  */
>> +
>> +#define _AMO_LD_INCREMENT(NAME, TYPE, OPCODE, FC)                   \
>> +static __inline__ TYPE                                                      
>> \
>> +NAME (TYPE *_PTR)                                                   \
>> +{                                                                   \
>> +  TYPE _RET;                                                                
>> \
>> +  __asm__ volatile (OPCODE " %[ret],%P[addr],%[code]\n"                     
>> \
>> +                : [addr] "+Q" (_PTR[0]), [ret] "=r" (_RET)          \
>> +                : "Q" (*(TYPE (*)[2]) _PTR), [code] "n" (FC));      \
>> +  return _RET;                                                              
>> \
>> +}
>> +
>> +/* Implementation of the LWAT/LDAT fetch and decrement operations.
>> +
>> +   The LWAT/LDAT opcode requires the address to be a single register that
>> +   points to a suitably aligned memory location.  Asm volatile is used to
>> +   prevent the optimizer from moving the operation.  */
>> +
>> +#define _AMO_LD_DECREMENT(NAME, TYPE, OPCODE, FC)                   \
>> +static __inline__ TYPE                                                      
>> \
>> +NAME (TYPE *_PTR)                                                   \
>> +{                                                                   \
>> +  TYPE _RET;                                                                
>> \
>> +  __asm__ volatile (OPCODE " %[ret],%P[addr],%[code]\n"                     
>> \
>> +                : [addr] "+Q" (_PTR[1]), [ret] "=r" (_RET)          \
>> +                : "Q" (*(TYPE (*)[2]) (_PTR)), [code] "n" (FC));    \
>> +  return _RET;                                                              
>> \
>> +}
>> +
>>  _AMO_LD_SIMPLE (amo_lwat_add,   uint32_t, "lwat", _AMO_LD_ADD)
>>  _AMO_LD_SIMPLE (amo_lwat_xor,   uint32_t, "lwat", _AMO_LD_XOR)
>>  _AMO_LD_SIMPLE (amo_lwat_ior,   uint32_t, "lwat", _AMO_LD_IOR)
>> @@ -78,11 +136,19 @@ _AMO_LD_SIMPLE (amo_lwat_and,   uint32_t, "lwat", 
>> _AMO_LD_AND)
>>  _AMO_LD_SIMPLE (amo_lwat_umax,  uint32_t, "lwat", _AMO_LD_UMAX)
>>  _AMO_LD_SIMPLE (amo_lwat_umin,  uint32_t, "lwat", _AMO_LD_UMIN)
>>  _AMO_LD_SIMPLE (amo_lwat_swap,  uint32_t, "lwat", _AMO_LD_SWAP)
>> +_AMO_LD_CMPSWP    (amo_lwat_cas_neq,     uint32_t, "lwat", _AMO_LD_CS_NE)
>> +_AMO_LD_INCREMENT (amo_lwat_inc_eq,      uint32_t, "lwat", 
>> _AMO_LD_INC_EQUAL)
>> +_AMO_LD_INCREMENT (amo_lwat_inc_bounded, uint32_t, "lwat", 
>> _AMO_LD_INC_BOUNDED)
>> +_AMO_LD_DECREMENT (amo_lwat_dec_bounded, uint32_t, "lwat", 
>> _AMO_LD_DEC_BOUNDED)
>>  
>>  _AMO_LD_SIMPLE (amo_lwat_sadd,  int32_t,  "lwat", _AMO_LD_ADD)
>>  _AMO_LD_SIMPLE (amo_lwat_smax,  int32_t,  "lwat", _AMO_LD_SMAX)
>>  _AMO_LD_SIMPLE (amo_lwat_smin,  int32_t,  "lwat", _AMO_LD_SMIN)
>>  _AMO_LD_SIMPLE (amo_lwat_sswap, int32_t,  "lwat", _AMO_LD_SWAP)
>> +_AMO_LD_CMPSWP    (amo_lwat_scas_neq,     int32_t, "lwat", _AMO_LD_CS_NE)
>> +_AMO_LD_INCREMENT (amo_lwat_sinc_eq,      int32_t, "lwat", 
>> _AMO_LD_INC_EQUAL)
>> +_AMO_LD_INCREMENT (amo_lwat_sinc_bounded, int32_t, "lwat", 
>> _AMO_LD_INC_BOUNDED)
>> +_AMO_LD_DECREMENT (amo_lwat_sdec_bounded, int32_t, "lwat", 
>> _AMO_LD_DEC_BOUNDED)
>>  
>>  _AMO_LD_SIMPLE (amo_ldat_add,   uint64_t, "ldat", _AMO_LD_ADD)
>>  _AMO_LD_SIMPLE (amo_ldat_xor,   uint64_t, "ldat", _AMO_LD_XOR)
>> @@ -91,12 +157,19 @@ _AMO_LD_SIMPLE (amo_ldat_and,   uint64_t, "ldat", 
>> _AMO_LD_AND)
>>  _AMO_LD_SIMPLE (amo_ldat_umax,  uint64_t, "ldat", _AMO_LD_UMAX)
>>  _AMO_LD_SIMPLE (amo_ldat_umin,  uint64_t, "ldat", _AMO_LD_UMIN)
>>  _AMO_LD_SIMPLE (amo_ldat_swap,  uint64_t, "ldat", _AMO_LD_SWAP)
>> +_AMO_LD_CMPSWP    (amo_ldat_cas_neq,     uint64_t, "ldat", _AMO_LD_CS_NE)
>> +_AMO_LD_INCREMENT (amo_ldat_inc_eq,      uint64_t, "ldat", 
>> _AMO_LD_INC_EQUAL)
>> +_AMO_LD_INCREMENT (amo_ldat_inc_bounded, uint64_t, "ldat", 
>> _AMO_LD_INC_BOUNDED)
>> +_AMO_LD_DECREMENT (amo_ldat_dec_bounded, uint64_t, "ldat", 
>> _AMO_LD_DEC_BOUNDED)
>>  
>>  _AMO_LD_SIMPLE (amo_ldat_sadd,  int64_t,  "ldat", _AMO_LD_ADD)
>>  _AMO_LD_SIMPLE (amo_ldat_smax,  int64_t,  "ldat", _AMO_LD_SMAX)
>>  _AMO_LD_SIMPLE (amo_ldat_smin,  int64_t,  "ldat", _AMO_LD_SMIN)
>>  _AMO_LD_SIMPLE (amo_ldat_sswap, int64_t,  "ldat", _AMO_LD_SWAP)
>> -
>> +_AMO_LD_CMPSWP    (amo_ldat_scas_neq,     int64_t, "ldat", _AMO_LD_CS_NE)
>> +_AMO_LD_INCREMENT (amo_ldat_sinc_eq,      int64_t, "ldat", 
>> _AMO_LD_INC_EQUAL)
>> +_AMO_LD_INCREMENT (amo_ldat_sinc_bounded, int64_t, "ldat", 
>> _AMO_LD_INC_BOUNDED)
>> +_AMO_LD_DECREMENT (amo_ldat_sdec_bounded, int64_t, "ldat", 
>> _AMO_LD_DEC_BOUNDED)
>>  /* Enumeration of the STWAT/STDAT sub-opcodes.  */
>>  enum _AMO_ST {
>>    _AMO_ST_ADD               = 0x00,         /* Store Add.  */
>> @@ -127,16 +200,35 @@ NAME (TYPE *_PTR, TYPE _VALUE)                         
>>                 \
>>    return;                                                           \
>>  }
>>  
>> +/* Implementation of the STWAT/STDAT store twin operation that takes
>> +   one register and modifies two words or double-words of memory.
>> +   No value is returned.
>> +
>> +   The STWAT/STDAT opcode requires the address to be a single register
>> +   that points to a suitably aligned memory location.  Asm volatile is
>> +   used to prevent the optimizer from moving the operation.  */
>> +
>> +#define _AMO_ST_TWIN(NAME, TYPE, OPCODE, FC)                        \
>> +static __inline__ void                                                      
>> \
>> +NAME (TYPE *_PTR, TYPE _VALUE)                                              
>> \
>> +{                                                                   \
>> +  __asm__ volatile (OPCODE " %[src],%P[addr],%[code]"                       
>> \
>> +                : [addr] "+Q" (*(TYPE (*)[2]) _PTR)                 \
>> +                : [src] "r" (_VALUE), [code] "n" (FC));             \
>> +}
>> +
>>  _AMO_ST_SIMPLE (amo_stwat_add,  uint32_t, "stwat", _AMO_ST_ADD)
>>  _AMO_ST_SIMPLE (amo_stwat_xor,  uint32_t, "stwat", _AMO_ST_XOR)
>>  _AMO_ST_SIMPLE (amo_stwat_ior,  uint32_t, "stwat", _AMO_ST_IOR)
>>  _AMO_ST_SIMPLE (amo_stwat_and,  uint32_t, "stwat", _AMO_ST_AND)
>>  _AMO_ST_SIMPLE (amo_stwat_umax, uint32_t, "stwat", _AMO_ST_UMAX)
>>  _AMO_ST_SIMPLE (amo_stwat_umin, uint32_t, "stwat", _AMO_ST_UMIN)
>> +_AMO_ST_TWIN   (amo_stwat_twin, uint32_t, "stwat", _AMO_ST_TWIN)
>>  
>>  _AMO_ST_SIMPLE (amo_stwat_sadd, int32_t,  "stwat", _AMO_ST_ADD)
>>  _AMO_ST_SIMPLE (amo_stwat_smax, int32_t,  "stwat", _AMO_ST_SMAX)
>>  _AMO_ST_SIMPLE (amo_stwat_smin, int32_t,  "stwat", _AMO_ST_SMIN)
>> +_AMO_ST_TWIN   (amo_stwat_stwin, int32_t, "stwat", _AMO_ST_TWIN)
>>  
>>  _AMO_ST_SIMPLE (amo_stdat_add,  uint64_t, "stdat", _AMO_ST_ADD)
>>  _AMO_ST_SIMPLE (amo_stdat_xor,  uint64_t, "stdat", _AMO_ST_XOR)
>> @@ -144,9 +236,11 @@ _AMO_ST_SIMPLE (amo_stdat_ior,  uint64_t, "stdat", 
>> _AMO_ST_IOR)
>>  _AMO_ST_SIMPLE (amo_stdat_and,  uint64_t, "stdat", _AMO_ST_AND)
>>  _AMO_ST_SIMPLE (amo_stdat_umax, uint64_t, "stdat", _AMO_ST_UMAX)
>>  _AMO_ST_SIMPLE (amo_stdat_umin, uint64_t, "stdat", _AMO_ST_UMIN)
>> +_AMO_ST_TWIN   (amo_stdat_twin, uint64_t, "stdat", _AMO_ST_TWIN)
>>  
>>  _AMO_ST_SIMPLE (amo_stdat_sadd, int64_t,  "stdat", _AMO_ST_ADD)
>>  _AMO_ST_SIMPLE (amo_stdat_smax, int64_t,  "stdat", _AMO_ST_SMAX)
>>  _AMO_ST_SIMPLE (amo_stdat_smin, int64_t,  "stdat", _AMO_ST_SMIN)
>> +_AMO_ST_TWIN   (amo_stdat_stwin, int64_t, "stdat", _AMO_ST_TWIN)
>>  #endif      /* _ARCH_PWR9 && _ARCH_PPC64.  */
>>  #endif      /* _POWERPC_AMO_H.  */
>> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
>> index ec9bb59900c..96deee2a794 100644
>> --- a/gcc/doc/extend.texi
>> +++ b/gcc/doc/extend.texi
>> @@ -24807,11 +24807,19 @@ uint32_t amo_lwat_and (uint32_t *, uint32_t);
>>  uint32_t amo_lwat_umax (uint32_t *, uint32_t);
>>  uint32_t amo_lwat_umin (uint32_t *, uint32_t);
>>  uint32_t amo_lwat_swap (uint32_t *, uint32_t);
>> +uint32_t amo_lwat_cas_neq (uint32_t *, uint32_t, uint32_t);
>> +uint32_t amo_lwat_inc_eq (uint32_t *);
>> +uint32_t amo_lwat_inc_bounded (uint32_t *);
>> +uint32_t amo_lwat_dec_bounded (uint32_t *);
>>  
>>  int32_t amo_lwat_sadd (int32_t *, int32_t);
>>  int32_t amo_lwat_smax (int32_t *, int32_t);
>>  int32_t amo_lwat_smin (int32_t *, int32_t);
>>  int32_t amo_lwat_sswap (int32_t *, int32_t);
>> +int32_t amo_lwat_scas_neq (int32_t *, int32_t, int32_t);
>> +int32_t amo_lwat_sinc_eq (int32_t *);
>> +int32_t amo_lwat_sinc_bounded (int32_t *);
>> +int32_t amo_lwat_sdec_bounded (int32_t *);
>>  
>>  uint64_t amo_ldat_add (uint64_t *, uint64_t);
>>  uint64_t amo_ldat_xor (uint64_t *, uint64_t);
>> @@ -24820,11 +24828,19 @@ uint64_t amo_ldat_and (uint64_t *, uint64_t);
>>  uint64_t amo_ldat_umax (uint64_t *, uint64_t);
>>  uint64_t amo_ldat_umin (uint64_t *, uint64_t);
>>  uint64_t amo_ldat_swap (uint64_t *, uint64_t);
>> +uint64_t amo_ldat_cas_neq (uint64_t *, uint64_t, uint64_t);
>> +uint64_t amo_ldat_inc_eq (uint64_t *);
>> +uint64_t amo_ldat_inc_bounded (uint64_t *);
>> +uint64_t amo_ldat_dec_bounded (uint64_t *);
>>  
>>  int64_t amo_ldat_sadd (int64_t *, int64_t);
>>  int64_t amo_ldat_smax (int64_t *, int64_t);
>>  int64_t amo_ldat_smin (int64_t *, int64_t);
>>  int64_t amo_ldat_sswap (int64_t *, int64_t);
>> +int64_t amo_ldat_scas_neq (int64_t *, int64_t, int64_t);
>> +int64_t amo_ldat_sinc_eq (int64_t *);
>> +int64_t amo_ldat_sinc_bounded (int64_t *);
>> +int64_t amo_ldat_sdec_bounded (int64_t *);
>>  
>>  void amo_stwat_add (uint32_t *, uint32_t);
>>  void amo_stwat_xor (uint32_t *, uint32_t);
>> @@ -24832,10 +24848,12 @@ void amo_stwat_ior (uint32_t *, uint32_t);
>>  void amo_stwat_and (uint32_t *, uint32_t);
>>  void amo_stwat_umax (uint32_t *, uint32_t);
>>  void amo_stwat_umin (uint32_t *, uint32_t);
>> +void amo_stwat_twin (uint32_t *, uint32_t);
>>  
>>  void amo_stwat_sadd (int32_t *, int32_t);
>>  void amo_stwat_smax (int32_t *, int32_t);
>>  void amo_stwat_smin (int32_t *, int32_t);
>> +void amo_stwat_stwin (int32_t *, int32_t);
>>  
>>  void amo_stdat_add (uint64_t *, uint64_t);
>>  void amo_stdat_xor (uint64_t *, uint64_t);
>> @@ -24843,10 +24861,12 @@ void amo_stdat_ior (uint64_t *, uint64_t);
>>  void amo_stdat_and (uint64_t *, uint64_t);
>>  void amo_stdat_umax (uint64_t *, uint64_t);
>>  void amo_stdat_umin (uint64_t *, uint64_t);
>> +void amo_stdat_twin (uint64_t *, uint64_t);
>>  
>>  void amo_stdat_sadd (int64_t *, int64_t);
>>  void amo_stdat_smax (int64_t *, int64_t);
>>  void amo_stdat_smin (int64_t *, int64_t);
>> +void amo_stdat_stwin (int64_t *, int64_t);
>>  @end smallexample
>>  
>>  @node PowerPC Matrix-Multiply Assist Built-in Functions
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo3.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo3.c
>> new file mode 100644
>> index 00000000000..08928331a11
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo3.c
>> @@ -0,0 +1,132 @@
>> +/* { dg-do compile { target { lp64 } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +/* { dg-require-effective-target powerpc_vsx } */
>> +
>> +/* Test whether the ISA 3.0 amo (atomic memory operations) functions 
>> perform as
>> +   expected.  */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +
>> +uint32_t
>> +do_lw_cs_ne (uint32_t *mem, uint32_t cond, uint32_t value)
>> +{
>> +  return amo_lwat_cas_neq (mem, cond, value);
>> +}
>> +
>> +int32_t
>> +do_lw_scs_ne (int32_t *mem, int32_t cond, int32_t value)
>> +{
>> +  return amo_lwat_scas_neq (mem, cond, value);
>> +}
>> +
>> +uint32_t
>> +do_lw_inc_equal (uint32_t *mem)
>> +{
>> +  return amo_lwat_inc_eq (mem);
>> +}
>> +
>> +int32_t
>> +do_lw_sinc_equal (int32_t *mem)
>> +{
>> +  return amo_lwat_sinc_eq (mem);
>> +}
>> +
>> +uint32_t
>> +do_lw_inc_bounded (uint32_t *mem)
>> +{
>> +  return amo_lwat_inc_bounded (mem);
>> +}
>> +
>> +int32_t
>> +do_lw_sinc_bounded (int32_t *mem)
>> +{
>> +  return amo_lwat_sinc_bounded (mem);
>> +}
>> +uint32_t
>> +do_lw_dec_bounded (uint32_t *mem)
>> +{
>> +  return amo_lwat_dec_bounded (mem);
>> +}
>> +
>> +int32_t
>> +do_lw_sdec_bounded (int32_t *mem)
>> +{
>> +  return amo_lwat_sdec_bounded (mem);
>> +}
>> +
>> +uint64_t
>> +do_ld_cs_ne (uint64_t *mem, uint64_t cond, uint64_t value)
>> +{
>> +  return amo_ldat_cas_neq (mem, cond, value);
>> +}
>> +
>> +int64_t
>> +do_ld_scs_ne (int64_t *mem, int64_t cond, int64_t value)
>> +{
>> +  return amo_ldat_scas_neq (mem, cond, value);
>> +}
>> +
>> +uint64_t
>> +do_ld_inc_equal (uint64_t *mem)
>> +{
>> +  return amo_ldat_inc_eq (mem);
>> +}
>> +
>> +int64_t
>> +do_ld_sinc_equal (int64_t *mem)
>> +{
>> +  return amo_ldat_sinc_eq (mem);
>> +}
>> +
>> +uint64_t
>> +do_ld_inc_bounded (uint64_t *mem)
>> +{
>> +  return amo_ldat_inc_bounded (mem);
>> +}
>> +
>> +int64_t
>> +do_ld_sinc_bounded (int64_t *mem)
>> +{
>> +  return amo_ldat_sinc_bounded (mem);
>> +}
>> +uint64_t
>> +do_ld_dec_bounded (uint64_t *mem)
>> +{
>> +  return amo_ldat_dec_bounded (mem);
>> +}
>> +
>> +int64_t
>> +do_ld_sdec_bounded (int64_t *mem)
>> +{
>> +  return amo_ldat_sdec_bounded (mem);
>> +}
>> +
>> +void
>> +do_sw_twin (uint32_t *mem, uint32_t value)
>> +{
>> +  amo_stwat_twin (mem, value);
>> +}
>> +
>> +void
>> +do_sw_stwin (int32_t *mem, int32_t value)
>> +{
>> +  amo_stwat_stwin (mem, value);
>> +}
>> +
>> +void
>> +do_sd_twin (uint64_t *mem, uint64_t value)
>> +{
>> +  amo_stdat_twin (mem, value);
>> +}
>> +
>> +void
>> +do_sd_stwin (int64_t *mem, int64_t value)
>> +{
>> +  amo_stdat_stwin (mem, value);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mldat\M}  8 } } */
>> +/* { dg-final { scan-assembler-times {\mlwat\M}  8 } } */
>> +/* { dg-final { scan-assembler-times {\mstdat\M}  2 } } */
>> +/* { dg-final { scan-assembler-times {\mstwat\M}  2 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo4.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo4.c
>> new file mode 100644
>> index 00000000000..fce85c8dc52
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo4.c
>> @@ -0,0 +1,92 @@
>> +/* { dg-do run { target { lp64 && p9vector_hw } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +/* { dg-require-effective-target powerpc_vsx } */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +
>> +/* Test whether the ISA 3.0 amo (atomic memory operations) functions 
>> perform as
>> +   expected.  */
>> +
>> +/* 32-bit tests.  */
>> +static uint32_t u32_ld[4][2] = {
>> +  { 10, 15 },                       /* Increment Bounded */
>> +  { 10, 10 },                       /* Increment Bounded */
>> +  { 10, 10 },                       /* Increment Equal */
>> +  { 10, 15 }                        /* Increment Equal */
>> +};
>> +
>> +static uint32_t u32_result[4];
>> +
>> +static uint32_t u32_update[4] = {
>> +  10 + 1,                   /* Increment Bounded */
>> +  10,                               /* Increment Bounded */
>> +  10 + 1,                   /* Increment Equal */
>> +  10                                /* Increment Equal */
>> +};
>> +
>> +static uint32_t u32_prev[4] = {
>> +  10,                               /* Increment Bounded */
>> +  INT_MIN,                  /* Increment Bounded */
>> +  10,                               /* Increment Equal */
>> +  INT_MIN                   /* Increment Equal */
>> +};
>> +
>> +/* 64-bit tests.  */
>> +static uint64_t u64_ld[4][2] = {
>> +  { 10, 15 },                       /* Increment Bounded */
>> +  { 10, 10 },                       /* Increment Bounded */
>> +  { 10, 10 },                       /* Increment Equal */
>> +  { 10, 15 }                        /* Increment Equal */
>> +};
>> +
>> +static uint64_t u64_result[4];
>> +
>> +static uint64_t u64_update[4] = {
>> +  10 + 1,                   /* Increment Bounded */
>> +  10,                               /* Increment Bounded */
>> +  10 + 1,                   /* Increment Equal */
>> +  10                                /* Increment Equal */
>> +};
>> +
>> +static uint64_t u64_prev[4] = {
>> +  10,                               /* Increment Bounded */
>> +  INT64_MIN,                        /* Increment Bounded */
>> +  10,                               /* Increment Equal */
>> +  INT64_MIN                 /* Increment Equal */
>> +};
>> +
>> +int
>> +main (void)
>> +{
>> +  size_t i;
>> +
>> +  u32_result[0] = amo_lwat_inc_bounded (&u32_ld[0][0]);
>> +  u32_result[1] = amo_lwat_inc_bounded (&u32_ld[1][0]);
>> +  u32_result[2] = amo_lwat_inc_eq (&u32_ld[2][0]);
>> +  u32_result[3] = amo_lwat_inc_eq (&u32_ld[3][0]);
>> +
>> +  u64_result[0] = amo_ldat_inc_bounded (&u64_ld[0][0]);
>> +  u64_result[1] = amo_ldat_inc_bounded (&u64_ld[1][0]);
>> +  u64_result[2] = amo_ldat_inc_eq (&u64_ld[2][0]);
>> +  u64_result[3] = amo_ldat_inc_eq (&u64_ld[3][0]);
>> +
>> +  for (i = 0; i < 4; i++)
>> +    {
>> +      if (u32_result[i] != u32_prev[i])
>> +    abort ();
>> +
>> +      if (u32_ld[i][0] != u32_update[i])
>> +    abort ();
>> +
>> +      if (u64_result[i] != u64_prev[i])
>> +    abort ();
>> +
>> +      if (u64_ld[i][0] != u64_update[i])
>> +    abort ();
>> +    }
>> +
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo5.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo5.c
>> new file mode 100644
>> index 00000000000..b28176a0d7e
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo5.c
>> @@ -0,0 +1,44 @@
>> +/* { dg-do run { target { lp64 && p9vector_hw } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +/* { dg-require-effective-target powerpc_vsx } */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +
>> +/* Test whether the ISA 3.0 amo (atomic memory operations) functions 
>> perform as
>> +   expected.  */
>> +
>> +int
>> +main (void)
>> +{
>> +  static uint32_t u32_mem = 100;
>> +  static uint32_t u32_cond = 200;
>> +  static uint32_t u32_value = 250;
>> +  static uint32_t u32_prev = 100;
>> +  static uint32_t u32_result;
>> +
>> +  static uint64_t u64_mem = 200;
>> +  static uint64_t u64_cond = 300;
>> +  static uint64_t u64_value = 250;
>> +  static uint64_t u64_prev = 200;
>> +  static uint64_t u64_result;
>> +
>> +  u32_result = amo_lwat_cas_neq (&u32_mem, u32_cond, u32_value);
>> +  u64_result = amo_ldat_cas_neq (&u64_mem, u64_cond, u64_value);
>> +
>> +  if (u32_mem != u32_value)
>> +    abort();
>> +
>> +  if (u32_result != u32_prev)
>> +    abort();
>> +
>> +  if (u64_mem != u64_value)
>> +    abort();
>> +
>> +  if (u64_result != u64_prev)
>> +    abort();
>> +
>> +    return 0;
>> + }
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo6.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo6.c
>> new file mode 100644
>> index 00000000000..40a6691c0fe
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo6.c
>> @@ -0,0 +1,40 @@
>> +/* { dg-do run { target { lp64 && p9vector_hw } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +/* { dg-require-effective-target powerpc_vsx } */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +
>> +/* Test whether the ISA 3.0 amo (atomic memory operations) functions 
>> perform as
>> +   expected.  */
>> +
>> +int
>> +main (void)
>> +{
>> +  size_t i;
>> +  static uint32_t u32_mem[2] = { 3, 3 };
>> +  static uint32_t u32_value = 5;
>> +  static uint32_t u32_prev[2] = { 3, 3 };
>> +  static uint32_t u32_result;
>> +
>> +  static uint64_t u64_mem[2] = { 7, 7 };
>> +  static uint64_t u64_value = 9;
>> +  static uint64_t u64_prev[2] = { 7, 7 };
>> +  static uint64_t u64_result;
>> +
>> +  amo_stwat_twin (u32_mem, u32_value);
>> +  amo_stdat_twin (u64_mem, u64_value);
>> +
>> +  for ( i = 0; i < 2; i++)
>> +    {
>> +       if (u32_mem[i] != u32_value)
>> +          abort();
>> +
>> +       if (u64_mem[i] != u64_value)
>> +              abort();
>> +     }
>> +
>> +    return 0;
>> + }
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo7.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo7.c
>> new file mode 100644
>> index 00000000000..3fd3a56f52d
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo7.c
>> @@ -0,0 +1,76 @@
>> +/* { dg-do run { target { lp64 && p9vector_hw } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +/* { dg-require-effective-target powerpc_vsx } */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +
>> +/* Test whether the ISA 3.0 amo (atomic memory operations) functions 
>> perform as
>> +   expected.  */
>> +
>> +/* 32-bit tests.  */
>> +static uint32_t u32_ld[2][2] = {
>> +  { 10, 15 },                       /* Decrement Bounded */
>> +  { 10, 10 },                       /* Decrement Bounded */
>> +};
>> +
>> +static uint32_t u32_result[2];
>> +
>> +static uint32_t u32_update[2] = {
>> +  15 - 1,                   /* Decrement Bounded */
>> +  10,                               /* Decrement Bounded */
>> +};
>> +
>> +static uint32_t u32_prev[2] = {
>> +  15,                               /* Decrement Bounded */
>> +  INT_MIN,                  /* Decrement Bounded */
>> +};
>> +
>> +/* 64-bit tests.  */
>> +static uint64_t u64_ld[2][2] = {
>> +  { 10, 15 },                   /* Decrement Bounded */
>> +  { 10, 10 },                   /* Decrement Bounded */
>> +};
>> +
>> +static uint64_t u64_result[2];
>> +
>> +static uint64_t u64_update[2] = {
>> +  15 - 1,                       /* Decrement Bounded */
>> +  10,                           /* Decrement Bounded */
>> +};
>> +
>> +static uint64_t u64_prev[2] = {
>> +  15,                           /* Decrement Bounded */
>> +  INT64_MIN,                    /* Decrement Bounded */
>> +};
>> +
>> +int
>> +main (void)
>> +{
>> +  size_t i;
>> +
>> +  u32_result[0] = amo_lwat_dec_bounded (&u32_ld[0][0]);
>> +  u32_result[1] = amo_lwat_dec_bounded (&u32_ld[1][0]);
>> +
>> +  u64_result[0] = amo_ldat_dec_bounded (&u64_ld[0][0]);
>> +  u64_result[1] = amo_ldat_dec_bounded (&u64_ld[1][0]);
>> +  
>> +  for (i = 0; i < 2; i++)
>> +    {
>> +      if (u32_result[i] != u32_prev[i])
>> +    abort ();
>> +
>> +      if (u32_ld[i][1] != u32_update[i])
>> +    abort ();
>> +
>> +      if (u64_result[i] != u64_prev[i])
>> +    abort ();
>> +
>> +      if (u64_ld[i][1] != u64_update[i])
>> +    abort ();
>> +    }
>> +
>> +  return 0;
>> +}
>>
>>
>>
>

Re: [PING^2] [PATCH v2] rs6000: Adding missed ISA 3.0 atomic memory operation instructions.

Reply via email to