Gentle ping. Also requesting backport to previous branches (13 to 16)

Thanks,
Avinash

On Thu, 2026-06-04 at 16:02 +0530, Avinash Jayakar wrote:
> Hi,
> 
> Incorporated the changes requested in
> https://gcc.gnu.org/pipermail/gcc-patches/2026-June/718973.html
> 
> Bootstrapped and regtested on powerpc64le and powerpc64 (32 and 64
> bit)
> variants with no regressions. Kindly review.
> 
> A request came in to backport this patch to GCC 15.  Is it OK to
> backport
> it to the GCC 16 and GCC 15 branches?
> 
> Changes from v4:
> * Use expand_expr and convert_memory_address instead of expand_normal
> for optimal use of indexed mode addressing.
> * Added tests to check indexed mode addressing.
> * Updated extend.texi with builtin documentation.
> * Use dejagnu-cpu=power8, since l<b,h,q>arx require ISA 2.06.
> 
> Changes from v3:
> * Remove case 5 in altivec_build_resolved_builtin
> 
> Changes from v2:
> * Updated commit message with hint explanation.
> * Expanded test case macros.
> 
> Thanks and regards,
> Avinash Jayakar
> 
> This patch adds a new powerpc specific atomic builtin which is
> similar
> to the generic __atomic_compare_exchange builtin.
> 
> bool __builtin_ppc_atomic_cas_local (type *ptr, type *expected,
>                                    type *desired, bool weak,
>                                    int success_memorder,
>                                    int failure_memorder)
> 
> It behaves like __atomic_compare_exchange(), but it uses an EH value
> of
> 1 in the larx (load-and-reserve) instruction, which provides a hint
> that the program will perform a subsequent store to the specified
> location. The new builtin helps optimize lock contention on PowerPC
> by
> keeping the lock cacheline in the local processor longer, reducing
> performance penalties from cache coherence protocol traffic.
> 
> 2026-06-04  Avinash Jayakar  <[email protected]>
>           Surya Kumari Jangala  <[email protected]>
> 
> gcc/ChangeLog:
>       * config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin):
> Add logic to
>       handle __builtin_ppc_atomic_cas_local.
>       * config/rs6000/rs6000-builtins.def: New builtins for
>       __builtin_ppc_atomic_cas_local with types.
>       * config/rs6000/rs6000-c.cc
> (altivec_build_resolved_builtin): Handle
>       builtins with up to 6 arguments.
>       * config/rs6000/rs6000-overload.def: Overload builtin for
> signed/unsigned
>       char, short, int, long, __int128.
>       * config/rs6000/rs6000-protos.h
> (rs6000_expand_atomic_compare_and_swap): Add
>       additional parameter 'local' to the prototype.
>       * config/rs6000/rs6000.cc (emit_load_locked): Add new
> parameter. Pass new
>       parameter to generate load-locked instruction.
>       (rs6000_expand_atomic_compare_and_swap): Add new parameter.
> Call
>       emit_load_locked() with additional parameter value of EH
> bit.
>       (rs6000_expand_atomic_exchange): Pass EH value 0 to
> emit_load_locked().
>       (rs6000_expand_atomic_op): Likewise.
>       * config/rs6000/sync.md (load_locked<mode>): Add new operand
> in RTL template.
>       Specify EH bit in the larx instruction.
>       (load_locked<QHI:mode>_si): Likewise.
>       (load_lockedpti): Likewise.
>       (load_lockedti): Add new operand in RTL template. Pass EH
> bit to
>       gen_load_lockedpti().
>       (atomic_compare_and_swap<mode>): Pass new parameter 'false'
> to
>       rs6000_expand_atomic_compare_and_swap.
>       (atomic_compare_and_swap_local<mode>): New define_expand.
>       * doc/extend.texi: Add documentation for new builtin.
> 
> gcc/testsuite/ChangeLog:
>       * gcc.target/powerpc/acmp-tst-32bit.c: New test.
>       * gcc.target/powerpc/acmp-tst.c: New test.
>       * gcc.target/powerpc/acmp-tst-indexed.c: New test.
> ---
>  gcc/config/rs6000/rs6000-builtin.cc           | 106 +++++++++++
>  gcc/config/rs6000/rs6000-builtins.def         |  17 ++
>  gcc/config/rs6000/rs6000-c.cc                 | 119 +++++++++++-
>  gcc/config/rs6000/rs6000-overload.def         |   8 +
>  gcc/config/rs6000/rs6000-protos.h             |   2 +-
>  gcc/config/rs6000/rs6000.cc                   |  17 +-
>  gcc/config/rs6000/sync.md                     |  37 +++-
>  gcc/doc/extend.texi                           |  23 +++
>  .../gcc.target/powerpc/acmp-tst-32bit.c       |  64 +++++++
>  .../gcc.target/powerpc/acmp-tst-indexed.c     | 165 ++++++++++++++++
>  gcc/testsuite/gcc.target/powerpc/acmp-tst.c   | 176
> ++++++++++++++++++
>  11 files changed, 714 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/acmp-tst-32bit.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/acmp-tst-
> indexed.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/acmp-tst.c
> 
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc
> b/gcc/config/rs6000/rs6000-builtin.cc
> index 4d0e541351f..43d2478bf75 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -3289,6 +3289,112 @@ rs6000_expand_builtin (tree exp, rtx target,
> rtx /* subtarget */,
>        return expand_call (exp, target, ignore);
>      }
>  
> +  if (fcode == RS6000_BIF_PPC_ATOMIC_CAS_QI
> +      || fcode == RS6000_BIF_PPC_ATOMIC_CAS_HI
> +      || fcode == RS6000_BIF_PPC_ATOMIC_CAS_SI
> +      || fcode == RS6000_BIF_PPC_ATOMIC_CAS_DI
> +      || fcode == RS6000_BIF_PPC_ATOMIC_CAS_TI)
> +    {
> +      machine_mode mode; // Get mode based on BIF ID (QImode,
> SImode, etc.)
> +
> +      switch (fcode)
> +     {
> +     case RS6000_BIF_PPC_ATOMIC_CAS_QI:
> +       mode = QImode;
> +       icode = CODE_FOR_atomic_compare_and_swap_localqi;
> +       break;
> +     case RS6000_BIF_PPC_ATOMIC_CAS_HI:
> +       mode = HImode;
> +       icode = CODE_FOR_atomic_compare_and_swap_localhi;
> +       break;
> +     case RS6000_BIF_PPC_ATOMIC_CAS_SI:
> +       mode = SImode;
> +       icode = CODE_FOR_atomic_compare_and_swap_localsi;
> +       break;
> +     case RS6000_BIF_PPC_ATOMIC_CAS_DI:
> +       mode = DImode;
> +       icode = CODE_FOR_atomic_compare_and_swap_localdi;
> +       break;
> +     case RS6000_BIF_PPC_ATOMIC_CAS_TI:
> +       mode = TImode;
> +       icode = CODE_FOR_atomic_compare_and_swap_localti;
> +       break;
> +     default:
> +       gcc_unreachable ();
> +     }
> +
> +      // For arg 0 (ptr to data)
> +      rtx ptr = expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
> Pmode,
> +                          EXPAND_SUM);
> +      ptr = convert_memory_address (Pmode, ptr);
> +      rtx mem = gen_rtx_MEM (mode, ptr);
> +
> +      // For arg 1 (expected ptr)
> +      rtx exp_ptr = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
> Pmode,
> +                              EXPAND_SUM);
> +      exp_ptr = convert_memory_address (Pmode, exp_ptr);
> +      rtx expected_val = gen_reg_rtx (mode);
> +      emit_move_insn (expected_val, gen_rtx_MEM (mode, exp_ptr));
> +
> +      // For arg 2 (desired ptr)
> +      rtx desired_ptr = expand_expr (CALL_EXPR_ARG (exp, 2),
> NULL_RTX, Pmode,
> +                                  EXPAND_SUM);
> +      desired_ptr = convert_memory_address (Pmode, desired_ptr);
> +      rtx desired_val = gen_reg_rtx (mode);
> +      emit_move_insn (desired_val, gen_rtx_MEM (mode, desired_ptr));
> +
> +      // Args 3, 4, 5: weak, succ, fail (constants)
> +      rtx weak = expand_normal (CALL_EXPR_ARG (exp, 3));
> +      rtx succ = expand_normal (CALL_EXPR_ARG (exp, 4));
> +      rtx fail = expand_normal (CALL_EXPR_ARG (exp, 5));
> +
> +      // 0: Boolean return (Output)
> +      struct expand_operand ops[8];
> +      create_output_operand (&ops[0], target, SImode);
> +
> +      // 1: Old value return (Output)
> +      rtx old_val = gen_reg_rtx (mode);
> +      create_output_operand (&ops[1], old_val, mode);
> +
> +      // 2: The Memory (Fixed/Input - it's a MEM rtx)
> +      // We use create_fixed_operand because it's a specific MEM
> location
> +      create_fixed_operand (&ops[2], mem);
> +
> +      // 3: Expected Value (Input)
> +      create_input_operand (&ops[3], expected_val, mode);
> +
> +      // 4: Desired Value (Input)
> +      create_input_operand (&ops[4], desired_val, mode);
> +
> +      // 5, 6, 7: Weak, Success, Failure (Immediate/Constants)
> +      create_input_operand (&ops[5], weak, SImode);
> +      create_input_operand (&ops[6], succ, SImode);
> +      create_input_operand (&ops[7], fail, SImode);
> +
> +      // Now call expand_insn with the ops array
> +      if (!maybe_expand_insn (icode, 8, ops))
> +     error ("invalid arguments to builtin");
> +
> +      // Create a label for the end of the function.
> +      rtx done_label = gen_label_rtx ();
> +
> +      /* Standard Semantics: Update 'expected' ONLY on failure.
> +      If target (the boolean result) is NOT 0, the CAS succeeded.
> +      In the case of success, we jump straight to the end.  */
> +
> +      // If target != 0 (Success), skip the store.
> +      emit_cmp_and_jump_insns (target, const0_rtx, NE, NULL_RTX,
> +                            SImode, 1, done_label);
> +
> +      // FAILURE PATH: This code runs only if target == 0.
> +      rtx expected_mem = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode,
> exp_ptr));
> +      emit_move_insn (expected_mem, old_val);
> +
> +      emit_label (done_label);
> +
> +      return target;
> +    }
> +
>    if (bif_is_nosoft (*bifaddr)
>        && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
>      {
> diff --git a/gcc/config/rs6000/rs6000-builtins.def
> b/gcc/config/rs6000/rs6000-builtins.def
> index 0d1529b71d4..a51d8cbc0ae 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -246,6 +246,23 @@
>    const double __builtin_unpack_longdouble (long double, const
> int<1>);
>      UNPACK_TF unpacktf {ibmld}
>  
> +; Builtins for ppc specific atomic compare exchange
> +  bool __builtin_ppc_atomic_cas_local_qi (char *, char *, char *,
> const int, \
> +                                       const int, const int);
> +    PPC_ATOMIC_CAS_QI nothing {}
> +  bool __builtin_ppc_atomic_cas_local_hi (short *, short *, short *,
> \
> +                                       const int, const int,
> const int);
> +    PPC_ATOMIC_CAS_HI nothing {}
> +  bool __builtin_ppc_atomic_cas_local_si (int *, int *, int *, const
> int, \
> +                                       const int, const int);
> +    PPC_ATOMIC_CAS_SI nothing {}
> +  bool __builtin_ppc_atomic_cas_local_di (long long *, long long *,
> \
> +                                       long long *, const int,
> const int, \
> +                                       const int);
> +    PPC_ATOMIC_CAS_DI nothing {}
> +  bool __builtin_ppc_atomic_cas_local_ti (__int128 *, __int128 *,
> __int128 *, \
> +                                       const int, const int,
> const int);
> +    PPC_ATOMIC_CAS_TI nothing {}
>  
>  ; Builtins that have been around just about forever, but not quite.
>  [power5]
> diff --git a/gcc/config/rs6000/rs6000-c.cc
> b/gcc/config/rs6000/rs6000-c.cc
> index 3fa7c04a7ce..3cbdb6fb2ba 100644
> --- a/gcc/config/rs6000/rs6000-c.cc
> +++ b/gcc/config/rs6000/rs6000-c.cc
> @@ -929,7 +929,7 @@ altivec_build_resolved_builtin (tree *args, int
> n, tree fntype, tree ret_type,
>  
>    /* If the number of arguments to an overloaded function increases,
>       we must expand this switch.  */
> -  gcc_assert (MAX_OVLD_ARGS <= 4);
> +  gcc_assert (MAX_OVLD_ARGS <= 6);
>  
>    tree call;
>    switch (n)
> @@ -949,6 +949,10 @@ altivec_build_resolved_builtin (tree *args, int
> n, tree fntype, tree ret_type,
>      case 4:
>        call = build_call_expr (fndecl, 4, args[0], args[1], args[2],
> args[3]);
>        break;
> +    case 6:
> +      call = build_call_expr (fndecl, 6, args[0], args[1], args[2],
> args[3],
> +                           args[4], args[5]);
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -1710,11 +1714,122 @@ find_instance (bool *unsupported_builtin,
> int *instance,
>  
>  tree
>  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
> -                                 void *passed_arglist, bool)
> +                                 void *passed_arglist, bool
> complain)
>  {
>    rs6000_gen_builtins fcode
>      = (rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
>  
> +  /* Handle __builtin_ppc_atomic_cas_local before standard overload
> +     processing.  */
> +  if (fcode == RS6000_OVLD_PPC_ATOMIC_CAS)
> +    {
> +      vec<tree, va_gc> *arglist
> +     = static_cast<vec<tree, va_gc> *> (passed_arglist);
> +
> +      /* Expected: (void *ptr, void *expected, void *desired,
> +      bool weak, int success_order, int failure_order).  */
> +      if (vec_safe_length (arglist) != 6)
> +     {
> +       if (complain)
> +         error_at (loc, "%qE requires 6 arguments", fndecl);
> +       return error_mark_node;
> +     }
> +
> +      /* Get the first argument to determine the actual type.  */
> +      tree arg0 = (*arglist)[0];
> +      tree type0 = TREE_TYPE (arg0);
> +
> +      /* Must be a pointer.  */
> +      if (!POINTER_TYPE_P (type0))
> +     {
> +       if (complain)
> +         error_at (loc, "first argument to %qE must be a
> pointer", fndecl);
> +       return error_mark_node;
> +     }
> +
> +      /* Get the pointee type.  */
> +      tree pointee_type = TREE_TYPE (type0);
> +
> +      /* Must be a complete type.  */
> +      if (!COMPLETE_TYPE_P (pointee_type))
> +     {
> +       if (complain)
> +         error_at (loc, "first argument to %qE must point to a
> complete"
> +                   " type", fndecl);
> +       return error_mark_node;
> +     }
> +
> +      /* Get size in bytes.  */
> +      tree size_tree = TYPE_SIZE_UNIT (pointee_type);
> +      if (!tree_fits_uhwi_p (size_tree))
> +     {
> +       if (complain)
> +         error_at (loc, "type size must be constant");
> +       return error_mark_node;
> +     }
> +
> +      unsigned HOST_WIDE_INT size = tree_to_uhwi (size_tree);
> +
> +      /* Determine which size-specific builtin to use.  */
> +      rs6000_gen_builtins target_fcode;
> +      tree int_type;
> +
> +      switch (size)
> +     {
> +     case 1:
> +       target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_QI;
> +       int_type = unsigned_char_type_node;
> +       break;
> +     case 2:
> +       target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_HI;
> +       int_type = short_unsigned_type_node;
> +       break;
> +     case 4:
> +       target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_SI;
> +       int_type = unsigned_intSI_type_node;
> +       break;
> +     case 8:
> +       target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_DI;
> +       int_type = long_long_unsigned_type_node;
> +       break;
> +     case 16:
> +       target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_TI;
> +       int_type = unsigned_intTI_type_node;
> +       break;
> +     default:
> +       if (complain)
> +         error_at (loc, "size %wu not supported for %qE "
> +                   "(must be 1, 2, 4, 8, or 16 bytes)", size,
> fndecl);
> +       return error_mark_node;
> +     }
> +
> +      /* Create pointer type to the appropriate integer type.  */
> +      tree int_ptr_type = build_pointer_type (int_type);
> +
> +      /* Cast the three pointer arguments to the appropriate integer
> +      pointer type.  */
> +      tree new_arg0 = build1 (VIEW_CONVERT_EXPR, int_ptr_type,
> (*arglist)[0]);
> +      tree new_arg1 = build1 (VIEW_CONVERT_EXPR, int_ptr_type,
> (*arglist)[1]);
> +      tree new_arg2 = build1 (VIEW_CONVERT_EXPR, int_ptr_type,
> (*arglist)[2]);
> +
> +      /* Build new argument list with casted pointers.  */
> +      vec<tree, va_gc> *new_arglist;
> +      vec_alloc (new_arglist, 6);
> +      new_arglist->quick_push (new_arg0);
> +      new_arglist->quick_push (new_arg1);
> +      new_arglist->quick_push (new_arg2);
> +      new_arglist->quick_push ((*arglist)[3]);  /* weak (bool).  */
> +      new_arglist->quick_push ((*arglist)[4]);  /*
> success_memorder.  */
> +      new_arglist->quick_push ((*arglist)[5]);  /*
> failure_memorder.  */
> +
> +      /* Get the target builtin function.  */
> +      tree new_fndecl = rs6000_builtin_decls[target_fcode];
> +
> +      /* Build and return the function call.  */
> +      return build_function_call_vec (loc, vNULL, new_fndecl,
> new_arglist,
> +                                   NULL, fndecl);
> +    }
> +
>    /* Return immediately if this isn't an overload.  */
>    if (fcode <= RS6000_OVLD_NONE)
>      return NULL_TREE;
> diff --git a/gcc/config/rs6000/rs6000-overload.def
> b/gcc/config/rs6000/rs6000-overload.def
> index ef7b59ed112..df97180cc0c 100644
> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -79,6 +79,14 @@
>  ; a semicolon are also treated as blank lines.
>  
>  
> +; The following function is not overloaded, but is internally
> substituted by
> +; __builtin_ppc_atomic_cas_local_{qi,hi,si,di,ti} based on the first
> 3
> +; arguments.
> +[PPC_ATOMIC_CAS, SKIP, __builtin_ppc_atomic_cas_local]
> +  bool __builtin_ppc_atomic_cas_local (void *, void *, void *, const
> int, \
> +                                    const int, const int);
> +    PPC_ATOMIC_CAS_QI PPC_ATOMIC_CAS_FAKERY
> +
>  [BCDADD, __builtin_bcdadd, __builtin_vec_bcdadd]
>    vsq __builtin_vec_bcdadd (vsq, vsq, const int);
>      BCDADD_V1TI
> diff --git a/gcc/config/rs6000/rs6000-protos.h
> b/gcc/config/rs6000/rs6000-protos.h
> index 09424ebaf97..5efca2d5834 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -127,7 +127,7 @@ extern bool rs6000_emit_set_const (rtx, rtx);
>  extern bool rs6000_emit_cmove (rtx, rtx, rtx, rtx);
>  extern bool rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
>  extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
> -extern void rs6000_expand_atomic_compare_and_swap (rtx op[]);
> +extern void rs6000_expand_atomic_compare_and_swap (rtx op[], bool
> local);
>  extern rtx swap_endian_selector_for_mode (machine_mode mode);
>  
>  extern void rs6000_expand_atomic_exchange (rtx op[]);
> diff --git a/gcc/config/rs6000/rs6000.cc
> b/gcc/config/rs6000/rs6000.cc
> index 5562d612b22..bc087ff21f8 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -16748,12 +16748,13 @@ emit_unlikely_jump (rtx cond, rtx label)
>  
>  /* A subroutine of the atomic operation splitters.  Emit a load-
> locked
>     instruction in MODE.  For QI/HImode, possibly use a pattern than
> includes
> -   the zero_extend operation.  */
> +   the zero_extend operation.  LOCAL indicates the EH bit value for
> the
> +   load-locked instruction.  */
>  
>  static void
> -emit_load_locked (machine_mode mode, rtx reg, rtx mem)
> +emit_load_locked (machine_mode mode, rtx reg, rtx mem, rtx local)
>  {
> -  rtx (*fn) (rtx, rtx) = NULL;
> +  rtx (*fn) (rtx, rtx, rtx) = NULL;
>  
>    switch (mode)
>      {
> @@ -16780,7 +16781,7 @@ emit_load_locked (machine_mode mode, rtx reg,
> rtx mem)
>      default:
>        gcc_unreachable ();
>      }
> -  emit_insn (fn (reg, mem));
> +  emit_insn (fn (reg, mem, local));
>  }
>  
>  /* A subroutine of the atomic operation splitters.  Emit a store-
> conditional
> @@ -16950,7 +16951,7 @@ rs6000_finish_atomic_subword (rtx narrow, rtx
> wide, rtx shift)
>  /* Expand an atomic compare and swap operation.  */
>  
>  void
> -rs6000_expand_atomic_compare_and_swap (rtx operands[])
> +rs6000_expand_atomic_compare_and_swap (rtx operands[], bool local)
>  {
>    rtx boolval, retval, mem, oldval, newval, cond;
>    rtx label1, label2, x, mask, shift;
> @@ -17013,7 +17014,7 @@ rs6000_expand_atomic_compare_and_swap (rtx
> operands[])
>      }
>    label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
>  
> -  emit_load_locked (mode, retval, mem);
> +  emit_load_locked (mode, retval, mem, local ? const1_rtx :
> const0_rtx);
>  
>    x = retval;
>    if (mask)
> @@ -17111,7 +17112,7 @@ rs6000_expand_atomic_exchange (rtx
> operands[])
>    label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
>    emit_label (XEXP (label, 0));
>  
> -  emit_load_locked (mode, retval, mem);
> +  emit_load_locked (mode, retval, mem, const0_rtx);
>  
>    x = val;
>    if (mask)
> @@ -17216,7 +17217,7 @@ rs6000_expand_atomic_op (enum rtx_code code,
> rtx mem, rtx val,
>    if (before == NULL_RTX)
>      before = gen_reg_rtx (mode);
>  
> -  emit_load_locked (mode, before, mem);
> +  emit_load_locked (mode, before, mem, const0_rtx);
>  
>    if (code == NOT)
>      {
> diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md
> index 008aaf84937..7299dccac6f 100644
> --- a/gcc/config/rs6000/sync.md
> +++ b/gcc/config/rs6000/sync.md
> @@ -278,17 +278,19 @@ (define_mode_iterator ATOMIC [(QI
> "TARGET_SYNC_HI_QI")
>  (define_insn "load_locked<mode>"
>    [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
>       (unspec_volatile:ATOMIC
> -         [(match_operand:ATOMIC 1 "memory_operand" "Z")]
> UNSPECV_LL))]
> +       [(match_operand:ATOMIC 1 "memory_operand" "Z")
> +        (match_operand:QI 2 "u1bit_cint_operand" "n")]
> UNSPECV_LL))]
>    ""
> -  "<larx> %0,%y1"
> +  "<larx> %0,%y1,%2"
>    [(set_attr "type" "load_l")])
>  
>  (define_insn "load_locked<QHI:mode>_si"
>    [(set (match_operand:SI 0 "int_reg_operand" "=r")
>       (unspec_volatile:SI
> -       [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
> +       [(match_operand:QHI 1 "memory_operand" "Z")
> +        (match_operand:QI 2 "u1bit_cint_operand" "n")]
> UNSPECV_LL))]
>    "TARGET_SYNC_HI_QI"
> -  "<QHI:larx> %0,%y1"
> +  "<QHI:larx> %0,%y1,%2"
>    [(set_attr "type" "load_l")])
>  
>  ;; Use PTImode to get even/odd register pairs.
> @@ -302,7 +304,8 @@ (define_insn "load_locked<QHI:mode>_si"
>  
>  (define_expand "load_lockedti"
>    [(use (match_operand:TI 0 "quad_int_reg_operand"))
> -   (use (match_operand:TI 1 "memory_operand"))]
> +   (use (match_operand:TI 1 "memory_operand"))
> +   (use (match_operand:QI 2 "u1bit_cint_operand"))]
>    "TARGET_SYNC_TI"
>  {
>    rtx op0 = operands[0];
> @@ -316,7 +319,7 @@ (define_expand "load_lockedti"
>        operands[1] = op1 = change_address (op1, TImode, new_addr);
>      }
>  
> -  emit_insn (gen_load_lockedpti (pti, op1));
> +  emit_insn (gen_load_lockedpti (pti, op1, operands[2]));
>    if (WORDS_BIG_ENDIAN)
>      emit_move_insn (op0, gen_lowpart (TImode, pti));
>    else
> @@ -330,11 +333,12 @@ (define_expand "load_lockedti"
>  (define_insn "load_lockedpti"
>    [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
>       (unspec_volatile:PTI
> -         [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")]
> UNSPECV_LL))]
> +       [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")
> +        (match_operand:QI 2 "u1bit_cint_operand" "n")]
> UNSPECV_LL))]
>    "TARGET_SYNC_TI
>     && !reg_mentioned_p (operands[0], operands[1])
>     && quad_int_reg_operand (operands[0], PTImode)"
> -  "lqarx %0,%y1"
> +  "lqarx %0,%y1,%2"
>    [(set_attr "type" "load_l")
>     (set_attr "size" "128")])
>  
> @@ -411,7 +415,22 @@ (define_expand "atomic_compare_and_swap<mode>"
>     (match_operand:SI 7 "const_int_operand")]         ;; model
> fail
>    ""
>  {
> -  rs6000_expand_atomic_compare_and_swap (operands);
> +  rs6000_expand_atomic_compare_and_swap (operands, false);
> +  DONE;
> +})
> +
> +(define_expand "atomic_compare_and_swap_local<mode>"
> +  [(match_operand:SI 0 "int_reg_operand")            ;; bool out
> +   (match_operand:AINT 1 "int_reg_operand")          ;; val out
> +   (match_operand:AINT 2 "memory_operand")           ;; memory
> +   (match_operand:AINT 3 "reg_or_short_operand")     ;; expected
> +   (match_operand:AINT 4 "int_reg_operand")          ;; desired
> +   (match_operand:SI 5 "const_int_operand")          ;; is_weak
> +   (match_operand:SI 6 "const_int_operand")          ;; model
> succ
> +   (match_operand:SI 7 "const_int_operand")]         ;; model
> fail
> +  ""
> +{
> +  rs6000_expand_atomic_compare_and_swap (operands, true);
>    DONE;
>  })
>  
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 48f10feb8af..26869a9c59d 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -24003,6 +24003,29 @@ defined, then the
> @code{__builtin_set_fpscr_rn} built-in returns the FPSCR
>  fields.  If not defined, the @code{__builtin_set_fpscr_rn} does not
> return a
>  value.  If the @option{-msoft-float} option is used, the
>  @code{__builtin_set_fpscr_rn} built-in will not return a value.
> +@defbuiltin{bool __builtin_ppc_atomic_cas_local (@var{type}
> *@var{ptr}, @var{type} *@var{expected}, @var{type} *@var{desired},
> bool @var{weak}, int @var{success_memorder}, int
> @var{failure_memorder})}
> +This built-in function implements a PowerPC-specific atomic compare
> and
> +exchange operation.  It behaves identically to the generic
> +@code{__atomic_compare_exchange} built-in function, with one key
> difference:
> +it uses an EH (Extended Hint) value of 1 in the @code{lbarx},
> @code{lharx},
> +@code{lwarx}, or @code{ldarx} instruction (load-and-reserve
> indexed).
> +
> +The EH bit provides a hint to the processor that the program will
> perform a
> +subsequent store to the specified location.  This hint helps
> optimize lock
> +contention on PowerPC systems by keeping the lock cacheline in the
> local
> +processor longer, reducing performance penalties from cache
> coherence protocol
> +traffic.
> +
> +For details on the behavior and semantics of the arguments, refer to
> the
> +@code{__atomic_compare_exchange} documentation.
> +
> +The first three pointer arguments (@var{ptr}, @var{expected}, and
> +@var{desired}) must point to complete types, and the size of the
> types they
> +point to must be the same and known at compile time.  The types
> themselves may
> +differ.  The supported type sizes are 1, 2, 4, 8, or 16 bytes.
> +
> +@enddefbuiltin
> +
>  
>  @node Basic PowerPC Built-in Functions Available on ISA 2.05
>  @subsubsection Basic PowerPC Built-in Functions Available on ISA
> 2.05
> diff --git a/gcc/testsuite/gcc.target/powerpc/acmp-tst-32bit.c
> b/gcc/testsuite/gcc.target/powerpc/acmp-tst-32bit.c
> new file mode 100644
> index 00000000000..d1d9953abf4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/acmp-tst-32bit.c
> @@ -0,0 +1,64 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target ilp32 } */
> +
> +// Need power7 for l<b,h>arx
> +/* { dg-options "-O2 -mdejagnu-cpu=power7" } */
> +
> +bool
> +word_exchange_qi (signed char *ptr, signed char *expected, signed
> char *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_uqi (unsigned char *ptr, unsigned char *expected,
> +                   unsigned char *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_hi (short *ptr, short *expected, short *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_shi (signed short *ptr, signed short *expected,
> +                   signed short *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_uhi (unsigned short *ptr, unsigned short *expected,
> +                   unsigned short *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_si (int *ptr, int *expected, int *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_ssi (signed int *ptr, signed int *expected, signed int
> *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_usi (unsigned int *ptr, unsigned int *expected,
> +                   unsigned int *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mlbarx +[0-9]+,[0-9]+,[0-
> 9]+,1} 2 } } */
> +/* { dg-final { scan-assembler-times {\mlharx +[0-9]+,[0-9]+,[0-
> 9]+,1} 3 } } */
> +/* { dg-final { scan-assembler-times {\mlwarx +[0-9]+,[0-9]+,[0-
> 9]+,1} 3 } } */
> +/* { dg-final { scan-assembler-times {\mldarx +[0-9]+,[0-9]+,[0-
> 9]+,1} 3 } } */
> +/* { dg-final { scan-assembler-times {\mlqarx +[0-9]+,[0-9]+,[0-
> 9]+,1} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/acmp-tst-indexed.c
> b/gcc/testsuite/gcc.target/powerpc/acmp-tst-indexed.c
> new file mode 100644
> index 00000000000..c0d2bf3dcb3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/acmp-tst-indexed.c
> @@ -0,0 +1,165 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target lp64 } */
> +
> +// Need power8 for l<b,h,q>arx
> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
> +
> +typedef struct udt_1
> +{
> +  char *a;
> +} udt_1t;
> +typedef struct udt_2
> +{
> +  char a;
> +  char b;
> +} udt_2t;
> +typedef struct udt_4
> +{
> +  short a;
> +  short b;
> +} udt_4t;
> +typedef struct udt_8
> +{
> +  int a;
> +  int b;
> +} udt_8t;
> +bool
> +word_exchange_nqi (char *ptr, char *expected, char *desired,
> +                unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_qi (signed char *ptr, signed char *expected, signed
> char *desired,
> +               unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_uqi (unsigned char *ptr, unsigned char *expected,
> +                   unsigned char *desired, unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_hi (short *ptr, short *expected, short *desired,
> +               unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_shi (signed short *ptr, signed short *expected,
> +                   signed short *desired, unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_uhi (unsigned short *ptr, unsigned short *expected,
> +                   unsigned short *desired, unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_si (int *ptr, int *expected, int *desired, unsigned
> long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_ssi (signed int *ptr, signed int *expected, signed int
> *desired,
> +                unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_usi (unsigned int *ptr, unsigned int *expected,
> +                   unsigned int *desired, unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_di (long long *ptr, long long *expected, long long
> *desired,
> +               unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_sdi (signed long long *ptr, signed long long
> *expected,
> +                   signed long long *desired, unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udi (unsigned long long *ptr, unsigned long long
> *expected,
> +                   unsigned long long *desired, unsigned long long
> n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_f32 (float *ptr, float *expected, float *desired,
> +                unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_f64 (double *ptr, double *expected, double *desired,
> +                unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udt_1 (udt_1t *ptr, udt_1t *expected, udt_1t *desired,
> +                  unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udt_2 (udt_2t *ptr, udt_2t *expected, udt_2t *desired,
> +                  unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udt_4 (udt_4t *ptr, udt_4t *expected, udt_4t *desired,
> +                  unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udt_8 (udt_8t *ptr, udt_8t *expected, udt_8t *desired,
> +                  unsigned long long n)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr+n, expected+n,
> desired+n, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +
> +/* Test if indexed mode addresses are used, r6 must be used which corresponds
> +   to the argument n in all functions.  */
> +
> +/* { dg-final { scan-assembler-times {\mlbarx +[0-9]+,[0-9]+,6,1} 3 } } */
> +/* { dg-final { scan-assembler-times {\mstbcx. +[0-9]+,[0-9]+,6} 3 } } */
> +
> +/* { dg-final { scan-assembler-times {\mlharx +[0-9]+,[0-9]+,6,1} 4 } } */
> +/* { dg-final { scan-assembler-times {\msthcx. +[0-9]+,[0-9]+,6} 4 } } */
> +
> +/* { dg-final { scan-assembler-times {\mlwarx +[0-9]+,[0-9]+,6,1} 5 } } */
> +/* { dg-final { scan-assembler-times {\mstwcx. +[0-9]+,[0-9]+,6} 5 } } */
> +
> +/* { dg-final { scan-assembler-times {\mldarx +[0-9]+,[0-9]+,6,1} 6 } } */
> +/* { dg-final { scan-assembler-times {\mstdcx. +[0-9]+,[0-9]+,6} 6 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/acmp-tst.c b/gcc/testsuite/gcc.target/powerpc/acmp-tst.c
> new file mode 100644
> index 00000000000..bb15805819f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/acmp-tst.c
> @@ -0,0 +1,176 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target lp64 } */
> +
> +// Need power8 for l<b,h,q>arx
> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
> +
> +typedef struct udt_1
> +{
> +  char *a;
> +} udt_1t;
> +typedef struct udt_2
> +{
> +  char a;
> +  char b;
> +} udt_2t;
> +typedef struct udt_4
> +{
> +  short a;
> +  short b;
> +} udt_4t;
> +typedef struct udt_8
> +{
> +  int a;
> +  int b;
> +} udt_8t;
> +typedef struct udt_16
> +{
> +  long long a;
> +  long long b;
> +} udt_16t;
> +bool
> +word_exchange_nqi (char *ptr, char *expected, char *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_qi (signed char *ptr, signed char *expected, signed
> char *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_uqi (unsigned char *ptr, unsigned char *expected,
> +                   unsigned char *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_hi (short *ptr, short *expected, short *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_shi (signed short *ptr, signed short *expected,
> +                   signed short *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_uhi (unsigned short *ptr, unsigned short *expected,
> +                   unsigned short *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_si (int *ptr, int *expected, int *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_ssi (signed int *ptr, signed int *expected, signed int
> *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_usi (unsigned int *ptr, unsigned int *expected,
> +                   unsigned int *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_di (long long *ptr, long long *expected, long long
> *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_sdi (signed long long *ptr, signed long long
> *expected,
> +                   signed long long *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udi (unsigned long long *ptr, unsigned long long
> *expected,
> +                   unsigned long long *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_sti (signed __int128 *ptr, signed __int128 *expected,
> +                   signed __int128 *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_uti (unsigned __int128 *ptr, unsigned __int128
> *expected,
> +                   unsigned __int128 *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_f32 (float *ptr, float *expected, float *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_f64 (double *ptr, double *expected, double *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_f128 (__ieee128 *ptr, __ieee128 *expected, __ieee128
> *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udt_1 (udt_1t *ptr, udt_1t *expected, udt_1t *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udt_2 (udt_2t *ptr, udt_2t *expected, udt_2t *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udt_4 (udt_4t *ptr, udt_4t *expected, udt_4t *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udt_8 (udt_8t *ptr, udt_8t *expected, udt_8t *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +bool
> +word_exchange_udt_16 (udt_16t *ptr, udt_16t *expected, udt_16t
> *desired)
> +{
> +  return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
> +                                      __ATOMIC_SEQ_CST,
> __ATOMIC_ACQUIRE);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mlbarx +[0-9]+,[0-9]+,[0-9]+,1} 3 } } */
> +/* { dg-final { scan-assembler-times {\mlharx +[0-9]+,[0-9]+,[0-9]+,1} 4 } } */
> +/* { dg-final { scan-assembler-times {\mlwarx +[0-9]+,[0-9]+,[0-9]+,1} 5 } } */
> +/* { dg-final { scan-assembler-times {\mldarx +[0-9]+,[0-9]+,[0-9]+,1} 6 } } */
> +/* { dg-final { scan-assembler-times {\mlqarx +[0-9]+,[0-9]+,[0-9]+,1} 4 } } */

Reply via email to