Apologies, the previously committed patch was an outdated one, and I had
pushed it without checking. I have therefore reverted it and pushed the
correct one now.
Thanks,
Avinash
This patch adds a new powerpc specific atomic builtin which is similar
to the generic __atomic_compare_exchange builtin.
bool __builtin_ppc_atomic_cas_local (type *ptr, type *expected,
type *desired, bool weak,
int success_memorder,
int failure_memorder)
It behaves like __atomic_compare_exchange(), but it uses an EH value of
1 in the larx (load-and-reserve) instruction, which hints to the processor
that the program will perform a subsequent store to the specified
location. The new builtin helps optimize lock contention on PowerPC by
keeping the lock cacheline in the local processor longer, reducing
performance penalties from cache coherence protocol traffic.
2026-06-30 Avinash Jayakar <[email protected]>
Surya Kumari Jangala <[email protected]>
gcc/ChangeLog:
* config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin): Add logic to
handle __builtin_ppc_atomic_cas_local.
* config/rs6000/rs6000-builtins.def: New builtins for
__builtin_ppc_atomic_cas_local with types.
* config/rs6000/rs6000-c.cc (altivec_build_resolved_builtin): Handle
builtins with up to 6 arguments.
* config/rs6000/rs6000-overload.def: Overload builtin for signed/unsigned
char, short, int, long, __int128.
* config/rs6000/rs6000-protos.h
(rs6000_expand_atomic_compare_and_swap): Add
additional parameter 'local' to the prototype.
* config/rs6000/rs6000.cc (emit_load_locked): Add new parameter. Pass
new parameter to generate load-locked instruction.
(rs6000_expand_atomic_compare_and_swap): Add new parameter. Call
emit_load_locked() with additional parameter value of EH bit.
(rs6000_expand_atomic_exchange): Pass EH value 0 to emit_load_locked().
(rs6000_expand_atomic_op): Likewise.
* config/rs6000/sync.md (load_locked<mode>): Add new operand in RTL
template. Specify EH bit in the larx instruction.
(load_locked<QHI:mode>_si): Likewise.
(load_lockedpti): Likewise.
(load_lockedti): Add new operand in RTL template. Pass EH bit to
gen_load_lockedpti().
(atomic_compare_and_swap<mode>): Pass new parameter 'false' to
rs6000_expand_atomic_compare_and_swap.
(atomic_compare_and_swap_local<mode>): New define_expand.
* doc/extend.texi: Add documentation for new builtin.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/acmp-tst-32bit.c: New test.
* gcc.target/powerpc/acmp-tst.c: New test.
* gcc.target/powerpc/acmp-tst-indexed.c: New test.
---
gcc/config/rs6000/rs6000-builtin.cc | 106 +++++++++++
gcc/config/rs6000/rs6000-builtins.def | 17 ++
gcc/config/rs6000/rs6000-c.cc | 119 +++++++++++-
gcc/config/rs6000/rs6000-overload.def | 8 +
gcc/config/rs6000/rs6000-protos.h | 2 +-
gcc/config/rs6000/rs6000.cc | 17 +-
gcc/config/rs6000/sync.md | 37 +++-
gcc/doc/extend.texi | 23 +++
.../gcc.target/powerpc/acmp-tst-32bit.c | 61 ++++++
.../gcc.target/powerpc/acmp-tst-indexed.c | 165 ++++++++++++++++
gcc/testsuite/gcc.target/powerpc/acmp-tst.c | 176 ++++++++++++++++++
11 files changed, 711 insertions(+), 20 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/acmp-tst-32bit.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/acmp-tst-indexed.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/acmp-tst.c
diff --git a/gcc/config/rs6000/rs6000-builtin.cc
b/gcc/config/rs6000/rs6000-builtin.cc
index 541958d38c0..e3aab2b6ef1 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -3316,6 +3316,112 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /*
subtarget */,
return expand_call (exp, target, ignore);
}
+ if (fcode == RS6000_BIF_PPC_ATOMIC_CAS_QI
+ || fcode == RS6000_BIF_PPC_ATOMIC_CAS_HI
+ || fcode == RS6000_BIF_PPC_ATOMIC_CAS_SI
+ || fcode == RS6000_BIF_PPC_ATOMIC_CAS_DI
+ || fcode == RS6000_BIF_PPC_ATOMIC_CAS_TI)
+ {
+ machine_mode mode; // Get mode based on BIF ID (QImode, SImode, etc.)
+
+ switch (fcode)
+ {
+ case RS6000_BIF_PPC_ATOMIC_CAS_QI:
+ mode = QImode;
+ icode = CODE_FOR_atomic_compare_and_swap_localqi;
+ break;
+ case RS6000_BIF_PPC_ATOMIC_CAS_HI:
+ mode = HImode;
+ icode = CODE_FOR_atomic_compare_and_swap_localhi;
+ break;
+ case RS6000_BIF_PPC_ATOMIC_CAS_SI:
+ mode = SImode;
+ icode = CODE_FOR_atomic_compare_and_swap_localsi;
+ break;
+ case RS6000_BIF_PPC_ATOMIC_CAS_DI:
+ mode = DImode;
+ icode = CODE_FOR_atomic_compare_and_swap_localdi;
+ break;
+ case RS6000_BIF_PPC_ATOMIC_CAS_TI:
+ mode = TImode;
+ icode = CODE_FOR_atomic_compare_and_swap_localti;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ // For arg 0 (ptr to data)
+ rtx ptr = expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, Pmode,
+ EXPAND_SUM);
+ ptr = convert_memory_address (Pmode, ptr);
+ rtx mem = gen_rtx_MEM (mode, ptr);
+
+ // For arg 1 (expected ptr)
+ rtx exp_ptr = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, Pmode,
+ EXPAND_SUM);
+ exp_ptr = convert_memory_address (Pmode, exp_ptr);
+ rtx expected_val = gen_reg_rtx (mode);
+ emit_move_insn (expected_val, gen_rtx_MEM (mode, exp_ptr));
+
+ // For arg 2 (desired ptr)
+ rtx desired_ptr = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, Pmode,
+ EXPAND_SUM);
+ desired_ptr = convert_memory_address (Pmode, desired_ptr);
+ rtx desired_val = gen_reg_rtx (mode);
+ emit_move_insn (desired_val, gen_rtx_MEM (mode, desired_ptr));
+
+ // Args 3, 4, 5: weak, succ, fail (constants)
+ rtx weak = expand_normal (CALL_EXPR_ARG (exp, 3));
+ rtx succ = expand_normal (CALL_EXPR_ARG (exp, 4));
+ rtx fail = expand_normal (CALL_EXPR_ARG (exp, 5));
+
+ // 0: Boolean return (Output)
+ struct expand_operand ops[8];
+ create_output_operand (&ops[0], target, SImode);
+
+ // 1: Old value return (Output)
+ rtx old_val = gen_reg_rtx (mode);
+ create_output_operand (&ops[1], old_val, mode);
+
+ // 2: The Memory (Fixed/Input - it's a MEM rtx)
+ // We use create_fixed_operand because it's a specific MEM location
+ create_fixed_operand (&ops[2], mem);
+
+ // 3: Expected Value (Input)
+ create_input_operand (&ops[3], expected_val, mode);
+
+ // 4: Desired Value (Input)
+ create_input_operand (&ops[4], desired_val, mode);
+
+ // 5, 6, 7: Weak, Success, Failure (Immediate/Constants)
+ create_input_operand (&ops[5], weak, SImode);
+ create_input_operand (&ops[6], succ, SImode);
+ create_input_operand (&ops[7], fail, SImode);
+
+ // Now call expand_insn with the ops array
+ if (!maybe_expand_insn (icode, 8, ops))
+ error ("invalid arguments to builtin");
+
+ // Create a label for the end of the function.
+ rtx done_label = gen_label_rtx ();
+
+ /* Standard Semantics: Update 'expected' ONLY on failure.
+ If target (the boolean result) is NOT 0, the CAS succeeded.
+ In the case of success, we jump straight to the end. */
+
+ // If target != 0 (Success), skip the store.
+ emit_cmp_and_jump_insns (target, const0_rtx, NE, NULL_RTX,
+ SImode, 1, done_label);
+
+ // FAILURE PATH: This code runs only if target == 0.
+ rtx expected_mem = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, exp_ptr));
+ emit_move_insn (expected_mem, old_val);
+
+ emit_label (done_label);
+
+ return target;
+ }
+
if (bif_is_nosoft (*bifaddr)
&& rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
{
diff --git a/gcc/config/rs6000/rs6000-builtins.def
b/gcc/config/rs6000/rs6000-builtins.def
index 172a97adaa8..ebcbc500b8a 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -246,6 +246,23 @@
const double __builtin_unpack_longdouble (long double, const int<1>);
UNPACK_TF unpacktf {ibmld}
+; Builtins for ppc specific atomic compare exchange
+ bool __builtin_ppc_atomic_cas_local_qi (char *, char *, char *, const int, \
+ const int, const int);
+ PPC_ATOMIC_CAS_QI nothing {}
+ bool __builtin_ppc_atomic_cas_local_hi (short *, short *, short *, \
+ const int, const int, const int);
+ PPC_ATOMIC_CAS_HI nothing {}
+ bool __builtin_ppc_atomic_cas_local_si (int *, int *, int *, const int, \
+ const int, const int);
+ PPC_ATOMIC_CAS_SI nothing {}
+ bool __builtin_ppc_atomic_cas_local_di (long long *, long long *, \
+ long long *, const int, const int, \
+ const int);
+ PPC_ATOMIC_CAS_DI nothing {}
+ bool __builtin_ppc_atomic_cas_local_ti (__int128 *, __int128 *, __int128 *, \
+ const int, const int, const int);
+ PPC_ATOMIC_CAS_TI nothing {}
; Builtins that have been around just about forever, but not quite.
[power5]
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 3fa7c04a7ce..3cbdb6fb2ba 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -929,7 +929,7 @@ altivec_build_resolved_builtin (tree *args, int n, tree
fntype, tree ret_type,
/* If the number of arguments to an overloaded function increases,
we must expand this switch. */
- gcc_assert (MAX_OVLD_ARGS <= 4);
+ gcc_assert (MAX_OVLD_ARGS <= 6);
tree call;
switch (n)
@@ -949,6 +949,10 @@ altivec_build_resolved_builtin (tree *args, int n, tree
fntype, tree ret_type,
case 4:
call = build_call_expr (fndecl, 4, args[0], args[1], args[2], args[3]);
break;
+ case 6:
+ call = build_call_expr (fndecl, 6, args[0], args[1], args[2], args[3],
+ args[4], args[5]);
+ break;
default:
gcc_unreachable ();
}
@@ -1710,11 +1714,122 @@ find_instance (bool *unsupported_builtin, int
*instance,
tree
altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
- void *passed_arglist, bool)
+ void *passed_arglist, bool complain)
{
rs6000_gen_builtins fcode
= (rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+ /* Handle __builtin_ppc_atomic_cas_local before standard overload
+ processing. */
+ if (fcode == RS6000_OVLD_PPC_ATOMIC_CAS)
+ {
+ vec<tree, va_gc> *arglist
+ = static_cast<vec<tree, va_gc> *> (passed_arglist);
+
+ /* Expected: (void *ptr, void *expected, void *desired,
+ bool weak, int success_order, int failure_order). */
+ if (vec_safe_length (arglist) != 6)
+ {
+ if (complain)
+ error_at (loc, "%qE requires 6 arguments", fndecl);
+ return error_mark_node;
+ }
+
+ /* Get the first argument to determine the actual type. */
+ tree arg0 = (*arglist)[0];
+ tree type0 = TREE_TYPE (arg0);
+
+ /* Must be a pointer. */
+ if (!POINTER_TYPE_P (type0))
+ {
+ if (complain)
+ error_at (loc, "first argument to %qE must be a pointer", fndecl);
+ return error_mark_node;
+ }
+
+ /* Get the pointee type. */
+ tree pointee_type = TREE_TYPE (type0);
+
+ /* Must be a complete type. */
+ if (!COMPLETE_TYPE_P (pointee_type))
+ {
+ if (complain)
+ error_at (loc, "first argument to %qE must point to a complete"
+ " type", fndecl);
+ return error_mark_node;
+ }
+
+ /* Get size in bytes. */
+ tree size_tree = TYPE_SIZE_UNIT (pointee_type);
+ if (!tree_fits_uhwi_p (size_tree))
+ {
+ if (complain)
+ error_at (loc, "type size must be constant");
+ return error_mark_node;
+ }
+
+ unsigned HOST_WIDE_INT size = tree_to_uhwi (size_tree);
+
+ /* Determine which size-specific builtin to use. */
+ rs6000_gen_builtins target_fcode;
+ tree int_type;
+
+ switch (size)
+ {
+ case 1:
+ target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_QI;
+ int_type = unsigned_char_type_node;
+ break;
+ case 2:
+ target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_HI;
+ int_type = short_unsigned_type_node;
+ break;
+ case 4:
+ target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_SI;
+ int_type = unsigned_intSI_type_node;
+ break;
+ case 8:
+ target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_DI;
+ int_type = long_long_unsigned_type_node;
+ break;
+ case 16:
+ target_fcode = RS6000_BIF_PPC_ATOMIC_CAS_TI;
+ int_type = unsigned_intTI_type_node;
+ break;
+ default:
+ if (complain)
+ error_at (loc, "size %wu not supported for %qE "
+ "(must be 1, 2, 4, 8, or 16 bytes)", size, fndecl);
+ return error_mark_node;
+ }
+
+ /* Create pointer type to the appropriate integer type. */
+ tree int_ptr_type = build_pointer_type (int_type);
+
+ /* Cast the three pointer arguments to the appropriate integer
+ pointer type. */
+ tree new_arg0 = build1 (VIEW_CONVERT_EXPR, int_ptr_type, (*arglist)[0]);
+ tree new_arg1 = build1 (VIEW_CONVERT_EXPR, int_ptr_type, (*arglist)[1]);
+ tree new_arg2 = build1 (VIEW_CONVERT_EXPR, int_ptr_type, (*arglist)[2]);
+
+ /* Build new argument list with casted pointers. */
+ vec<tree, va_gc> *new_arglist;
+ vec_alloc (new_arglist, 6);
+ new_arglist->quick_push (new_arg0);
+ new_arglist->quick_push (new_arg1);
+ new_arglist->quick_push (new_arg2);
+ new_arglist->quick_push ((*arglist)[3]); /* weak (bool). */
+ new_arglist->quick_push ((*arglist)[4]); /* success_memorder. */
+ new_arglist->quick_push ((*arglist)[5]); /* failure_memorder. */
+
+ /* Get the target builtin function. */
+ tree new_fndecl = rs6000_builtin_decls[target_fcode];
+
+ /* Build and return the function call. */
+ return build_function_call_vec (loc, vNULL, new_fndecl, new_arglist,
+ NULL, fndecl);
+ }
+
/* Return immediately if this isn't an overload. */
if (fcode <= RS6000_OVLD_NONE)
return NULL_TREE;
diff --git a/gcc/config/rs6000/rs6000-overload.def
b/gcc/config/rs6000/rs6000-overload.def
index 09be9e7de71..8755b04d5f0 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -79,6 +79,14 @@
; a semicolon are also treated as blank lines.
+; The following function is not overloaded, but is internally substituted by
+; __builtin_ppc_atomic_cas_local_{qi,hi,si,di,ti} based on the first 3
+; arguments.
+[PPC_ATOMIC_CAS, SKIP, __builtin_ppc_atomic_cas_local]
+ bool __builtin_ppc_atomic_cas_local (void *, void *, void *, const int, \
+ const int, const int);
+ PPC_ATOMIC_CAS_QI PPC_ATOMIC_CAS_FAKERY
+
[BCDADD, __builtin_bcdadd, __builtin_vec_bcdadd]
vsq __builtin_vec_bcdadd (vsq, vsq, const int);
BCDADD_V1TI
diff --git a/gcc/config/rs6000/rs6000-protos.h
b/gcc/config/rs6000/rs6000-protos.h
index 09424ebaf97..5efca2d5834 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -127,7 +127,7 @@ extern bool rs6000_emit_set_const (rtx, rtx);
extern bool rs6000_emit_cmove (rtx, rtx, rtx, rtx);
extern bool rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
-extern void rs6000_expand_atomic_compare_and_swap (rtx op[]);
+extern void rs6000_expand_atomic_compare_and_swap (rtx op[], bool local);
extern rtx swap_endian_selector_for_mode (machine_mode mode);
extern void rs6000_expand_atomic_exchange (rtx op[]);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 86d06cfdaa6..d8669d9ffce 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -16749,12 +16749,13 @@ emit_unlikely_jump (rtx cond, rtx label)
/* A subroutine of the atomic operation splitters. Emit a load-locked
instruction in MODE. For QI/HImode, possibly use a pattern than includes
- the zero_extend operation. */
+ the zero_extend operation. LOCAL indicates the EH bit value for the
+ load-locked instruction. */
static void
-emit_load_locked (machine_mode mode, rtx reg, rtx mem)
+emit_load_locked (machine_mode mode, rtx reg, rtx mem, rtx local)
{
- rtx (*fn) (rtx, rtx) = NULL;
+ rtx (*fn) (rtx, rtx, rtx) = NULL;
switch (mode)
{
@@ -16781,7 +16782,7 @@ emit_load_locked (machine_mode mode, rtx reg, rtx mem)
default:
gcc_unreachable ();
}
- emit_insn (fn (reg, mem));
+ emit_insn (fn (reg, mem, local));
}
/* A subroutine of the atomic operation splitters. Emit a store-conditional
@@ -16951,7 +16952,7 @@ rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx
shift)
/* Expand an atomic compare and swap operation. */
void
-rs6000_expand_atomic_compare_and_swap (rtx operands[])
+rs6000_expand_atomic_compare_and_swap (rtx operands[], bool local)
{
rtx boolval, retval, mem, oldval, newval, cond;
rtx label1, label2, x, mask, shift;
@@ -17014,7 +17015,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
}
label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
- emit_load_locked (mode, retval, mem);
+ emit_load_locked (mode, retval, mem, local ? const1_rtx : const0_rtx);
x = retval;
if (mask)
@@ -17112,7 +17113,7 @@ rs6000_expand_atomic_exchange (rtx operands[])
label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
emit_label (XEXP (label, 0));
- emit_load_locked (mode, retval, mem);
+ emit_load_locked (mode, retval, mem, const0_rtx);
x = val;
if (mask)
@@ -17217,7 +17218,7 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem,
rtx val,
if (before == NULL_RTX)
before = gen_reg_rtx (mode);
- emit_load_locked (mode, before, mem);
+ emit_load_locked (mode, before, mem, const0_rtx);
if (code == NOT)
{
diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md
index fb20cd1a586..ad9c9176538 100644
--- a/gcc/config/rs6000/sync.md
+++ b/gcc/config/rs6000/sync.md
@@ -329,17 +329,19 @@ (define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI")
(define_insn "load_locked<mode>"
[(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
(unspec_volatile:ATOMIC
- [(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))]
+ [(match_operand:ATOMIC 1 "memory_operand" "Z")
+ (match_operand:QI 2 "u1bit_cint_operand" "n")] UNSPECV_LL))]
""
- "<larx> %0,%y1"
+ "<larx> %0,%y1,%2"
[(set_attr "type" "load_l")])
(define_insn "load_locked<QHI:mode>_si"
[(set (match_operand:SI 0 "int_reg_operand" "=r")
(unspec_volatile:SI
- [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ [(match_operand:QHI 1 "memory_operand" "Z")
+ (match_operand:QI 2 "u1bit_cint_operand" "n")] UNSPECV_LL))]
"TARGET_SYNC_HI_QI"
- "<QHI:larx> %0,%y1"
+ "<QHI:larx> %0,%y1,%2"
[(set_attr "type" "load_l")])
;; Use PTImode to get even/odd register pairs.
@@ -353,7 +355,8 @@ (define_insn "load_locked<QHI:mode>_si"
(define_expand "load_lockedti"
[(use (match_operand:TI 0 "quad_int_reg_operand"))
- (use (match_operand:TI 1 "memory_operand"))]
+ (use (match_operand:TI 1 "memory_operand"))
+ (use (match_operand:QI 2 "u1bit_cint_operand"))]
"TARGET_SYNC_TI"
{
rtx op0 = operands[0];
@@ -367,7 +370,7 @@ (define_expand "load_lockedti"
operands[1] = op1 = change_address (op1, TImode, new_addr);
}
- emit_insn (gen_load_lockedpti (pti, op1));
+ emit_insn (gen_load_lockedpti (pti, op1, operands[2]));
if (WORDS_BIG_ENDIAN)
emit_move_insn (op0, gen_lowpart (TImode, pti));
else
@@ -381,11 +384,12 @@ (define_expand "load_lockedti"
(define_insn "load_lockedpti"
[(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
(unspec_volatile:PTI
- [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")] UNSPECV_LL))]
+ [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")
+ (match_operand:QI 2 "u1bit_cint_operand" "n")] UNSPECV_LL))]
"TARGET_SYNC_TI
&& !reg_mentioned_p (operands[0], operands[1])
&& quad_int_reg_operand (operands[0], PTImode)"
- "lqarx %0,%y1"
+ "lqarx %0,%y1,%2"
[(set_attr "type" "load_l")
(set_attr "size" "128")])
@@ -462,7 +466,22 @@ (define_expand "atomic_compare_and_swap<mode>"
(match_operand:SI 7 "const_int_operand")] ;; model fail
""
{
- rs6000_expand_atomic_compare_and_swap (operands);
+ rs6000_expand_atomic_compare_and_swap (operands, false);
+ DONE;
+})
+
+(define_expand "atomic_compare_and_swap_local<mode>"
+ [(match_operand:SI 0 "int_reg_operand") ;; bool out
+ (match_operand:AINT 1 "int_reg_operand") ;; val out
+ (match_operand:AINT 2 "memory_operand") ;; memory
+ (match_operand:AINT 3 "reg_or_short_operand") ;; expected
+ (match_operand:AINT 4 "int_reg_operand") ;; desired
+ (match_operand:SI 5 "const_int_operand") ;; is_weak
+ (match_operand:SI 6 "const_int_operand") ;; model succ
+ (match_operand:SI 7 "const_int_operand")] ;; model fail
+ ""
+{
+ rs6000_expand_atomic_compare_and_swap (operands, true);
DONE;
})
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7bd84e6d8cc..45881280515 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -24080,6 +24080,29 @@ defined, then the @code{__builtin_set_fpscr_rn}
built-in returns the FPSCR
fields. If not defined, the @code{__builtin_set_fpscr_rn} does not return a
value. If the @option{-msoft-float} option is used, the
@code{__builtin_set_fpscr_rn} built-in will not return a value.
+@defbuiltin{bool __builtin_ppc_atomic_cas_local (@var{type} *@var{ptr},
@var{type} *@var{expected}, @var{type} *@var{desired}, bool @var{weak}, int
@var{success_memorder}, int @var{failure_memorder})}
+This built-in function implements a PowerPC-specific atomic compare and
+exchange operation. It behaves identically to the generic
+@code{__atomic_compare_exchange} built-in function, with one key difference:
+it uses an EH (Extended Hint) value of 1 in the @code{lbarx}, @code{lharx},
+@code{lwarx}, @code{ldarx}, or @code{lqarx} instruction (load-and-reserve indexed).
+
+The EH bit provides a hint to the processor that the program will perform a
+subsequent store to the specified location. This hint helps optimize lock
+contention on PowerPC systems by keeping the lock cacheline in the local
+processor longer, reducing performance penalties from cache coherence protocol
+traffic.
+
+For details on the behavior and semantics of the arguments, refer to the
+@code{__atomic_compare_exchange} documentation.
+
+The first three pointer arguments (@var{ptr}, @var{expected}, and
+@var{desired}) must point to complete types, and the size of the types they
+point to must be the same and known at compile time. The types themselves may
+differ. The supported type sizes are 1, 2, 4, 8, or 16 bytes.
+
+@enddefbuiltin
+
@node Basic PowerPC Built-in Functions Available on ISA 2.05
@subsubsection Basic PowerPC Built-in Functions Available on ISA 2.05
diff --git a/gcc/testsuite/gcc.target/powerpc/acmp-tst-32bit.c
b/gcc/testsuite/gcc.target/powerpc/acmp-tst-32bit.c
new file mode 100644
index 00000000000..79f646a4f27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/acmp-tst-32bit.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+bool
+word_exchange_qi (signed char *ptr, signed char *expected, signed char
*desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_uqi (unsigned char *ptr, unsigned char *expected,
+ unsigned char *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_hi (short *ptr, short *expected, short *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_shi (signed short *ptr, signed short *expected,
+ signed short *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_uhi (unsigned short *ptr, unsigned short *expected,
+ unsigned short *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_si (int *ptr, int *expected, int *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_ssi (signed int *ptr, signed int *expected, signed int *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_usi (unsigned int *ptr, unsigned int *expected,
+ unsigned int *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+
+/* { dg-final { scan-assembler-times {\mlbarx +[0-9]+,[0-9]+,[0-9]+,1} 2 } } */
+/* { dg-final { scan-assembler-times {\mlharx +[0-9]+,[0-9]+,[0-9]+,1} 3 } } */
+/* { dg-final { scan-assembler-times {\mlwarx +[0-9]+,[0-9]+,[0-9]+,1} 3 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/acmp-tst-indexed.c
b/gcc/testsuite/gcc.target/powerpc/acmp-tst-indexed.c
new file mode 100644
index 00000000000..c0d2bf3dcb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/acmp-tst-indexed.c
@@ -0,0 +1,165 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+
+// Need power8 for l<b,h,q>arx
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+typedef struct udt_1
+{
+ char *a;
+} udt_1t;
+typedef struct udt_2
+{
+ char a;
+ char b;
+} udt_2t;
+typedef struct udt_4
+{
+ short a;
+ short b;
+} udt_4t;
+typedef struct udt_8
+{
+ int a;
+ int b;
+} udt_8t;
+bool
+word_exchange_nqi (char *ptr, char *expected, char *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_qi (signed char *ptr, signed char *expected, signed char
*desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_uqi (unsigned char *ptr, unsigned char *expected,
+ unsigned char *desired, unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_hi (short *ptr, short *expected, short *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_shi (signed short *ptr, signed short *expected,
+ signed short *desired, unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_uhi (unsigned short *ptr, unsigned short *expected,
+ unsigned short *desired, unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_si (int *ptr, int *expected, int *desired, unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_ssi (signed int *ptr, signed int *expected, signed int *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_usi (unsigned int *ptr, unsigned int *expected,
+ unsigned int *desired, unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_di (long long *ptr, long long *expected, long long *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_sdi (signed long long *ptr, signed long long *expected,
+ signed long long *desired, unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udi (unsigned long long *ptr, unsigned long long *expected,
+ unsigned long long *desired, unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_f32 (float *ptr, float *expected, float *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_f64 (double *ptr, double *expected, double *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udt_1 (udt_1t *ptr, udt_1t *expected, udt_1t *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udt_2 (udt_2t *ptr, udt_2t *expected, udt_2t *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udt_4 (udt_4t *ptr, udt_4t *expected, udt_4t *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udt_8 (udt_8t *ptr, udt_8t *expected, udt_8t *desired,
+ unsigned long long n)
+{
+ return __builtin_ppc_atomic_cas_local (ptr+n, expected+n, desired+n, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+
+/* Test if indexed mode addresses are used, r6 must be used which corresponds
+ to the argument n in all functions. */
+
+/* { dg-final { scan-assembler-times {\mlbarx +[0-9]+,[0-9]+,6,1} 3 } } */
+/* { dg-final { scan-assembler-times {\mstbcx. +[0-9]+,[0-9]+,6} 3 } } */
+
+/* { dg-final { scan-assembler-times {\mlharx +[0-9]+,[0-9]+,6,1} 4 } } */
+/* { dg-final { scan-assembler-times {\msthcx. +[0-9]+,[0-9]+,6} 4 } } */
+
+/* { dg-final { scan-assembler-times {\mlwarx +[0-9]+,[0-9]+,6,1} 5 } } */
+/* { dg-final { scan-assembler-times {\mstwcx. +[0-9]+,[0-9]+,6} 5 } } */
+
+/* { dg-final { scan-assembler-times {\mldarx +[0-9]+,[0-9]+,6,1} 6 } } */
+/* { dg-final { scan-assembler-times {\mstdcx. +[0-9]+,[0-9]+,6} 6 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/acmp-tst.c
b/gcc/testsuite/gcc.target/powerpc/acmp-tst.c
new file mode 100644
index 00000000000..bb15805819f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/acmp-tst.c
@@ -0,0 +1,176 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+
+// Need power8 for l<b,h,q>arx
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+typedef struct udt_1
+{
+ char *a;
+} udt_1t;
+typedef struct udt_2
+{
+ char a;
+ char b;
+} udt_2t;
+typedef struct udt_4
+{
+ short a;
+ short b;
+} udt_4t;
+typedef struct udt_8
+{
+ int a;
+ int b;
+} udt_8t;
+typedef struct udt_16
+{
+ long long a;
+ long long b;
+} udt_16t;
+bool
+word_exchange_nqi (char *ptr, char *expected, char *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_qi (signed char *ptr, signed char *expected, signed char
*desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_uqi (unsigned char *ptr, unsigned char *expected,
+ unsigned char *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_hi (short *ptr, short *expected, short *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_shi (signed short *ptr, signed short *expected,
+ signed short *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_uhi (unsigned short *ptr, unsigned short *expected,
+ unsigned short *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_si (int *ptr, int *expected, int *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_ssi (signed int *ptr, signed int *expected, signed int *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_usi (unsigned int *ptr, unsigned int *expected,
+ unsigned int *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_di (long long *ptr, long long *expected, long long *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_sdi (signed long long *ptr, signed long long *expected,
+ signed long long *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udi (unsigned long long *ptr, unsigned long long *expected,
+ unsigned long long *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_sti (signed __int128 *ptr, signed __int128 *expected,
+ signed __int128 *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_uti (unsigned __int128 *ptr, unsigned __int128 *expected,
+ unsigned __int128 *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_f32 (float *ptr, float *expected, float *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_f64 (double *ptr, double *expected, double *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_f128 (__ieee128 *ptr, __ieee128 *expected, __ieee128 *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udt_1 (udt_1t *ptr, udt_1t *expected, udt_1t *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udt_2 (udt_2t *ptr, udt_2t *expected, udt_2t *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udt_4 (udt_4t *ptr, udt_4t *expected, udt_4t *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udt_8 (udt_8t *ptr, udt_8t *expected, udt_8t *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+bool
+word_exchange_udt_16 (udt_16t *ptr, udt_16t *expected, udt_16t *desired)
+{
+ return __builtin_ppc_atomic_cas_local (ptr, expected, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+
+/* { dg-final { scan-assembler-times {\mlbarx +[0-9]+,[0-9]+,[0-9]+,1} 3 } } */
+/* { dg-final { scan-assembler-times {\mlharx +[0-9]+,[0-9]+,[0-9]+,1} 4 } } */
+/* { dg-final { scan-assembler-times {\mlwarx +[0-9]+,[0-9]+,[0-9]+,1} 5 } } */
+/* { dg-final { scan-assembler-times {\mldarx +[0-9]+,[0-9]+,[0-9]+,1} 6 } } */
+/* { dg-final { scan-assembler-times {\mlqarx +[0-9]+,[0-9]+,[0-9]+,1} 4 } } */
--
2.54.0