Hi Saurabh, Thanks for the feedback, I've given some thoughts on some and agreed to a few of your other suggestions.
Thanks, Richard On Fri, Oct 31, 2025 at 10:12:31AM +0000, Saurabh Jha wrote: > > > On 10/31/2025 12:10 AM, [email protected] wrote: > > From: Richard Ball <[email protected]> > > > > This patch adds support for the atomic_store_with_stshh intrinsic > > in aarch64. This intrinsic is part of FEAT_PCDPHINT. > > > > gcc/ChangeLog: > > > > * config/aarch64/aarch64-builtins.cc > > (enum aarch64_builtins): Add new flags. > > (aarch64_init_pcdphint_builtins): Create new Builtin functions. > Nit: was capitalising `Builtin` intentional?> > (aarch64_general_init_builtins): Call init for PCDPHINT. > > (aarch64_expand_stshh_builtin): Expander for new intrinsic. > > (aarch64_general_expand_builtin): Call new expander. > > * config/aarch64/aarch64-c.cc > > (aarch64_update_cpp_builtins): New feature. > > * config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION): > > Likewise. > > * config/aarch64/aarch64.h (TARGET_PCDPHINT): Likewise. > > * config/aarch64/arm_acle.h > > (__atomic_store_with_stshh): Generic to call builtins. > > * config/aarch64/atomics.md > > (@aarch64_atomic_store_stshh<mode>): New pattern for intrinsic. > > * config/aarch64/iterators.md: New UNSPEC. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/aarch64/atomic_store_with_stshh.c.c: New test. > > --- > > gcc/config/aarch64/aarch64-builtins.cc | 128 ++++++++++++++++++ > > gcc/config/aarch64/aarch64-c.cc | 1 + > > .../aarch64/aarch64-option-extensions.def | 2 + > > gcc/config/aarch64/aarch64.h | 3 + > > gcc/config/aarch64/arm_acle.h | 31 +++++ > > gcc/config/aarch64/atomics.md | 23 ++++ > > gcc/config/aarch64/iterators.md | 1 + > > .../aarch64/atomic_store_with_stshh.c.c | 119 ++++++++++++++++ > > 8 files changed, 308 insertions(+) > > create mode 100644 > > gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c.c > > > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc > > b/gcc/config/aarch64/aarch64-builtins.cc > > index 408099a50e8..e8277185010 100644 > > --- a/gcc/config/aarch64/aarch64-builtins.cc > > +++ b/gcc/config/aarch64/aarch64-builtins.cc > > @@ -901,6 +901,14 @@ enum aarch64_builtins > > AARCH64_BUILTIN_GCSPR, > > AARCH64_BUILTIN_GCSPOPM, > > AARCH64_BUILTIN_GCSSS, > > + /* Armv9.6-A builtins. */ > > + AARCH64_BUILTIN_STSHH_QI, > > + AARCH64_BUILTIN_STSHH_HI, > > + AARCH64_BUILTIN_STSHH_SI, > > + AARCH64_BUILTIN_STSHH_DI, > > + AARCH64_BUILTIN_STSHH_PTR, > > + AARCH64_BUILTIN_STSHH_SF, > > + AARCH64_BUILTIN_STSHH_DF, > > AARCH64_BUILTIN_MAX > > }; > > @@ -2466,6 +2474,70 @@ aarch64_init_gcs_builtins (void) > > AARCH64_BUILTIN_GCSSS); > > } > > +/* Add builtins for FEAT_PCDPHINT. */ > > + > > +static void > > +aarch64_init_pcdphint_builtins (void) > > +{ > > + tree ftype; > > + > > + ftype = build_function_type_list (void_type_node, ptr_type_node, > > + unsigned_char_type_node, > > + unsigned_type_node, > > + unsigned_type_node, NULL_TREE); > > + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_QI] > > + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_qi", ftype, > > + AARCH64_BUILTIN_STSHH_QI); > > + > > + ftype = build_function_type_list (void_type_node, ptr_type_node, > > + short_unsigned_type_node, > > + unsigned_type_node, > > + unsigned_type_node, NULL_TREE); > > + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_HI] > > + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_hi", ftype, > > + AARCH64_BUILTIN_STSHH_HI); > > + > > + ftype = build_function_type_list (void_type_node, ptr_type_node, > > + unsigned_type_node, > > + unsigned_type_node, > > + unsigned_type_node, NULL_TREE); > > How about naming these function types differently, like unsigned_char_ftype, > short_unsigned_ftype, or unsigned_ftype? Likewise for below ftype variables. > > Or you can also define them inline. > These variables are defined outside this scope. They are used across all the different function type lists so creating a new one for this is not really an option. The ftype variables could be given different names although it is within keeping of the rest of the file not to. > I am just thinking from readability perspective here. > > + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SI] > > + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_si", ftype, > > + AARCH64_BUILTIN_STSHH_SI); > > + > > + ftype = build_function_type_list (void_type_node, ptr_type_node, > > + long_long_unsigned_type_node, > > + unsigned_type_node, > > + unsigned_type_node, NULL_TREE); > > + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DI] > > + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_di", ftype, > > + AARCH64_BUILTIN_STSHH_DI); > > + > > + ftype = build_function_type_list (void_type_node, ptr_type_node, > > + ptr_type_node, > > + unsigned_type_node, > > + unsigned_type_node, NULL_TREE); > > + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_PTR] > > + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_ptr", ftype, > > + AARCH64_BUILTIN_STSHH_PTR); > > + > > + ftype = build_function_type_list (void_type_node, ptr_type_node, > > + float_type_node, > > + unsigned_type_node, > > + unsigned_type_node, NULL_TREE); > > + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_SF] > > + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_sf", ftype, > > + AARCH64_BUILTIN_STSHH_SF); > > + > > + ftype = build_function_type_list (void_type_node, ptr_type_node, > > + double_type_node, > > + unsigned_type_node, > > + unsigned_type_node, NULL_TREE); > > + aarch64_builtin_decls[AARCH64_BUILTIN_STSHH_DF] > > + = aarch64_general_add_builtin ("__builtin_aarch64_stshh_df", ftype, > > + AARCH64_BUILTIN_STSHH_DF); > > +} > > + > > /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */ > > void > > @@ -2514,6 +2586,7 @@ aarch64_general_init_builtins (void) > > AARCH64_BUILTIN_CHKFEAT); > > aarch64_init_gcs_builtins (); > > + aarch64_init_pcdphint_builtins (); > > if (in_lto_p) > > handle_arm_acle_h (); > > @@ -3932,6 +4005,52 @@ aarch64_expand_tbl_tbx (vec<expand_operand> &ops, > > int unspec) > > return result; > > } > > +static rtx > > +aarch64_expand_stshh_builtin (tree exp, int fcode) > > +{ > > + expand_operand ops[4]; > > + enum insn_code icode; > > + machine_mode mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1))); > > + > > + rtx val = expand_normal (CALL_EXPR_ARG (exp, 1)); > > + rtx mem_order = expand_normal (CALL_EXPR_ARG (exp, 2)); > > + rtx ret = expand_normal (CALL_EXPR_ARG (exp, 3)); > > + > > + if (fcode == AARCH64_BUILTIN_STSHH_SF) > > Would a switch be more helpful here? > I was on the fence, I'll be honest. It's only 3 alternatives, though I suppose given the possibilty of future additions a switch might be better. I'll update in the next revision. > > + { > > + rtx ival = gen_reg_rtx (SImode); > > + emit_move_insn (ival, gen_lowpart (SImode, val)); > > + val = ival; > > + mode = SImode; > > + } > > + else if (fcode == AARCH64_BUILTIN_STSHH_DF) > > + { > > + rtx ival = gen_reg_rtx (DImode); > > + emit_move_insn (ival, gen_lowpart (DImode, val)); > > + val = ival; > > + mode = DImode; > > + } > > + else > > + { > > + if (!register_operand (val, mode)) > > + val = force_reg (mode, val); > > + } > > + > > + rtx addr = expand_normal (CALL_EXPR_ARG (exp, 0)); > > + addr = convert_memory_address (Pmode, addr); > > + rtx mem = gen_rtx_MEM (mode, addr); > > + set_mem_align (mem, GET_MODE_ALIGNMENT (mode)); > > + > > + icode = code_for_aarch64_atomic_store_stshh (mode); > How about you declare "expand_operand ops[]" here, closer to use? Works for me. > > > + create_output_operand (&ops[0], mem, mode); > > + create_input_operand (&ops[1], val, mode); > > + create_input_operand (&ops[2], mem_order, SImode); > > + create_input_operand (&ops[3], ret, SImode); > > + > > + expand_insn (icode, 4, ops); > > + return ops[0].value; > > +} > > + > > /* Expand CALL_EXPR EXP, given that it is a call to the function described > > by BUILTIN_DATA, and return the function's return value. Put the > > result > > in TARGET if convenient. */ > > @@ -4422,6 +4541,15 @@ aarch64_general_expand_builtin (unsigned int fcode, > > tree exp, rtx target, > > case AARCH64_BUILTIN_GCSPOPM: > > case AARCH64_BUILTIN_GCSSS: > > return aarch64_expand_gcs_builtin (exp, target, fcode, ignore); > > + > > + case AARCH64_BUILTIN_STSHH_QI: > > + case AARCH64_BUILTIN_STSHH_HI: > > + case AARCH64_BUILTIN_STSHH_SI: > > + case AARCH64_BUILTIN_STSHH_DI: > > + case AARCH64_BUILTIN_STSHH_PTR: > > + case AARCH64_BUILTIN_STSHH_SF: > > + case AARCH64_BUILTIN_STSHH_DF: > > + return aarch64_expand_stshh_builtin (exp, fcode); > > } > > if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= > > AARCH64_SIMD_BUILTIN_MAX) > > diff --git a/gcc/config/aarch64/aarch64-c.cc > > b/gcc/config/aarch64/aarch64-c.cc > > index c3957c762ef..71ea66d1762 100644 > > --- a/gcc/config/aarch64/aarch64-c.cc > > +++ b/gcc/config/aarch64/aarch64-c.cc > > @@ -295,6 +295,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) > > aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1), > > "__ARM_FEATURE_SME2p1", pfile); > > aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile); > > + aarch64_def_or_undef (TARGET_PCDPHINT, "__ARM_FEATURE_PCDPHINT", pfile); > > // Function multi-versioning defines > > aarch64_def_or_undef (targetm.has_ifunc_p (), > > diff --git a/gcc/config/aarch64/aarch64-option-extensions.def > > b/gcc/config/aarch64/aarch64-option-extensions.def > > index a70375c053f..9e39aeacc18 100644 > > --- a/gcc/config/aarch64/aarch64-option-extensions.def > > +++ b/gcc/config/aarch64/aarch64-option-extensions.def > > @@ -281,6 +281,8 @@ AARCH64_OPT_EXTENSION ("sme-lutv2", SME_LUTv2, (SME2), > > (), (), "sme-lutv2") > > AARCH64_OPT_EXTENSION("cpa", CPA, (), (), (), "") > > +AARCH64_OPT_EXTENSION ("pcdphint", PCDPHINT, (), (), (), "") > > + > > #undef AARCH64_OPT_FMV_EXTENSION > > #undef AARCH64_OPT_EXTENSION > > #undef AARCH64_FMV_FEATURE > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > > index 2cd929d83f9..24597d49a61 100644 > > --- a/gcc/config/aarch64/aarch64.h > > +++ b/gcc/config/aarch64/aarch64.h > > @@ -392,6 +392,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE > > ATTRIBUTE_UNUSED > > but are incompatible with -mtrack-speculation. */ > > #define TARGET_CMPBR (AARCH64_HAVE_ISA (CMPBR) && > > !aarch64_track_speculation) > > +/* PCDPHINT instructions are enabled through +pcdphint. */ > > +#define TARGET_PCDPHINT AARCH64_HAVE_ISA (PCDPHINT) > > + > > /* Make sure this is always defined so we don't have to check for ifdefs > > but rather use normal ifs. */ > > #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT > > diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h > > index 507b6e72bcb..d2a5be0e8f7 100644 > > --- a/gcc/config/aarch64/arm_acle.h > > +++ b/gcc/config/aarch64/arm_acle.h > > @@ -132,6 +132,37 @@ __sqrtf (float __x) > > return __builtin_aarch64_sqrtsf (__x); > > } > > +#define __atomic_store_with_stshh(dst, value, memory_order, ret) \ > > Why is this defined as a macro rather than a function? Having this as a function would require types for the parameters, as value can take any number of types that becomes difficult. The _Generic layout as I have done it here has been used a few times in GCC and seems to normally be a macro. > > Thanks, > Saurabh > > > + _Generic ((value), > > \ > > + char: __builtin_aarch64_stshh_qi, \ > > + unsigned char: __builtin_aarch64_stshh_qi, \ > > + signed char: __builtin_aarch64_stshh_qi, \ > > + unsigned short: __builtin_aarch64_stshh_hi, \ > > + short: __builtin_aarch64_stshh_hi, \ > > + unsigned int: __builtin_aarch64_stshh_si, \ > > + int: __builtin_aarch64_stshh_si, \ > > + unsigned long: __builtin_aarch64_stshh_di, \ > > + long: __builtin_aarch64_stshh_di, \ > > + unsigned long long: __builtin_aarch64_stshh_di, \ > > + long long: __builtin_aarch64_stshh_di, \ > > + float: __builtin_aarch64_stshh_sf, \ > > + double: __builtin_aarch64_stshh_df, \ > > + char*: __builtin_aarch64_stshh_ptr, \ > > + unsigned char*: __builtin_aarch64_stshh_ptr, \ > > + signed char*: __builtin_aarch64_stshh_ptr, \ > > + unsigned short*: __builtin_aarch64_stshh_ptr, \ > > + short*: __builtin_aarch64_stshh_ptr, \ > > + unsigned int*: __builtin_aarch64_stshh_ptr, \ > > + int*: __builtin_aarch64_stshh_ptr, \ > > + unsigned long*: __builtin_aarch64_stshh_ptr, \ > > + long*: __builtin_aarch64_stshh_ptr, \ > > + unsigned long long*: __builtin_aarch64_stshh_ptr, \ > > + long long*: __builtin_aarch64_stshh_ptr, \ > > + float*: __builtin_aarch64_stshh_ptr, \ > > + double*: __builtin_aarch64_stshh_ptr, \ > > + default: __builtin_aarch64_stshh_di \ > > + )((dst), (value), (memory_order), (ret)) > > + > > #pragma GCC push_options > > #pragma GCC target ("+nothing+jscvt") > > __extension__ extern __inline int32_t > > diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md > > index ea4a9367fc8..4a245bbebff 100644 > > --- a/gcc/config/aarch64/atomics.md > > +++ b/gcc/config/aarch64/atomics.md > > @@ -751,6 +751,29 @@ > > [(set_attr "arch" "*,rcpc8_4")] > > ) > > +(define_insn "@aarch64_atomic_store_stshh<mode>" > > + [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "=Q,Ust") > > + (unspec_volatile:ALLI > > + [(match_operand:ALLI 1 "register_operand" "r,r") > > + (match_operand:SI 2 "const_int_operand") ;; model > > + (match_operand:SI 3 "const_int_operand")] ;; ret_policy > > + UNSPECV_STSHH))] > > + "" > > + { > > + if (INTVAL (operands[3]) == 0) > > + output_asm_insn ("stshh keep", operands); > > + else > > + output_asm_insn ("stshh strm", operands); > > + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); > > + if (is_mm_relaxed (model)) > > + return "str<atomic_sfx>\t%<w>1, %0"; > > + else if (which_alternative == 0) > > + return "stlr<atomic_sfx>\t%<w>1, %0"; > > + else > > + return "stlur<atomic_sfx>\t%<w>1, %0"; > > + } > > +) > > + > > (define_insn "@aarch64_load_exclusive<mode>" > > [(set (match_operand:SI 0 "register_operand" "=r") > > (zero_extend:SI > > diff --git a/gcc/config/aarch64/iterators.md > > b/gcc/config/aarch64/iterators.md > > index 517b2808b5f..97b4ece1444 100644 > > --- a/gcc/config/aarch64/iterators.md > > +++ b/gcc/config/aarch64/iterators.md > > @@ -1302,6 +1302,7 @@ > > UNSPECV_LDA ; Represent an atomic load or > > load-acquire. > > UNSPECV_LDAP ; Represent an atomic acquire load with RCpc > > semantics. > > UNSPECV_STL ; Represent an atomic store or > > store-release. > > + UNSPECV_STSHH ; Represent an atomic store with an stshh hint. > > UNSPECV_ATOMIC_CMPSW ; Represent an atomic compare swap. > > UNSPECV_ATOMIC_EXCHG ; Represent an atomic exchange. > > UNSPECV_ATOMIC_CAS ; Represent an atomic CAS. > > diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c.c > > b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c.c > > new file mode 100644 > > index 00000000000..e5857df22d7 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/atomic_store_with_stshh.c.c > > @@ -0,0 +1,119 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -march=armv8-a -save-temps" } */ > > +/* { dg-final { check-function-bodies "**" "" } } */ > > + > > +#include <arm_acle.h> > > + > > +/* > > +** testFun1: > > +** ... > > +** stshh keep > > +** strb w[0-9]+, \[x[0-9]+\] > > +** ... > > +*/ > > +void > > +testFun1 () > > +{ > > + char item1 = 0; > > + char* ptr1 = &item1; > > + char test1 = 1; > > + > > + __atomic_store_with_stshh (ptr1, test1, __ATOMIC_RELAXED, 0); > > +} > > + > > +/* > > +** testFun2: > > +** ... > > +** stshh keep > > +** stlrh w[0-9]+, \[x[0-9]+\] > > +** ... > > +*/ > > +void > > +testFun2 () > > +{ > > + short item2 = 10; > > + short* ptr2 = &item2; > > + short test2 = 11; > > + __atomic_store_with_stshh (ptr2, test2, __ATOMIC_RELEASE, 0); > > +} > > + > > +/* > > +** testFun3: > > +** ... > > +** stshh strm > > +** stlr w[0-9]+, \[x[0-9]+\] > > +** ... > > +*/ > > +void > > +testFun3 () > > +{ > > + unsigned int item3 = 10; > > + unsigned int* ptr3 = &item3; > > + unsigned int test3 = 11; > > + __atomic_store_with_stshh (ptr3, test3, __ATOMIC_SEQ_CST, 1); > > +} > > + > > +/* > > +** testFun4: > > +** ... > > +** stshh strm > > +** str x[0-9]+, \[x[0-9]+\] > > +** ... > > +*/ > > +void > > +testFun4 () > > +{ > > + long item4 = 10; > > + long* ptr4 = &item4; > > + long test4 = 11; > > + __atomic_store_with_stshh (ptr4, test4, __ATOMIC_RELAXED, 1); > > +} > > + > > +/* > > +** testFun5: > > +** ... > > +** stshh keep > > +** stlr x[0-9]+, \[sp\] > > +** ... > > +*/ > > +void > > +testFun5 () > > +{ > > + long item5 = 10; > > + long* ptr5 = &item5; > > + long test5item = 11; > > + long* test5 = &test5item; > > + __atomic_store_with_stshh (ptr5, test5, __ATOMIC_SEQ_CST, 0); > > +} > > + > > +/* > > +** testFun6: > > +** ... > > +** stshh keep > > +** stlr w[0-9]+, \[x[0-9]+\] > > +** ... > > +*/ > > +void > > +testFun6 () > > +{ > > + float item6 = 10; > > + float* ptr6 = &item6; > > + float test6 = 11; > > + __atomic_store_with_stshh (ptr6, test6, __ATOMIC_SEQ_CST, 0); > > +} > > + > > +/* > > +** testFun7: > > +** ... > > +** stshh strm > > +** str x[0-9]+, \[x[0-9]+\] > > +** ... > > +*/ > > +void > > +testFun7 () > > +{ > > + double item7 = 10; > > + double* ptr7 = &item7; > > + double test7 = 11; > > + __atomic_store_with_stshh (ptr7, test7, __ATOMIC_RELAXED, 1); > > +} > > \ No newline at end of file >
