https://github.com/kmclaughlin-arm updated 
https://github.com/llvm/llvm-project/pull/198316

>From 106c21f129e83919e43489241b35c1378a16fb05 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <[email protected]>
Date: Wed, 6 May 2026 13:08:02 +0000
Subject: [PATCH 1/3] [AArch64] Implement the atomic store with hint intrinsic

Adds the following ACLE intrinsic as described in [1]:

void __arm_atomic_store_with_hint(type *ptr, type data,
                                  int memory_order, int hint);

A regular atomic store instruction is emitted in Clang for this builtin
with additional metadata (`!aarch64.atomic.hint`), which ensures the
instruction is recognised as atomic by passes in LLVM.
When an atomic store has this metadata, this lowers to the ATOMIC_STORE_HINT
pseudo which is later expanded by AArch64ExpandPseudoInsts into an STSHH
instruction plus an atomic store.

The hint value is represented using MOTargetFlag3 & MOTargetFlag4 flags,
which will need to be extended when new hints are added in future.

[1] https://github.com/ARM-software/acle/pull/432
---
 clang/include/clang/Basic/BuiltinsAArch64.td  |   4 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   6 +
 clang/include/clang/Sema/SemaARM.h            |   1 +
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |  54 ++++
 clang/lib/Headers/arm_acle.h                  |   6 +
 clang/lib/Sema/SemaARM.cpp                    |  92 ++++++
 clang/test/CodeGen/arm_acle.c                 |  78 +++++
 clang/test/CodeGen/builtins-arm64.c           |  13 +
 clang/test/Sema/builtins-arm64.c              |  17 ++
 .../include/llvm/Support/AArch64AtomicHints.h |  36 +++
 llvm/lib/CodeGen/AtomicExpandPass.cpp         |   3 +
 .../AArch64/AArch64ExpandPseudoInsts.cpp      |  56 ++++
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    |  20 ++
 .../Target/AArch64/AArch64ISelLowering.cpp    |  21 +-
 .../lib/Target/AArch64/AArch64InstrAtomics.td | 109 +++++++
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |  13 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.h    |   9 +
 .../GISel/AArch64InstructionSelector.cpp      |  61 ++++
 .../Atomics/aarch64-atomic-store-hint.ll      | 288 ++++++++++++++++++
 .../Atomics/aarch64-atomic-store-hint.mir     |  30 ++
 20 files changed, 916 insertions(+), 1 deletion(-)
 create mode 100644 llvm/include/llvm/Support/AArch64AtomicHints.h
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td 
b/clang/include/clang/Basic/BuiltinsAArch64.td
index 15257f3db5b41..19a65e7bdf8fe 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -171,6 +171,10 @@ let Attributes = [NoThrow], Features = "ls64" in {
        def st64bv0 : AArch64TargetBuiltin<"uint64_t (void *, uint64_t const 
*)">;
 }
 
+let Attributes = [NoThrow, CustomTypeChecking] in {
+  def atomic_store_with_hint : AArch64Builtin<"void(...)">;
+}
+
 // Armv9.3-A Guarded Control Stack
 let Attributes = [NoThrow], Features = "gcs" in {
        def gcspopm : AArch64TargetBuiltin<"uint64_t (uint64_t)">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f84cd8dca6d4c..11ddd5b61e4cb 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9667,6 +9667,12 @@ def err_atomic_op_needs_atomic_int_or_fp : Error<
 def err_atomic_op_needs_atomic_int : Error<
   "address argument to atomic operation must be a pointer to "
   "%select{|atomic }0integer (%1 invalid)">;
+def err_atomic_op_hint_data_size : Error<
+  "address argument to atomic store with hint must be of size 8, 16, 32 or 64 
bits">;
+def err_atomic_hint_has_invalid_memory_order : Error<
+   "invalid memory order argument to atomic hint operation (%0 invalid)">;
+def err_atomic_hint_has_invalid_hint_type : Error<
+   "invalid hint type argument to atomic hint operation (%0 invalid)">;
 def warn_atomic_op_has_invalid_memory_order : Warning<
   "%select{|success |failure }0memory order argument to atomic operation is 
invalid">,
   InGroup<DiagGroup<"atomic-memory-ordering">>;
diff --git a/clang/include/clang/Sema/SemaARM.h 
b/clang/include/clang/Sema/SemaARM.h
index af8e0e9047171..b0a01c40ffece 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -70,6 +70,7 @@ class SemaARM : public SemaBase {
   bool BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, int ArgNum,
                             unsigned ExpectedFieldNum, bool AllowName);
   bool BuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool BuiltinARMAtomicStoreHintCall(unsigned BuiltinID, CallExpr *TheCall);
 
   bool MveAliasValid(unsigned BuiltinID, llvm::StringRef AliasName);
   bool CdeAliasValid(unsigned BuiltinID, llvm::StringRef AliasName);
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 4c668dabd53dc..4a13767268f96 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/TargetParser/AArch64TargetParser.h"
 
 #include <numeric>
@@ -2129,6 +2130,56 @@ static Value *EmitRangePrefetchBuiltin(CodeGenFunction 
&CGF, unsigned BuiltinID,
                             Ops);
 }
 
+/// Emits __builtin_arm_atomic_store_with_hint(ptr, data, order, hint) as an
+/// atomic store carrying the hint as !aarch64.atomic.hint metadata.
+static Value *EmitAtomicStoreWithHintBuiltin(CodeGenFunction &CGF,
+                                             unsigned BuiltinID,
+                                             const CallExpr *E) {
+  CodeGen::CGBuilderTy &Builder = CGF.Builder;
+  CodeGen::CodeGenModule &CGM = CGF.CGM;
+  Expr::EvalResult Result;
+  if (!E->getArg(2)->EvaluateAsInt(Result, CGM.getContext()))
+    llvm_unreachable(
+        "Expected integer memory order argument to atomic store with hint.");
+
+  // Emit the operands in separate statements; call-argument evaluation order
+  // is unspecified and would make the emitted IR depend on the host compiler.
+  Value *Data = CGF.EmitScalarExpr(E->getArg(1));
+  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
+  StoreInst *Store = Builder.CreateStore(Data, Ptr);
+
+  unsigned OrderingArg = Result.Val.getInt().getExtValue();
+  assert(isValidAtomicOrderingCABI(OrderingArg) && "Invalid atomic ordering");
+  switch (static_cast<AtomicOrderingCABI>(OrderingArg)) {
+  default:
+    llvm_unreachable("Unsupported atomic ordering found.");
+  case AtomicOrderingCABI::relaxed:
+    Store->setAtomic(AtomicOrdering::Monotonic);
+    break;
+  case AtomicOrderingCABI::release:
+    Store->setAtomic(AtomicOrdering::Release);
+    break;
+  case AtomicOrderingCABI::seq_cst:
+    Store->setAtomic(AtomicOrdering::SequentiallyConsistent);
+    break;
+  }
+
+  if (!E->getArg(3)->EvaluateAsInt(Result, CGM.getContext()))
+    llvm_unreachable(
+        "Expected integer hint argument to atomic store with hint.");
+  unsigned HintArg = Result.Val.getInt().getExtValue();
+  assert((getAtomicStoreHintFromMD(HintArg) !=
+          AArch64AtomicStoreHint::HINT_NONE) &&
+         "Invalid hint type");
+  MDNode *HintMDVal =
+      MDNode::get(CGM.getLLVMContext(),
+                  llvm::ConstantAsMetadata::get(Builder.getInt32(HintArg)));
+  Store->setMetadata(CGM.getModule().getMDKindID("aarch64.atomic.hint"),
+                     HintMDVal);
+
+  return Store;
+}
+
 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
 /// argument that specifies the vector type. The additional argument is meant
 /// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
@@ -4927,6 +4978,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned 
BuiltinID,
       BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
     return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
 
+  if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_hint)
+    return EmitAtomicStoreWithHintBuiltin(*this, BuiltinID, E);
+
   // Memory Tagging Extensions (MTE) Intrinsics
   Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 9a6b6a837fa5a..bd99527dc5fa8 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -741,6 +741,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
 }
 #endif
 
+/* Atomic store with hints */
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+#define __arm_atomic_store_with_hint(ptr, data, memory_order, hint)            
\
+  __builtin_arm_atomic_store_with_hint(ptr, data, memory_order, hint)
+#endif
+
 /* 11.1 Special register intrinsics */
 #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
 #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 5e7504fab416d..78f83d18deab8 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -17,6 +17,7 @@
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 
 namespace clang {
 
@@ -322,6 +323,94 @@ bool SemaARM::BuiltinARMSpecialReg(unsigned BuiltinID, 
CallExpr *TheCall,
   return false;
 }
 
+/// Checks a call to __builtin_arm_atomic_store_with_hint(ptr, data, order,
+/// hint). Returns true if an error was diagnosed, false if the call is valid.
+bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
+                                            CallExpr *TheCall) {
+  if (SemaRef.checkArgCount(TheCall, 4))
+    return true;
+
+  // Arg 0 should be the pointer type. The pointee type must be a
+  // scalar integral or floating-point type of 8, 16, 32 or 64 bits.
+  ASTContext &Context = getASTContext();
+  ExprResult PtrArgRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+  if (PtrArgRes.isInvalid())
+    return true;
+  // Check, and pass on to CodeGen, the converted operand so arrays decay.
+  Expr *PtrArg = PtrArgRes.get();
+  TheCall->setArg(0, PtrArg);
+  auto *PtrTy = PtrArg->getType()->getAs<PointerType>();
+  if (!PtrTy)
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_builtin_must_be_pointer)
+           << PtrArg->getType() << 0 << PtrArg->getSourceRange();
+  QualType PtrQT = PtrTy->getPointeeType();
+
+  // TODO: Allow MFloat8 types when supported by atomic store
+  if (!PtrQT->isIntegralType(Context) && !PtrQT->isFloatingType())
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_op_needs_atomic_int_or_fp)
+           << 0 << PtrQT << PtrArg->getSourceRange();
+
+  unsigned TySize =
+      Context.getTypeSize(Context.getCanonicalType(PtrQT).getUnqualifiedType());
+  if (TySize != 8 && TySize != 16 && TySize != 32 && TySize != 64)
+    return Diag(TheCall->getBeginLoc(), diag::err_atomic_op_hint_data_size)
+           << PtrArg->getSourceRange();
+
+  // Arg 1 is the data to be stored. The type must match the pointee type
+  // found above; canonical types are compared so typedef sugar is ignored.
+  auto DataArgRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+  if (DataArgRes.isInvalid())
+    return true;
+  QualType DataQT = DataArgRes.get()->getType();
+  if (!Context.hasSameType(PtrQT, DataQT))
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_typecheck_call_different_arg_types)
+           << PtrQT << DataQT;
+
+  // Arg 2 is the memory order, which must be relaxed, release or seq_cst
+  auto MemOrdArg =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(2)).get();
+  std::optional<llvm::APSInt> MemOrdAP =
+      MemOrdArg->getIntegerConstantExpr(Context);
+  if (!MemOrdAP)
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_hint_has_invalid_memory_order)
+           << MemOrdArg->getType() << MemOrdArg->getSourceRange();
+
+  // Compare at 64 bits: narrowing to unsigned could wrap an out-of-range
+  // constant onto a valid ordering.
+  uint64_t Ordering = MemOrdAP->getZExtValue();
+  if (Ordering != static_cast<uint64_t>(llvm::AtomicOrderingCABI::relaxed) &&
+      Ordering != static_cast<uint64_t>(llvm::AtomicOrderingCABI::release) &&
+      Ordering != static_cast<uint64_t>(llvm::AtomicOrderingCABI::seq_cst))
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_hint_has_invalid_memory_order)
+           << *MemOrdAP << MemOrdArg->getSourceRange();
+
+  // Arg 3 is the hint type. Only values represented by AArch64AtomicStoreHint
+  // are valid.
+  auto HintArg =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(3)).get();
+  std::optional<llvm::APSInt> HintAP = HintArg->getIntegerConstantExpr(Context);
+  if (!HintAP)
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_hint_has_invalid_hint_type)
+           << HintArg->getType() << HintArg->getSourceRange();
+
+  uint64_t Hint = HintAP->getZExtValue();
+  if (llvm::getAtomicStoreHintFromMD(Hint) ==
+      llvm::AArch64AtomicStoreHint::HINT_NONE)
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_hint_has_invalid_hint_type)
+           << *HintAP << HintArg->getSourceRange();
+
+  return false;
+}
+
 /// getNeonEltType - Return the QualType corresponding to the elements of
 /// the vector type specified by the NeonTypeFlags.  This is used to check
 /// the pointer arguments for Neon load/store intrinsics.
@@ -1166,6 +1255,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const 
TargetInfo &TI,
       BuiltinID == AArch64::BI__builtin_arm_wsrp)
     return BuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true);
 
+  if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_hint)
+    return BuiltinARMAtomicStoreHintCall(BuiltinID, TheCall);
+
   // Only check the valid encoding range. Any constant in this range would be
   // converted to a register of the form S2_2_C3_C4_5. Let the hardware throw
   // an exception for incorrect registers. This matches MSVC behavior.
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index cd18fa63bfdbd..a8aa0916a8a4c 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1821,3 +1821,81 @@ int test_rndrrs(uint64_t *__addr) {
   return __rndrrs(__addr);
 }
 #endif
+
+#if defined(__ARM_64BIT_STATE)
+
+// AArch64-LABEL: @test_atomic_store_hint_char(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, 
align 1, !aarch64.atomic.hint [[META3:![0-9]+]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_char(char *ptr, char data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_bfloat(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic bfloat [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 2, !aarch64.atomic.hint [[META4:![0-9]+]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_bfloat(__bf16 *ptr, __bf16 data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_short(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 2, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_short(short *ptr, short data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_u32(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, 
align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_u32(uint32_t *ptr, uint32_t data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_float(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic float [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, 
align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_float(float *ptr, float data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_s64(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, 
align 8, !aarch64.atomic.hint [[META4]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_s64(int64_t *ptr, int64_t data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_long_long_int(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_long_long_int(long long int *ptr, long long int 
data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_double(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic double [[DATA:%.*]], ptr [[PTR:%.*]] 
monotonic, align 8, !aarch64.atomic.hint [[META4]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_double(double *ptr, double data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1);
+}
+
+// AArch64: [[META3]] = !{i32 0}
+// AArch64-NEXT: [[META4]] = !{i32 1}
+#endif
diff --git a/clang/test/CodeGen/builtins-arm64.c 
b/clang/test/CodeGen/builtins-arm64.c
index 3d054c79f1777..ad9ba7feca671 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -216,4 +216,17 @@ void trap() {
   __builtin_arm_trap(42);
 }
 
+void atomic_store_with_hint(int64_t *a, int64_t b) {
+  __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELAXED, 0); // 
HINT_STSHH_KEEP
+  // CHECK: store atomic i64 {{.*}}, ptr {{.*}} monotonic, align 8, 
!aarch64.atomic.hint ![[M1:[0-9]]]
+
+  __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_SEQ_CST, 0);
+  // CHECK: store atomic i64 {{.*}}, ptr {{.*}} seq_cst, align 8, 
!aarch64.atomic.hint ![[M1]]
+
+  __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELEASE, 1); // 
HINT_STSHH_STRM
+  // CHECK: store atomic i64 {{.*}}, ptr {{.*}} release, align 8, 
!aarch64.atomic.hint ![[M2:[0-9]]]
+}
+
 // CHECK: ![[M0]] = !{!"1:2:3:4:5"}
+// CHECK: ![[M1]] = !{i32 0}
+// CHECK: ![[M2]] = !{i32 1}
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index 41cffd7ebb1a0..fb4718a1bd1f4 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -51,3 +51,20 @@ void test_trap(short s, unsigned short us) {
   __builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' 
must be a constant integer}}
   __builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' 
must be a constant integer}}
 }
+
+void test_atomic_store_hint(char *c_ptr, __int128 *inv_ptr, float *f_ptr,
+                            char c_data, __int128 inv_data, float f_data,
+                            int inv_int) {
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0); // expected-error 
{{too few arguments to function call, expected 4, have 3}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 0, 0); // 
expected-error {{too many arguments to function call, expected 4, have 5}}
+
+  __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error 
{{address argument to atomic builtin must be a pointer ('int' invalid)}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, f_data, 0, 0); // expected-error 
{{arguments are of different types ('char' vs 'float')}}
+  __builtin_arm_atomic_store_with_hint(inv_ptr, inv_data, 0, 0); // 
expected-error {{address argument to atomic store with hint must be of size 8, 
16, 32 or 64 bits}}
+
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, inv_int, 0); // 
expected-error {{invalid memory order argument to atomic hint operation ('int' 
invalid)}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 2, 0); // expected-error 
{{invalid memory order argument to atomic hint operation (2 invalid)}}
+
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, inv_int); // 
expected-error {{invalid hint type argument to atomic hint operation ('int' 
invalid)}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // expected-error 
{{invalid hint type argument to atomic hint operation (3 invalid)}}
+}
diff --git a/llvm/include/llvm/Support/AArch64AtomicHints.h 
b/llvm/include/llvm/Support/AArch64AtomicHints.h
new file mode 100644
index 0000000000000..8118f3e2df3ad
--- /dev/null
+++ b/llvm/include/llvm/Support/AArch64AtomicHints.h
@@ -0,0 +1,36 @@
+//===-- AArch64AtomicHints.h - AArch64 Atomic Hint Attributes ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AARCH64ATOMICHINTS_H
+#define LLVM_SUPPORT_AARCH64ATOMICHINTS_H
+
+namespace llvm {
+enum class AArch64AtomicStoreHint {
+  HINT_NONE = 0,
+  HINT_STSHH_KEEP = 1,
+  HINT_STSHH_STRM = 2,
+};
+
+template <typename Int> inline bool isValidAArch64AtomicHintValue(Int I) {
+  return (Int)AArch64AtomicStoreHint::HINT_STSHH_KEEP <= I &&
+         I <= (Int)AArch64AtomicStoreHint::HINT_STSHH_STRM;
+}
+
+template <typename Int>
+inline AArch64AtomicStoreHint getAtomicStoreHintFromMD(Int I) {
+  switch (I) {
+  case 0:
+    return AArch64AtomicStoreHint::HINT_STSHH_KEEP;
+  case 1:
+    return AArch64AtomicStoreHint::HINT_STSHH_STRM;
+  default:
+    return AArch64AtomicStoreHint::HINT_NONE;
+  }
+}
+} // namespace llvm
+#endif // LLVM_SUPPORT_AARCH64ATOMICHINTS_H
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 960d2492c2856..3b2d60fa4526c 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -254,6 +254,8 @@ static void copyMetadataForAtomic(Instruction &Dest,
         Dest.setMetadata(ID, N);
       else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
         Dest.setMetadata(ID, N);
+      else if (ID == Ctx.getMDKindID("aarch64.atomic.hint"))
+        Dest.setMetadata(ID, N);
 
       // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
       // uses.
@@ -719,6 +721,7 @@ StoreInst 
*AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
   NewSI->setAlignment(SI->getAlign());
   NewSI->setVolatile(SI->isVolatile());
   NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
+  copyMetadataForAtomic(*NewSI, *SI);
   LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
   SI->eraseFromParent();
   return NewSI;
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp 
b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 5fa93da1544fc..81fb5619f57b0 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -70,6 +70,9 @@ class AArch64ExpandPseudoImpl {
                             MachineBasicBlock::iterator MBBI);
   bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI);
+  bool expandAtomicStoreHintPseudo(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned Size);
   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                       unsigned ExtendImm, unsigned ZeroReg,
@@ -1308,6 +1311,51 @@ bool AArch64ExpandPseudoImpl::expandFormTuplePseudo(
   return true;
 }
 
+bool AArch64ExpandPseudoImpl::expandAtomicStoreHintPseudo(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Size) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  unsigned StOpc;
+  unsigned Order = MI.getOperand(2).getImm();
+  bool Relaxed = Order == 0;
+
+  switch (Size) {
+  case 8:
+    StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
+    break;
+  case 16:
+    StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
+    break;
+  case 32:
+    StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
+    break;
+  case 64:
+    StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
+    break;
+  default:
+    llvm_unreachable("Unexpected atomic hint size.");
+  }
+
+  auto *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
+                   .addImm(MI.getOperand(3).getImm())
+                   .getInstr();
+
+  auto Store = BuildMI(MBB, MBBI, DL, TII->get(StOpc))
+                   .add(MI.getOperand(1))
+                   .addReg(MI.getOperand(0).getReg())
+                   .setMemRefs(MI.memoperands())
+                   .setMIFlags(MI.getFlags());
+
+  if (Relaxed)
+    Store.addImm(0);
+
+  transferImpOps(MI, Store, Store);
+  finalizeBundle(MBB, Hint->getIterator(), MBBI->getIterator());
+  MI.eraseFromParent();
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true.  Otherwise return false.
 bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
@@ -1948,6 +1996,14 @@ bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock 
&MBB,
   case AArch64::NAND_ZZZ:
   case AArch64::NOR_ZZZ:
     return expandSVEBitwisePseudo(MI, MBB, MBBI);
+  case AArch64::ATOMIC_STORE_HINT_B:
+    return expandAtomicStoreHintPseudo(MBB, MBBI, 8);
+  case AArch64::ATOMIC_STORE_HINT_H:
+    return expandAtomicStoreHintPseudo(MBB, MBBI, 16);
+  case AArch64::ATOMIC_STORE_HINT_S:
+    return expandAtomicStoreHintPseudo(MBB, MBBI, 32);
+  case AArch64::ATOMIC_STORE_HINT_D:
+    return expandAtomicStoreHintPseudo(MBB, MBBI, 64);
   }
   return false;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ade160de983b1..ac02cc7ca017a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
 
//===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
+#include "AArch64InstrInfo.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
@@ -21,6 +22,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
@@ -511,6 +513,10 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
 
   bool SelectCMP_SWAP(SDNode *N);
 
+  bool isAtomicHintInst(SDNode *N, AArch64AtomicStoreHint Hint) const;
+  bool isAtomicSTSHH_KEEP(SDNode *N) const;
+  bool isAtomicSTSHH_STRM(SDNode *N) const;
+
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                           bool Negate);
   bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
@@ -4533,6 +4539,20 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   return true;
 }
 
+bool AArch64DAGToDAGISel::isAtomicHintInst(SDNode *N,
+                                           AArch64AtomicStoreHint Hint) const {
+  const MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+  return AArch64InstrInfo::decodeAtomicHintFlags(MMO->getFlags()) == Hint;
+}
+
+bool AArch64DAGToDAGISel::isAtomicSTSHH_KEEP(SDNode *N) const {
+  return isAtomicHintInst(N, AArch64AtomicStoreHint::HINT_STSHH_KEEP);
+}
+
+bool AArch64DAGToDAGISel::isAtomicSTSHH_STRM(SDNode *N) const {
+  return isAtomicHintInst(N, AArch64AtomicStoreHint::HINT_STSHH_STRM);
+}
+
 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
                                              SDValue &Shift, bool Negate) {
   if (!isa<ConstantSDNode>(N))
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 63182d31bfd7b..49d99f6f8e80d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -78,6 +78,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
@@ -18662,7 +18663,25 @@ AArch64TargetLowering::getTargetMMOFlags(const 
Instruction &I) const {
   if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
       I.hasMetadata(FALKOR_STRIDED_ACCESS_MD))
     return MOStridedAccess;
-  return MachineMemOperand::MONone;
+
+  auto Flags = MachineMemOperand::MONone;
+  const MDNode *AtomicStHint = I.getMetadata(AARCH64_ATOMIC_STORE_HINT_MD);
+  if (AtomicStHint) {
+    unsigned HintVal =
+        cast<ConstantInt>(
+            cast<ConstantAsMetadata>(AtomicStHint->getOperand(0))->getValue())
+            ->getZExtValue();
+    AArch64AtomicStoreHint Hint = getAtomicStoreHintFromMD(HintVal);
+    assert(Hint != AArch64AtomicStoreHint::HINT_NONE &&
+           "Unrecognised atomic hint value requested.");
+
+    if (static_cast<unsigned>(Hint) & 0b1)
+      Flags |= MOAtomicHintBit0;
+    if (static_cast<unsigned>(Hint) & 0b10)
+      Flags |= MOAtomicHintBit1;
+  }
+
+  return Flags;
 }
 
 bool AArch64TargetLowering::isLegalInterleavedAccessType(
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td 
b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 2187f21abb70f..7d719239ecc02 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -282,6 +282,115 @@ def : Pat<(relaxed_store<atomic_store_64>
                (am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert 
(f64 FPR64Op:$val)))),
           (STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
 
+//===----------------------------------
+// Atomic store with hint pseudos
+//===----------------------------------
+
+class seq_cst_store<PatFrags base>
+  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingSequentiallyConsistent = 1;
+}
+
+class atomic_hint_stshh_keep_relaxed<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (relaxed_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_KEEP(N); }]>;
+
+class atomic_hint_stshh_keep_release<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (releasing_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_KEEP(N); }]>;
+
+class atomic_hint_stshh_keep_seqcst<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (seq_cst_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_KEEP(N); }]>;
+
+class atomic_hint_stshh_strm_relaxed<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (relaxed_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_STRM(N); }]>;
+
+class atomic_hint_stshh_strm_release<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (releasing_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_STRM(N); }]>;
+
+class atomic_hint_stshh_strm_seqcst<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (seq_cst_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_STRM(N); }]>;
+
+class BaseStoreHintPseudo<RegisterClass regtype>
+      : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
+                        i32imm:$order, i32imm:$hint), []>, 
Sched<[WriteAtomic]> {
+  let isCodeGenOnly = 1;
+  let hasSideEffects = 1;
+  let mayStore = 1;
+}
+
+def ATOMIC_STORE_HINT_B : BaseStoreHintPseudo<GPR32>;
+def ATOMIC_STORE_HINT_H : BaseStoreHintPseudo<GPR32>;
+def ATOMIC_STORE_HINT_S : BaseStoreHintPseudo<GPR32>;
+def ATOMIC_STORE_HINT_D : BaseStoreHintPseudo<GPR64>;
+
+let AddedComplexity = 15 in {
+  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
0))>;
+  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
0))>;
+  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
0))>;
+  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 
0))>;
+
+  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
0))>;
+  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
0))>;
+  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
0))>;
+  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 
0))>;
+
+  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
0))>;
+  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
0))>;
+  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
0))>;
+  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 
0))>;
+
+  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
1))>;
+  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
1))>;
+  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
1))>;
+  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 
1))>;
+
+  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
1))>;
+  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
1))>;
+  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
1))>;
+  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 
1))>;
+
+  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
1))>;
+  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
1))>;
+  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
1))>;
+  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 
1))>;
+}
+
 //===----------------------------------
 // Low-level exclusive operations
 //===----------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 57ea2d2f2f992..1d75d3aa89f77 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2885,6 +2885,19 @@ bool AArch64InstrInfo::isStridedAccess(const 
MachineInstr &MI) {
   });
 }
 
+AArch64AtomicStoreHint
+AArch64InstrInfo::decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags) {
+  // Rebuild the two-bit hint encoding from the two target MMO flag bits.
+  const unsigned LowBit = (MMOFlags & MOAtomicHintBit0) ? 0b01u : 0u;
+  const unsigned HighBit = (MMOFlags & MOAtomicHintBit1) ? 0b10u : 0u;
+  const unsigned Encoded = LowBit | HighBit;
+
+  // Values rejected by the validity helper are reported as HINT_NONE.
+  return isValidAArch64AtomicHintValue(Encoded)
+             ? static_cast<AArch64AtomicStoreHint>(Encoded)
+             : AArch64AtomicStoreHint::HINT_NONE;
+}
+
 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
   switch (Opc) {
   default:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 15bd832de8d25..06fb6cbbabe5a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -16,6 +16,7 @@
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/Support/TypeSize.h"
 #include <optional>
 
@@ -30,8 +31,13 @@ static const MachineMemOperand::Flags MOSuppressPair =
     MachineMemOperand::MOTargetFlag1;
 static const MachineMemOperand::Flags MOStridedAccess =
     MachineMemOperand::MOTargetFlag2;
+// The atomic store hint travels on the memory operand as a two-bit value:
+// MOAtomicHintBit0 is the low bit (0b1) and MOAtomicHintBit1 the high bit
+// (0b10). getTargetMMOFlags() sets them; decodeAtomicHintFlags() reads them.
+static const MachineMemOperand::Flags MOAtomicHintBit0 =
+    MachineMemOperand::MOTargetFlag3;
+static const MachineMemOperand::Flags MOAtomicHintBit1 =
+    MachineMemOperand::MOTargetFlag4;
 
 #define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
+#define AARCH64_ATOMIC_STORE_HINT_MD "aarch64.atomic.hint"
 
 // AArch64 MachineCombiner patterns
 enum AArch64MachineCombinerPattern : unsigned {
@@ -230,6 +236,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   /// Return true if the given load or store is a strided memory access.
   static bool isStridedAccess(const MachineInstr &MI);
 
+  /// Decode the atomic store hint held in the MOAtomicHintBit0 and
+  /// MOAtomicHintBit1 bits of \p MMOFlags. Returns HINT_NONE when the decoded
+  /// value is not a valid hint.
+  static AArch64AtomicStoreHint
+  decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags);
+
   /// Return true if it has an unscaled load/store offset.
   static bool hasUnscaledLdStOffset(unsigned Opc);
   static bool hasUnscaledLdStOffset(MachineInstr &MI) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 4f4c999ab244d..fe98659b6900e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -45,6 +45,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <optional>
@@ -2541,6 +2542,66 @@ bool 
AArch64InstructionSelector::earlySelect(MachineInstr &I) {
     I.eraseFromParent();
     return true;
   }
+  case TargetOpcode::G_STORE: {
+    GStore &St = cast<GStore>(I);
+    auto MMO = St.getMMO();
+    LLT PtrTy = MRI.getType(St.getPointerReg());
+
+    // Only for handling atomic store with hint.
+    // Can only handle AddressSpace 0, 64-bit pointers.
+    if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64)) {
+      return false;
+    }
+
+    AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags());
+    if (Hint == AArch64AtomicStoreHint::HINT_NONE)
+      return false;
+
+    unsigned HintOpc;
+    unsigned StoreSize = St.getMemSizeInBits().getValue();
+    Register ValueReg = St.getValueReg();
+    switch (StoreSize) {
+    case 8:
+      HintOpc = AArch64::ATOMIC_STORE_HINT_B;
+      break;
+    case 16: {
+      Register CastReg;
+      if (mi_match(ValueReg, MRI, m_GBitcast(m_Reg(CastReg)))) {
+        auto Undef = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF,
+                                    {&AArch64::FPR32RegClass}, {});
+        auto Ins = MIB.buildInstr(TargetOpcode::INSERT_SUBREG,
+                                  {&AArch64::FPR32RegClass}, {Undef, ValueReg})
+                       .addImm(AArch64::hsub);
+        constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
+        constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+        ValueReg = Ins.getReg(0);
+      }
+      HintOpc = AArch64::ATOMIC_STORE_HINT_H;
+      break;
+    }
+    case 32:
+      HintOpc = AArch64::ATOMIC_STORE_HINT_S;
+      break;
+    case 64:
+      HintOpc = AArch64::ATOMIC_STORE_HINT_D;
+      break;
+    default:
+      llvm_unreachable("Unexpected getMemSizeInBits() value for atomic hint.");
+    }
+
+    unsigned HintImm = Hint == AArch64AtomicStoreHint::HINT_STSHH_KEEP ? 0 : 1;
+
+    auto StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
+                         .addReg(St.getPointerReg())
+                         .addReg(ValueReg)
+                         .addImm((int)toCABI(St.getMMO().getSuccessOrdering()))
+                         .addImm(static_cast<unsigned>(HintImm));
+
+    StrPseudo.cloneMemRefs(I);
+    I.eraseFromParent();
+    constrainSelectedInstRegOperands(*StrPseudo, TII, TRI, RBI);
+    return true;
+  }
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll 
b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
new file mode 100644
index 0000000000000..dfcfa92cbc6c8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | 
FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel=1 
-verify-machineinstrs < %s | FileCheck %s
+
+;
+; STSHH: Keep, Relaxed
+;
+
+define dso_local void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) 
nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strb w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i8 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) 
nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strh w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i16 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) 
nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i32 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) 
nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str x1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i64 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+;
+; STSHH: Keep, Release
+;
+
+define dso_local void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlrh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic bfloat %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_release_half(ptr %ptr, half 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlrh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic half %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_release_float(ptr %ptr, float 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlr w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic float %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_release_double(ptr %ptr, double 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlr x8, [x0]
+; CHECK-NEXT:    ret
+  store atomic double %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+;
+; STSHH: Keep, SequentiallyConsistent
+;
+
+define dso_local void @test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) 
nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlrb w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i8 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) 
nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlrh w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i16 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) 
nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlr w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i32 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) 
nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlr x1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i64 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+;
+; STSHH: Stream, Relaxed
+;
+
+define dso_local void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic bfloat %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint 
!1
+  ret void
+}
+
+define dso_local void @test_atomic_store_strm_relaxed_half(ptr %ptr, half 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic half %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_strm_relaxed_float(ptr %ptr, float 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic float %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_strm_relaxed_double(ptr %ptr, double 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str x8, [x0]
+; CHECK-NEXT:    ret
+  store atomic double %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint 
!1
+  ret void
+}
+
+;
+; STSHH: Stream, Release
+;
+
+define dso_local void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) 
nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrb w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_release_i16(ptr %ptr, i16 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrh w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i16 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_release_i32(ptr %ptr, i32 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i32 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_release_i64(ptr %ptr, i64 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr x1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i64 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+;
+; STSHH: Stream, SequentiallyConsistent
+;
+
+define dso_local void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic bfloat %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_seqcst_half(ptr %ptr, half 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic half %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_seqcst_float(ptr %ptr, float 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic float %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_seqcst_double(ptr %ptr, double 
%val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr x8, [x0]
+; CHECK-NEXT:    ret
+  store atomic double  %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+!0 = !{i32 0}
+!1 = !{i32 1}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir 
b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
new file mode 100644
index 0000000000000..ed69efafb04c3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
@@ -0,0 +1,30 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=aarch64-expand-pseudo 
-verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  define void @test_atomic_store_keep_release_i8(ptr %ptr, i8 %val) {
+    store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+    ret void
+  }
+
+  !0 = !{i32 0}
+...
+
+---
+name:            test_atomic_store_keep_release_i8
+liveins:
+  - { reg: '$x0', virtual-reg: '' }
+  - { reg: '$w1', virtual-reg: '' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: $w1, $x0
+
+    ; CHECK-LABEL: name: test_atomic_store_keep_release_i8
+    ; CHECK: BUNDLE implicit killed $w1, implicit $x0 :: (store release (s8) 
into %ir.ptr, align 8) {
+    ; CHECK-NEXT: STSHH 0
+    ; CHECK-NEXT: STRBBui killed renamable $w1, $x0, 0 :: (store release (s8) 
into %ir.ptr, align 8)
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: RET undef $lr
+
+    ATOMIC_STORE_HINT_B killed renamable $x0, killed renamable $w1, 0, 0 :: 
(store release (s8) into %ir.ptr, align 8)
+    RET_ReallyLR
+...

>From 189353e701d9e647684c22107c462512713ff4c9 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <[email protected]>
Date: Tue, 19 May 2026 10:43:46 +0000
Subject: [PATCH 2/3] Move expandAtomicStoreHintPseudo to AArch64AsmPrinter.cpp

---
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 51 +++++++++++++++++
 .../AArch64/AArch64ExpandPseudoInsts.cpp      | 56 -------------------
 .../lib/Target/AArch64/AArch64InstrAtomics.td |  1 +
 .../Atomics/aarch64-atomic-store-hint.mir     | 30 ----------
 4 files changed, 52 insertions(+), 86 deletions(-)
 delete mode 100644 
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp 
b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index b16c0460adf38..11c26bb42d423 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -278,6 +278,9 @@ class AArch64AsmPrinter : public AsmPrinter {
   // Emit expansion of Compare-and-branch pseudo instructions
   void emitCBPseudoExpansion(const MachineInstr *MI);
 
+  // Emit expansion of atomic store with hint pseudo instructions
+  void emitAtomicHintPseudoExpansion(const MachineInstr *MI, unsigned Size);
+
   void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
   void EmitToStreamer(const MCInst &Inst) {
     EmitToStreamer(*OutStreamer, Inst);
@@ -3126,6 +3129,42 @@ void AArch64AsmPrinter::emitCBPseudoExpansion(const 
MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, Inst);
 }
 
+void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
+                                                      unsigned Size) {
+  // Pseudo operands: 0 = address, 1 = data, 2 = memory order, 3 = hint.
+  const Register AddrReg = MI->getOperand(0).getReg();
+  const Register DataReg = MI->getOperand(1).getReg();
+  // An order operand of 0 selects the plain store; any other value selects
+  // the store-release opcode.
+  const unsigned Order = MI->getOperand(2).getImm();
+  const bool IsRelaxed = Order == 0;
+
+  unsigned StoreOpc;
+  switch (Size) {
+  case 8:
+    StoreOpc = IsRelaxed ? AArch64::STRBBui : AArch64::STLRB;
+    break;
+  case 16:
+    StoreOpc = IsRelaxed ? AArch64::STRHHui : AArch64::STLRH;
+    break;
+  case 32:
+    StoreOpc = IsRelaxed ? AArch64::STRWui : AArch64::STLRW;
+    break;
+  case 64:
+    StoreOpc = IsRelaxed ? AArch64::STRXui : AArch64::STLRX;
+    break;
+  default:
+    llvm_unreachable("Unexpected atomic hint size.");
+  }
+
+  // Emit the STSHH hint first, then the store itself.
+  EmitToStreamer(
+      MCInstBuilder(AArch64::STSHH).addImm(MI->getOperand(3).getImm()));
+
+  MCInst StoreInst;
+  StoreInst.setOpcode(StoreOpc);
+  // NOTE(review): this forwards the MachineInstr flags into the MCInst flag
+  // word -- confirm that is intended.
+  StoreInst.setFlags(MI->getFlags());
+  // The store takes the data register first, then the address register.
+  StoreInst.addOperand(MCOperand::createReg(DataReg));
+  StoreInst.addOperand(MCOperand::createReg(AddrReg));
+  // Only the STR*ui opcodes get the extra immediate operand; it is always 0.
+  if (IsRelaxed)
+    StoreInst.addOperand(MCOperand::createImm(0));
+  EmitToStreamer(*OutStreamer, StoreInst);
+}
+
 // Simple pseudo-instructions have their lowering (with expansion to real
 // instructions) auto-generated.
 #include "AArch64GenMCPseudoLowering.inc"
@@ -3813,6 +3852,18 @@ void AArch64AsmPrinter::emitInstruction(const 
MachineInstr *MI) {
   case AArch64::CBXPrr:
     emitCBPseudoExpansion(MI);
     return;
+  case AArch64::ATOMIC_STORE_HINT_B:
+    emitAtomicHintPseudoExpansion(MI, 8);
+    return;
+  case AArch64::ATOMIC_STORE_HINT_H:
+    emitAtomicHintPseudoExpansion(MI, 16);
+    return;
+  case AArch64::ATOMIC_STORE_HINT_S:
+    emitAtomicHintPseudoExpansion(MI, 32);
+    return;
+  case AArch64::ATOMIC_STORE_HINT_D:
+    emitAtomicHintPseudoExpansion(MI, 64);
+    return;
   }
 
   if (emitDeactivationSymbolRelocation(MI->getDeactivationSymbol()))
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp 
b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 81fb5619f57b0..5fa93da1544fc 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -70,9 +70,6 @@ class AArch64ExpandPseudoImpl {
                             MachineBasicBlock::iterator MBBI);
   bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI);
-  bool expandAtomicStoreHintPseudo(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   unsigned Size);
   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                       unsigned ExtendImm, unsigned ZeroReg,
@@ -1311,51 +1308,6 @@ bool AArch64ExpandPseudoImpl::expandFormTuplePseudo(
   return true;
 }
 
-bool AArch64ExpandPseudoImpl::expandAtomicStoreHintPseudo(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Size) {
-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-
-  unsigned StOpc;
-  unsigned Order = MI.getOperand(2).getImm();
-  bool Relaxed = Order == 0;
-
-  switch (Size) {
-  case 8:
-    StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
-    break;
-  case 16:
-    StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
-    break;
-  case 32:
-    StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
-    break;
-  case 64:
-    StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
-    break;
-  default:
-    llvm_unreachable("Unexpected atomic hint size.");
-  }
-
-  auto *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
-                   .addImm(MI.getOperand(3).getImm())
-                   .getInstr();
-
-  auto Store = BuildMI(MBB, MBBI, DL, TII->get(StOpc))
-                   .add(MI.getOperand(1))
-                   .addReg(MI.getOperand(0).getReg())
-                   .setMemRefs(MI.memoperands())
-                   .setMIFlags(MI.getFlags());
-
-  if (Relaxed)
-    Store.addImm(0);
-
-  transferImpOps(MI, Store, Store);
-  finalizeBundle(MBB, Hint->getIterator(), MBBI->getIterator());
-  MI.eraseFromParent();
-  return true;
-}
-
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true.  Otherwise return false.
 bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
@@ -1996,14 +1948,6 @@ bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock 
&MBB,
   case AArch64::NAND_ZZZ:
   case AArch64::NOR_ZZZ:
     return expandSVEBitwisePseudo(MI, MBB, MBBI);
-  case AArch64::ATOMIC_STORE_HINT_B:
-    return expandAtomicStoreHintPseudo(MBB, MBBI, 8);
-  case AArch64::ATOMIC_STORE_HINT_H:
-    return expandAtomicStoreHintPseudo(MBB, MBBI, 16);
-  case AArch64::ATOMIC_STORE_HINT_S:
-    return expandAtomicStoreHintPseudo(MBB, MBBI, 32);
-  case AArch64::ATOMIC_STORE_HINT_D:
-    return expandAtomicStoreHintPseudo(MBB, MBBI, 64);
   }
   return false;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td 
b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 7d719239ecc02..af57d471e0411 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -325,6 +325,7 @@ class atomic_hint_stshh_strm_seqcst<PatFrag base>
 class BaseStoreHintPseudo<RegisterClass regtype>
       : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
                         i32imm:$order, i32imm:$hint), []>, 
Sched<[WriteAtomic]> {
+  let Size = 8;
   let isCodeGenOnly = 1;
   let hasSideEffects = 1;
   let mayStore = 1;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir 
b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
deleted file mode 100644
index ed69efafb04c3..0000000000000
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
+++ /dev/null
@@ -1,30 +0,0 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=aarch64-expand-pseudo 
-verify-machineinstrs %s -o - | FileCheck %s
-
---- |
-  define void @test_atomic_store_keep_release_i8(ptr %ptr, i8 %val) {
-    store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
-    ret void
-  }
-
-  !0 = !{i32 0}
-...
-
----
-name:            test_atomic_store_keep_release_i8
-liveins:
-  - { reg: '$x0', virtual-reg: '' }
-  - { reg: '$w1', virtual-reg: '' }
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: $w1, $x0
-
-    ; CHECK-LABEL: name: test_atomic_store_keep_release_i8
-    ; CHECK: BUNDLE implicit killed $w1, implicit $x0 :: (store release (s8) 
into %ir.ptr, align 8) {
-    ; CHECK-NEXT: STSHH 0
-    ; CHECK-NEXT: STRBBui killed renamable $w1, $x0, 0 :: (store release (s8) 
into %ir.ptr, align 8)
-    ; CHECK-NEXT: }
-    ; CHECK-NEXT: RET undef $lr
-
-    ATOMIC_STORE_HINT_B killed renamable $x0, killed renamable $w1, 0, 0 :: 
(store release (s8) into %ir.ptr, align 8)
-    RET_ReallyLR
-...

>From d9a3b6dfe3925ef1206fcd822d0c64b46fc1ba62 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <[email protected]>
Date: Fri, 12 Jun 2026 10:38:00 +0000
Subject: [PATCH 3/3] - Document aarch64.atomic.hint in LangRef - Rewrite
 patterns - Use LLVM ordering in patterns & expansion - Add Clang tests for
 more types - Remove AARCH64_ATOMIC_STORE_HINT_MD

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  10 +-
 clang/lib/Sema/SemaARM.cpp                    |   9 +-
 clang/test/CodeGen/arm_acle.c                 |  90 ++++++++++++++
 clang/test/Sema/builtins-arm64.c              |   6 +-
 llvm/docs/LangRef.rst                         |  34 +++++-
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |   2 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |   2 +-
 .../lib/Target/AArch64/AArch64InstrAtomics.td | 115 ++++--------------
 llvm/lib/Target/AArch64/AArch64InstrInfo.h    |   1 -
 .../GISel/AArch64InstructionSelector.cpp      |  32 ++++-
 10 files changed, 192 insertions(+), 109 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 11ddd5b61e4cb..975f37fb07a7d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9605,6 +9605,11 @@ def err_atomic_builtin_must_be_pointer : Error<
 def err_atomic_builtin_must_be_pointer_intptr : Error<
   "address argument to atomic builtin must be a pointer to integer or pointer"
   " (%0 invalid)">;
+
+def err_atomic_hint_builtin_must_be_pointer : Error<
+  "address argument to atomic hint builtin must be a pointer to a scalar "
+  "integral or floating-point type of 8, 16, 32, or 64 bits (%0 invalid)">;
+
 def err_atomic_builtin_cannot_be_const : Error<
   "address argument to atomic builtin cannot be const-qualified (%0 invalid)">;
 def err_atomic_builtin_must_be_pointer_intfltptr : Error<
@@ -9671,8 +9676,9 @@ def err_atomic_op_hint_data_size : Error<
   "address argument to atomic store with hint must be of size 8, 16, 32 or 64 
bits">;
 def err_atomic_hint_has_invalid_memory_order : Error<
    "invalid memory order argument to atomic hint operation (%0 invalid)">;
-def err_atomic_hint_has_invalid_hint_type : Error<
-   "invalid hint type argument to atomic hint operation (%0 invalid)">;
+def warn_atomic_hint_has_invalid_hint_type : Warning<
+   "unrecognised hint type argument to atomic hint operation (%0)">,
+  InGroup<DiagGroup<"atomic-hint-type">>;
 def warn_atomic_op_has_invalid_memory_order : Warning<
   "%select{|success |failure }0memory order argument to atomic operation is 
invalid">,
   InGroup<DiagGroup<"atomic-memory-ordering">>;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 78f83d18deab8..33d1750287b03 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -338,7 +338,7 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned 
BuiltinID,
   auto *PtrTy = PtrArg->getType()->getAs<PointerType>();
   if (!PtrTy)
     return Diag(TheCall->getBeginLoc(),
-                diag::err_atomic_builtin_must_be_pointer)
+                diag::err_atomic_hint_builtin_must_be_pointer)
            << PtrArg->getType() << 0 << PtrArg->getSourceRange();
   QualType PtrQT = PtrTy->getPointeeType();
 
@@ -348,8 +348,7 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned 
BuiltinID,
                 diag::err_atomic_op_needs_atomic_int_or_fp)
            << 0 << PtrQT << PtrArg->getSourceRange();
 
-  unsigned TySize =
-      
Context.getTypeSize(Context.getCanonicalType(PtrQT).getUnqualifiedType());
+  unsigned TySize = Context.getTypeSize(PtrQT);
   if (TySize != 8 && TySize != 16 && TySize != 32 && TySize != 64)
     return Diag(TheCall->getBeginLoc(), diag::err_atomic_op_hint_data_size)
            << PtrArg->getSourceRange();
@@ -398,14 +397,14 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned 
BuiltinID,
   std::optional<llvm::APSInt> HintAP = 
HintArg->getIntegerConstantExpr(Context);
   if (!HintAP)
     return Diag(TheCall->getBeginLoc(),
-                diag::err_atomic_hint_has_invalid_hint_type)
+                diag::warn_atomic_hint_has_invalid_hint_type)
            << HintArg->getType() << HintArg->getSourceRange();
 
   unsigned Hint = HintAP->getZExtValue();
   if (llvm::getAtomicStoreHintFromMD(Hint) ==
       llvm::AArch64AtomicStoreHint::HINT_NONE)
     return Diag(TheCall->getBeginLoc(),
-                diag::err_atomic_hint_has_invalid_hint_type)
+                diag::warn_atomic_hint_has_invalid_hint_type)
            << *HintAP << HintArg->getSourceRange();
 
   return false;
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index a8aa0916a8a4c..69ad674193b9d 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1833,6 +1833,24 @@ void test_atomic_store_hint_char(char *ptr, char data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_uchar(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, 
align 1, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_uchar(unsigned char *ptr, unsigned char data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_schar(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, 
align 1, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_schar(signed char *ptr, signed char data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_bfloat(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic bfloat [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 2, !aarch64.atomic.hint [[META4:![0-9]+]]
@@ -1842,6 +1860,15 @@ void test_atomic_store_hint_bfloat(__bf16 *ptr, __bf16 
data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_half(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic half [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 2, !aarch64.atomic.hint [[META4:![0-9]+]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_half(__fp16 *ptr, __fp16 data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_short(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 2, !aarch64.atomic.hint [[META3]]
@@ -1851,6 +1878,33 @@ void test_atomic_store_hint_short(short *ptr, short 
data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_ushort(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 2, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_ushort(unsigned short *ptr, unsigned short data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_int(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, 
align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_int(int *ptr, int data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_unsigned(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, 
align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_unsigned(unsigned *ptr, unsigned data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_u32(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, 
align 4, !aarch64.atomic.hint [[META3]]
@@ -1860,6 +1914,15 @@ void test_atomic_store_hint_u32(uint32_t *ptr, uint32_t 
data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_s32(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, 
align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_s32(int32_t *ptr, int32_t data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_float(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic float [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, 
align 4, !aarch64.atomic.hint [[META3]]
@@ -1878,6 +1941,24 @@ void test_atomic_store_hint_s64(int64_t *ptr, int64_t 
data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_long(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_long(long *ptr, long data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_ulong(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_ulong(unsigned long *ptr, unsigned long data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_long_long_int(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 8, !aarch64.atomic.hint [[META3]]
@@ -1887,6 +1968,15 @@ void test_atomic_store_hint_long_long_int(long long int 
*ptr, long long int data
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_long_long_uint(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, 
align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_long_long_uint(unsigned long long int *ptr, 
unsigned long long int data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_double(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic double [[DATA:%.*]], ptr [[PTR:%.*]] 
monotonic, align 8, !aarch64.atomic.hint [[META4]]
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index fb4718a1bd1f4..5372aff24b739 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -58,13 +58,13 @@ void test_atomic_store_hint(char *c_ptr, __int128 *inv_ptr, 
float *f_ptr,
   __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0); // expected-error 
{{too few arguments to function call, expected 4, have 3}}
   __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 0, 0); // 
expected-error {{too many arguments to function call, expected 4, have 5}}
 
-  __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error 
{{address argument to atomic builtin must be a pointer ('int' invalid)}}
+  __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error 
{{address argument to atomic hint builtin must be a pointer to a scalar 
integral or floating-point type of 8, 16, 32, or 64 bits ('int' invalid)}}
   __builtin_arm_atomic_store_with_hint(c_ptr, f_data, 0, 0); // expected-error 
{{arguments are of different types ('char' vs 'float')}}
   __builtin_arm_atomic_store_with_hint(inv_ptr, inv_data, 0, 0); // 
expected-error {{address argument to atomic store with hint must be of size 8, 
16, 32 or 64 bits}}
 
   __builtin_arm_atomic_store_with_hint(c_ptr, c_data, inv_int, 0); // 
expected-error {{invalid memory order argument to atomic hint operation ('int' 
invalid)}}
   __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 2, 0); // expected-error 
{{invalid memory order argument to atomic hint operation (2 invalid)}}
 
-  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, inv_int); // 
expected-error {{invalid hint type argument to atomic hint operation ('int' 
invalid)}}
-  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // expected-error 
{{invalid hint type argument to atomic hint operation (3 invalid)}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, inv_int); // 
expected-warning {{unrecognised hint type argument to atomic hint operation 
('int')}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // 
expected-warning {{unrecognised hint type argument to atomic hint operation 
(3)}}
 }
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 92cbfab50c8ef..1b459b35709c8 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -8683,6 +8683,33 @@ to the SSA value of the pointer operand.
 Note that this is an experimental feature, which means that its semantics might
 change in the future.
 
+.. _md_aarch64.atomic.hint:
+
+'``aarch64.atomic.hint``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``aarch64.atomic.hint`` metadata may be attached to an atomic store
+instruction, referencing a single metadata node containing a single ``i32``
+entry:
+
+.. code-block:: llvm
+
+  store atomic i64 %x, ptr %y seq_cst, align 8, !aarch64.atomic.hint !0
+
+  ...
+  !0 = !{i32 1}
+
+On AArch64 targets, this metadata may be used to emit an atomic store together
+with a hint instruction. The hint is only a suggestion: the compiler may use it
+when selecting code sequences, but is not required to emit any specific hint
+instruction. The following hint values are currently recognised:
+
+  * ``0``: ``stshh keep`` hint.
+  * ``1``: ``stshh strm`` hint.
+
+If the compiler does not recognise the hint value provided, it may ignore the
+metadata. Targets that do not support this metadata may also ignore it.
+
 '``type``' Metadata
 ^^^^^^^^^^^^^^^^^^^
 
@@ -12154,9 +12181,10 @@ Syntax:
 ::
 
       store [volatile] <ty> <value>, ptr <pointer>[, align <alignment>][, 
!nontemporal !<nontemp_node>][, !invariant.group !<empty_node>]        ; yields 
void
-      store atomic [volatile] <ty> <value>, ptr <pointer> 
[syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group 
!<empty_node>] ; yields void
+      store atomic [volatile] <ty> <value>, ptr <pointer> 
[syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group 
!<empty_node>][, !aarch64.atomic.hint !<aarch64_hint_value>] ; yields void
       !<nontemp_node> = !{ i32 1 }
       !<empty_node> = !{}
+      !<aarch64_hint_value> = !{ i32 <hint> }
 
 Overview:
 """""""""
@@ -12212,6 +12240,10 @@ x86.
 The optional ``!invariant.group`` metadata must reference a
 single metadata name ``<empty_node>``. See ``invariant.group`` metadata.
 
+The optional ``!aarch64.atomic.hint`` metadata must reference a single metadata
+name ``<aarch64_hint_value>`` containing one ``i32`` hint value. See
+:ref:`aarch64.atomic.hint <md_aarch64.atomic.hint>` metadata.
+
 Semantics:
 """"""""""
 
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp 
b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 11c26bb42d423..e16a6ac3c9aca 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -3134,7 +3134,7 @@ void 
AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
 
   unsigned StOpc;
   unsigned Order = MI->getOperand(2).getImm();
-  bool Relaxed = Order == 0;
+  bool Relaxed = Order == 2;
   switch (Size) {
   case 8:
     StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 49d99f6f8e80d..4a4b711ee7448 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18665,7 +18665,7 @@ AArch64TargetLowering::getTargetMMOFlags(const 
Instruction &I) const {
     return MOStridedAccess;
 
   auto Flags = MachineMemOperand::MONone;
-  const MDNode *AtomicStHint = I.getMetadata(AARCH64_ATOMIC_STORE_HINT_MD);
+  const MDNode *AtomicStHint = I.getMetadata("aarch64.atomic.hint");
   if (AtomicStHint) {
     unsigned HintVal =
         cast<ConstantInt>(
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td 
b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index af57d471e0411..af06ef9014031 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -286,42 +286,6 @@ def : Pat<(relaxed_store<atomic_store_64>
 // Atomic store with hint pseudos
 //===----------------------------------
 
-class seq_cst_store<PatFrags base>
-  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
-  let IsAtomic = 1;
-  let IsAtomicOrderingSequentiallyConsistent = 1;
-}
-
-class atomic_hint_stshh_keep_relaxed<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (relaxed_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_KEEP(N); }]>;
-
-class atomic_hint_stshh_keep_release<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (releasing_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_KEEP(N); }]>;
-
-class atomic_hint_stshh_keep_seqcst<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (seq_cst_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_KEEP(N); }]>;
-
-class atomic_hint_stshh_strm_relaxed<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (relaxed_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_STRM(N); }]>;
-
-class atomic_hint_stshh_strm_release<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (releasing_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_STRM(N); }]>;
-
-class atomic_hint_stshh_strm_seqcst<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (seq_cst_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_STRM(N); }]>;
-
 class BaseStoreHintPseudo<RegisterClass regtype>
       : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
                         i32imm:$order, i32imm:$hint), []>, 
Sched<[WriteAtomic]> {
@@ -336,60 +300,33 @@ def ATOMIC_STORE_HINT_H : BaseStoreHintPseudo<GPR32>;
 def ATOMIC_STORE_HINT_S : BaseStoreHintPseudo<GPR32>;
 def ATOMIC_STORE_HINT_D : BaseStoreHintPseudo<GPR64>;
 
+class atomic_hint_store<PatFrag Base, bit Rel, bit SeqCst, code Pred, code 
GIPred>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (Base node:$val, node:$ptr), Pred> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingReleaseOrStronger = Rel;
+  let IsAtomicOrderingSequentiallyConsistent = SeqCst;
+  let GISelPredicateCode = GIPred;
+}
+
+multiclass AtomicHintPatterns<int Order, int Hint, bit Rel, bit SeqCst, code 
Pred, code GIPred> {
+  def : Pat<(atomic_hint_store<atomic_store_8, Rel, SeqCst, Pred, GIPred> 
GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 
Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_16, Rel, SeqCst, Pred, GIPred> 
GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 
Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_32, Rel, SeqCst, Pred, GIPred> 
GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 
Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_64, Rel, SeqCst, Pred, GIPred> 
GPR64sp:$addr, GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 Order), (i32 
Hint))>;
+}
+
 let AddedComplexity = 15 in {
-  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
0))>;
-  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
0))>;
-  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
0))>;
-  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 
0))>;
-
-  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
0))>;
-  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
0))>;
-  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
0))>;
-  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 
0))>;
-
-  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
0))>;
-  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
0))>;
-  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
0))>;
-  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 
0))>;
-
-  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
1))>;
-  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
1))>;
-  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 
1))>;
-  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 
1))>;
-
-  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
1))>;
-  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
1))>;
-  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 
1))>;
-  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 
1))>;
-
-  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_8> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
1))>;
-  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_16> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
1))>;
-  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_32> GPR64sp:$addr, 
GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 
1))>;
-  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_64> GPR64sp:$addr, 
GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 
1))>;
+  defm : AtomicHintPatterns<2, 0, 0, 0, [{ return isAtomicSTSHH_KEEP(N); }], 
[{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatterns<5, 0, 1, 0, [{ return isAtomicSTSHH_KEEP(N); }], 
[{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatterns<7, 0, 0, 1, [{ return isAtomicSTSHH_KEEP(N); }], 
[{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatterns<2, 1, 0, 0, [{ return isAtomicSTSHH_STRM(N); }], 
[{ return isAtomicSTSHH_STRM(MI); }]>;
+  defm : AtomicHintPatterns<5, 1, 1, 0, [{ return isAtomicSTSHH_STRM(N); }], 
[{ return isAtomicSTSHH_STRM(MI); }]>;
+  defm : AtomicHintPatterns<7, 1, 0, 1, [{ return isAtomicSTSHH_STRM(N); }], 
[{ return isAtomicSTSHH_STRM(MI); }]>;
 }
 
 //===----------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 06fb6cbbabe5a..1eb5dac67ea2a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -37,7 +37,6 @@ static const MachineMemOperand::Flags MOAtomicHintBit1 =
     MachineMemOperand::MOTargetFlag4;
 
 #define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
-#define AARCH64_ATOMIC_STORE_HINT_MD "aarch64.atomic.hint"
 
 // AArch64 MachineCombiner patterns
 enum AArch64MachineCombinerPattern : unsigned {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index fe98659b6900e..3c1f7e6213a36 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -522,6 +522,11 @@ class AArch64InstructionSelector : public 
InstructionSelector {
                                       MachineOperand &Predicate,
                                       MachineIRBuilder &MIRBuilder) const;
 
+  bool isAtomicHintInst(const MachineInstr &MI,
+                        AArch64AtomicStoreHint Hint) const;
+  bool isAtomicSTSHH_KEEP(const MachineInstr &MI) const;
+  bool isAtomicSTSHH_STRM(const MachineInstr &MI) const;
+
   /// Return true if \p MI is a load or store of \p NumBytes bytes.
   bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
 
@@ -2546,17 +2551,15 @@ bool 
AArch64InstructionSelector::earlySelect(MachineInstr &I) {
     GStore &St = cast<GStore>(I);
     auto MMO = St.getMMO();
     LLT PtrTy = MRI.getType(St.getPointerReg());
+    AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags());
 
     // Only for handling atomic store with hint.
     // Can only handle AddressSpace 0, 64-bit pointers.
-    if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64)) {
+    if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64) ||
+        Hint == AArch64AtomicStoreHint::HINT_NONE) {
       return false;
     }
 
-    AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags());
-    if (Hint == AArch64AtomicStoreHint::HINT_NONE)
-      return false;
-
     unsigned HintOpc;
     unsigned StoreSize = St.getMemSizeInBits().getValue();
     Register ValueReg = St.getValueReg();
@@ -2594,7 +2597,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr 
&I) {
     auto StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
                          .addReg(St.getPointerReg())
                          .addReg(ValueReg)
-                         .addImm((int)toCABI(St.getMMO().getSuccessOrdering()))
+                         .addImm((int)MMO.getSuccessOrdering())
                          .addImm(static_cast<unsigned>(HintImm));
 
     StrPseudo.cloneMemRefs(I);
@@ -8089,6 +8092,23 @@ void 
AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
                                                       .getZExtValue()));
 }
 
+bool AArch64InstructionSelector::isAtomicHintInst(
+    const MachineInstr &MI, AArch64AtomicStoreHint Hint) const {
+  const GStore &St = cast<GStore>(MI);
+  auto MMO = St.getMMO();
+  return AArch64InstrInfo::decodeAtomicHintFlags(MMO.getFlags()) == Hint;
+}
+
+bool AArch64InstructionSelector::isAtomicSTSHH_KEEP(
+    const MachineInstr &MI) const {
+  return isAtomicHintInst(MI, AArch64AtomicStoreHint::HINT_STSHH_KEEP);
+}
+
+bool AArch64InstructionSelector::isAtomicSTSHH_STRM(
+    const MachineInstr &MI) const {
+  return isAtomicHintInst(MI, AArch64AtomicStoreHint::HINT_STSHH_STRM);
+}
+
 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
     const MachineInstr &MI, unsigned NumBytes) const {
   if (!MI.mayLoadOrStore())

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to