[clang] [llvm] [AArch64] Implement the atomic store with hint intrinsic (PR #198316)

Kerry McLaughlin via cfe-commits cfe-commits at lists.llvm.org
Fri Jun 26 08:16:02 PDT 2026


https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/198316

>From 106c21f129e83919e43489241b35c1378a16fb05 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Wed, 6 May 2026 13:08:02 +0000
Subject: [PATCH 1/4] [AArch64] Implement the atomic store with hint intrinsic

Adds the following ACLE intrinsic as described in [1]:

void __arm_atomic_store_with_hint(type *ptr, type data,
                                  int memory_order, int hint);

A regular atomic store instruction is emitted in Clang for this builtin
with additional metadata (`!aarch64.atomic.hint`), which ensures the
instruction is recognised as atomic by passes in LLVM.
When an atomic store has this metadata, it is lowered to an ATOMIC_STORE_HINT
pseudo, which is later expanded by AArch64ExpandPseudoInsts into an STSHH
instruction followed by the atomic store.

The hint value is represented using MOTargetFlag3 & MOTargetFlag4 flags,
which will need to be extended when new hints are added in future.

[1] https://github.com/ARM-software/acle/pull/432
---
 clang/include/clang/Basic/BuiltinsAArch64.td  |   4 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   6 +
 clang/include/clang/Sema/SemaARM.h            |   1 +
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |  54 ++++
 clang/lib/Headers/arm_acle.h                  |   6 +
 clang/lib/Sema/SemaARM.cpp                    |  92 ++++++
 clang/test/CodeGen/arm_acle.c                 |  78 +++++
 clang/test/CodeGen/builtins-arm64.c           |  13 +
 clang/test/Sema/builtins-arm64.c              |  17 ++
 .../include/llvm/Support/AArch64AtomicHints.h |  36 +++
 llvm/lib/CodeGen/AtomicExpandPass.cpp         |   3 +
 .../AArch64/AArch64ExpandPseudoInsts.cpp      |  56 ++++
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    |  20 ++
 .../Target/AArch64/AArch64ISelLowering.cpp    |  21 +-
 .../lib/Target/AArch64/AArch64InstrAtomics.td | 109 +++++++
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |  13 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.h    |   9 +
 .../GISel/AArch64InstructionSelector.cpp      |  61 ++++
 .../Atomics/aarch64-atomic-store-hint.ll      | 288 ++++++++++++++++++
 .../Atomics/aarch64-atomic-store-hint.mir     |  30 ++
 20 files changed, 916 insertions(+), 1 deletion(-)
 create mode 100644 llvm/include/llvm/Support/AArch64AtomicHints.h
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 15257f3db5b41..19a65e7bdf8fe 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -171,6 +171,10 @@ let Attributes = [NoThrow], Features = "ls64" in {
 	def st64bv0 : AArch64TargetBuiltin<"uint64_t (void *, uint64_t const *)">;
 }
 
+let Attributes = [NoThrow, CustomTypeChecking] in {
+  def atomic_store_with_hint : AArch64Builtin<"void(...)">;
+}
+
 // Armv9.3-A Guarded Control Stack
 let Attributes = [NoThrow], Features = "gcs" in {
 	def gcspopm : AArch64TargetBuiltin<"uint64_t (uint64_t)">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f84cd8dca6d4c..11ddd5b61e4cb 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9667,6 +9667,12 @@ def err_atomic_op_needs_atomic_int_or_fp : Error<
 def err_atomic_op_needs_atomic_int : Error<
   "address argument to atomic operation must be a pointer to "
   "%select{|atomic }0integer (%1 invalid)">;
+def err_atomic_op_hint_data_size : Error<
+  "address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits">;
+def err_atomic_hint_has_invalid_memory_order : Error<
+   "invalid memory order argument to atomic hint operation (%0 invalid)">;
+def err_atomic_hint_has_invalid_hint_type : Error<
+   "invalid hint type argument to atomic hint operation (%0 invalid)">;
 def warn_atomic_op_has_invalid_memory_order : Warning<
   "%select{|success |failure }0memory order argument to atomic operation is invalid">,
   InGroup<DiagGroup<"atomic-memory-ordering">>;
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index af8e0e9047171..b0a01c40ffece 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -70,6 +70,7 @@ class SemaARM : public SemaBase {
   bool BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, int ArgNum,
                             unsigned ExpectedFieldNum, bool AllowName);
   bool BuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool BuiltinARMAtomicStoreHintCall(unsigned BuiltinID, CallExpr *TheCall);
 
   bool MveAliasValid(unsigned BuiltinID, llvm::StringRef AliasName);
   bool CdeAliasValid(unsigned BuiltinID, llvm::StringRef AliasName);
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 4c668dabd53dc..4a13767268f96 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/TargetParser/AArch64TargetParser.h"
 
 #include <numeric>
@@ -2129,6 +2130,56 @@ static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
                             Ops);
 }
 
+/// Emit __builtin_arm_atomic_store_with_hint as an atomic store tagged with
+/// !aarch64.atomic.hint metadata holding the constant hint argument.
+static Value *EmitAtomicStoreWithHintBuiltin(CodeGenFunction &CGF,
+                                             unsigned BuiltinID,
+                                             const CallExpr *E) {
+  CodeGen::CGBuilderTy &Builder = CGF.Builder;
+  CodeGen::CodeGenModule &CGM = CGF.CGM;
+  Expr::EvalResult Result;
+  if (!E->getArg(2)->EvaluateAsInt(Result, CGM.getContext()))
+    llvm_unreachable(
+        "Expected integer memory order argument to atomic store with hint.");
+
+  // Sequence operand emission; call argument evaluation order is unspecified.
+  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
+  Value *Data = CGF.EmitScalarExpr(E->getArg(1));
+  StoreInst *Store = Builder.CreateStore(Data, Ptr);
+
+  AtomicOrdering Ordering;
+  unsigned OrderingArg = Result.Val.getInt().getExtValue();
+  assert(isValidAtomicOrderingCABI(OrderingArg) && "Invalid atomic ordering");
+  switch (static_cast<AtomicOrderingCABI>(OrderingArg)) {
+  default:
+    llvm_unreachable("Unsupported atomic ordering found.");
+  case AtomicOrderingCABI::relaxed:
+    Ordering = AtomicOrdering::Monotonic;
+    break;
+  case AtomicOrderingCABI::release:
+    Ordering = AtomicOrdering::Release;
+    break;
+  case AtomicOrderingCABI::seq_cst:
+    Ordering = AtomicOrdering::SequentiallyConsistent;
+    break;
+  }
+  Store->setAtomic(Ordering);
+
+  if (!E->getArg(3)->EvaluateAsInt(Result, CGM.getContext()))
+    llvm_unreachable(
+        "Expected integer hint argument to atomic store with hint.");
+  unsigned HintArg = Result.Val.getInt().getExtValue();
+  assert((getAtomicStoreHintFromMD(HintArg) !=
+          AArch64AtomicStoreHint::HINT_NONE) &&
+         "Invalid hint type");
+  MDNode *HintMDVal =
+      MDNode::get(CGM.getLLVMContext(),
+                  llvm::ConstantAsMetadata::get(Builder.getInt32(HintArg)));
+  Store->setMetadata(CGM.getModule().getMDKindID("aarch64.atomic.hint"),
+                     HintMDVal);
+  return Store;
+}
+
 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
 /// argument that specifies the vector type. The additional argument is meant
 /// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
@@ -4927,6 +4978,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
       BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
     return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
 
+  if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_hint)
+    return EmitAtomicStoreWithHintBuiltin(*this, BuiltinID, E);
+
   // Memory Tagging Extensions (MTE) Intrinsics
   Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 9a6b6a837fa5a..bd99527dc5fa8 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -741,6 +741,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
 }
 #endif
 
+/* Atomic store with hints */
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+#define __arm_atomic_store_with_hint(ptr, data, memory_order, hint)            \
+  __builtin_arm_atomic_store_with_hint(ptr, data, memory_order, hint)
+#endif
+
 /* 11.1 Special register intrinsics */
 #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
 #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 5e7504fab416d..78f83d18deab8 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -17,6 +17,7 @@
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 
 namespace clang {
 
@@ -322,6 +323,94 @@ bool SemaARM::BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
   return false;
 }
 
+/// Semantic checks for __builtin_arm_atomic_store_with_hint(ptr, data, order,
+/// hint). Emits a diagnostic and returns true if the call is ill-formed.
+bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
+                                            CallExpr *TheCall) {
+  if (SemaRef.checkArgCount(TheCall, 4))
+    return true;
+
+  ASTContext &Context = getASTContext();
+  SourceLocation Loc = TheCall->getBeginLoc();
+
+  // Arg 0 should be the pointer type. The pointee type must be a
+  // scalar integral or floating-point type of 8, 16, 32 or 64 bits.
+  ExprResult PtrArgRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+  if (PtrArgRes.isInvalid())
+    return true;
+  // Use the converted operand so that array arguments decay to pointers.
+  Expr *PtrArg = PtrArgRes.get();
+  TheCall->setArg(0, PtrArg);
+  auto *PtrTy = PtrArg->getType()->getAs<PointerType>();
+  if (!PtrTy)
+    return Diag(Loc, diag::err_atomic_builtin_must_be_pointer)
+           << PtrArg->getType() << 0 << PtrArg->getSourceRange();
+  QualType PtrQT = PtrTy->getPointeeType();
+
+  // TODO: Allow MFloat8 types when supported by atomic store
+  if (!PtrQT->isIntegralType(Context) && !PtrQT->isFloatingType())
+    return Diag(Loc, diag::err_atomic_op_needs_atomic_int_or_fp)
+           << 0 << PtrQT << PtrArg->getSourceRange();
+
+  unsigned TySize =
+      Context.getTypeSize(Context.getCanonicalType(PtrQT).getUnqualifiedType());
+  if (TySize != 8 && TySize != 16 && TySize != 32 && TySize != 64)
+    return Diag(Loc, diag::err_atomic_op_hint_data_size)
+           << PtrArg->getSourceRange();
+
+  // Arg 1 is the data to be stored. The type must match the pointee
+  // type found above.
+  ExprResult DataArgRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+  if (DataArgRes.isInvalid())
+    return true;
+  QualType DataQT = DataArgRes.get()->getType();
+  // Compare canonical types so typedef sugar is not reported as a mismatch.
+  if (!Context.hasSameType(PtrQT, DataQT))
+    return Diag(Loc, diag::err_typecheck_call_different_arg_types)
+           << PtrQT << DataQT;
+
+  // Arg 2 is the memory order, which must be relaxed, release or seq_cst
+  ExprResult MemOrdRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(2));
+  if (MemOrdRes.isInvalid())
+    return true;
+  Expr *MemOrdArg = MemOrdRes.get();
+  std::optional<llvm::APSInt> MemOrdAP =
+      MemOrdArg->getIntegerConstantExpr(Context);
+  if (!MemOrdAP)
+    return Diag(Loc, diag::err_atomic_hint_has_invalid_memory_order)
+           << MemOrdArg->getType() << MemOrdArg->getSourceRange();
+
+  // Saturate (not truncate) so a wide constant cannot alias a valid ordering.
+  uint64_t Ordering = MemOrdAP->getLimitedValue();
+  if (Ordering != static_cast<uint64_t>(llvm::AtomicOrderingCABI::relaxed) &&
+      Ordering != static_cast<uint64_t>(llvm::AtomicOrderingCABI::release) &&
+      Ordering != static_cast<uint64_t>(llvm::AtomicOrderingCABI::seq_cst))
+    return Diag(Loc, diag::err_atomic_hint_has_invalid_memory_order)
+           << *MemOrdAP << MemOrdArg->getSourceRange();
+
+  // Arg 3 is the hint type. Only values represented by AArch64AtomicStoreHint
+  // are valid.
+  ExprResult HintRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(3));
+  if (HintRes.isInvalid())
+    return true;
+  Expr *HintArg = HintRes.get();
+  std::optional<llvm::APSInt> HintAP = HintArg->getIntegerConstantExpr(Context);
+  if (!HintAP)
+    return Diag(Loc, diag::err_atomic_hint_has_invalid_hint_type)
+           << HintArg->getType() << HintArg->getSourceRange();
+
+  if (llvm::getAtomicStoreHintFromMD(HintAP->getLimitedValue()) ==
+      llvm::AArch64AtomicStoreHint::HINT_NONE)
+    return Diag(Loc, diag::err_atomic_hint_has_invalid_hint_type)
+           << *HintAP << HintArg->getSourceRange();
+
+  return false;
+}
+
 /// getNeonEltType - Return the QualType corresponding to the elements of
 /// the vector type specified by the NeonTypeFlags.  This is used to check
 /// the pointer arguments for Neon load/store intrinsics.
@@ -1166,6 +1255,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
       BuiltinID == AArch64::BI__builtin_arm_wsrp)
     return BuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true);
 
+  if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_hint)
+    return BuiltinARMAtomicStoreHintCall(BuiltinID, TheCall);
+
   // Only check the valid encoding range. Any constant in this range would be
   // converted to a register of the form S2_2_C3_C4_5. Let the hardware throw
   // an exception for incorrect registers. This matches MSVC behavior.
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index cd18fa63bfdbd..a8aa0916a8a4c 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1821,3 +1821,81 @@ int test_rndrrs(uint64_t *__addr) {
   return __rndrrs(__addr);
 }
 #endif
+
+#if defined(__ARM_64BIT_STATE)
+
+// AArch64-LABEL: @test_atomic_store_hint_char(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 1, !aarch64.atomic.hint [[META3:![0-9]+]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_char(char *ptr, char data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_bfloat(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic bfloat [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META4:![0-9]+]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_bfloat(__bf16 *ptr, __bf16 data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_short(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_short(short *ptr, short data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_u32(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_u32(uint32_t *ptr, uint32_t data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_float(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic float [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_float(float *ptr, float data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_s64(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 8, !aarch64.atomic.hint [[META4]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_s64(int64_t *ptr, int64_t data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_long_long_int(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_long_long_int(long long int *ptr, long long int data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_double(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic double [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 8, !aarch64.atomic.hint [[META4]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_double(double *ptr, double data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1);
+}
+
+// AArch64: [[META3]] = !{i32 0}
+// AArch64-NEXT: [[META4]] = !{i32 1}
+#endif
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 3d054c79f1777..ad9ba7feca671 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -216,4 +216,17 @@ void trap() {
   __builtin_arm_trap(42);
 }
 
+void atomic_store_with_hint(int64_t *a, int64_t b) {
+  __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELAXED, 0); // HINT_STSHH_KEEP
+  // CHECK: store atomic i64 {{.*}}, ptr {{.*}} monotonic, align 8, !aarch64.atomic.hint ![[M1:[0-9]]]
+
+  __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_SEQ_CST, 0);
+  // CHECK: store atomic i64 {{.*}}, ptr {{.*}} seq_cst, align 8, !aarch64.atomic.hint ![[M1]]
+
+  __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELEASE, 1); // HINT_STSHH_STRM
+  // CHECK: store atomic i64 {{.*}}, ptr {{.*}} release, align 8, !aarch64.atomic.hint ![[M2:[0-9]]]
+}
+
 // CHECK: ![[M0]] = !{!"1:2:3:4:5"}
+// CHECK: ![[M1]] = !{i32 0}
+// CHECK: ![[M2]] = !{i32 1}
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index 41cffd7ebb1a0..fb4718a1bd1f4 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -51,3 +51,20 @@ void test_trap(short s, unsigned short us) {
   __builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
   __builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
 }
+
+void test_atomic_store_hint(char *c_ptr, __int128 *inv_ptr, float *f_ptr,
+                            char c_data, __int128 inv_data, float f_data,
+                            int inv_int) {
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0); // expected-error {{too few arguments to function call, expected 4, have 3}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}}
+
+  __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, f_data, 0, 0); // expected-error {{arguments are of different types ('char' vs 'float')}}
+  __builtin_arm_atomic_store_with_hint(inv_ptr, inv_data, 0, 0); // expected-error {{address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits}}
+
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, inv_int, 0); // expected-error {{invalid memory order argument to atomic hint operation ('int' invalid)}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 2, 0); // expected-error {{invalid memory order argument to atomic hint operation (2 invalid)}}
+
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, inv_int); // expected-error {{invalid hint type argument to atomic hint operation ('int' invalid)}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // expected-error {{invalid hint type argument to atomic hint operation (3 invalid)}}
+}
diff --git a/llvm/include/llvm/Support/AArch64AtomicHints.h b/llvm/include/llvm/Support/AArch64AtomicHints.h
new file mode 100644
index 0000000000000..8118f3e2df3ad
--- /dev/null
+++ b/llvm/include/llvm/Support/AArch64AtomicHints.h
@@ -0,0 +1,36 @@
+//===-- AArch64AtomicHints.h - AArch64 Atomic Hint Attributes ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AARCH64ATOMICHINTS_H
+#define LLVM_SUPPORT_AARCH64ATOMICHINTS_H
+
+namespace llvm {
+// Hint attached to an atomic store; HINT_NONE means no recognised hint.
+enum class AArch64AtomicStoreHint {
+  HINT_NONE = 0,
+  HINT_STSHH_KEEP = 1,
+  HINT_STSHH_STRM = 2,
+};
+
+// True if I is the enumerator value of a hint other than HINT_NONE.
+template <typename Int> inline bool isValidAArch64AtomicHintValue(Int I) {
+  return static_cast<Int>(AArch64AtomicStoreHint::HINT_STSHH_KEEP) <= I &&
+         I <= static_cast<Int>(AArch64AtomicStoreHint::HINT_STSHH_STRM);
+}
+
+// Maps a !aarch64.atomic.hint operand (0 = KEEP, 1 = STRM, else NONE).
+template <typename Int>
+inline AArch64AtomicStoreHint getAtomicStoreHintFromMD(Int I) {
+  if (I == 0)
+    return AArch64AtomicStoreHint::HINT_STSHH_KEEP;
+  if (I == 1)
+    return AArch64AtomicStoreHint::HINT_STSHH_STRM;
+  return AArch64AtomicStoreHint::HINT_NONE;
+}
+} // namespace llvm
+#endif // LLVM_SUPPORT_AARCH64ATOMICHINTS_H
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 960d2492c2856..3b2d60fa4526c 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -254,6 +254,8 @@ static void copyMetadataForAtomic(Instruction &Dest,
         Dest.setMetadata(ID, N);
       else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
         Dest.setMetadata(ID, N);
+      else if (ID == Ctx.getMDKindID("aarch64.atomic.hint"))
+        Dest.setMetadata(ID, N);
 
       // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
       // uses.
@@ -719,6 +721,7 @@ StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
   NewSI->setAlignment(SI->getAlign());
   NewSI->setVolatile(SI->isVolatile());
   NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
+  copyMetadataForAtomic(*NewSI, *SI);
   LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
   SI->eraseFromParent();
   return NewSI;
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 5fa93da1544fc..81fb5619f57b0 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -70,6 +70,9 @@ class AArch64ExpandPseudoImpl {
                             MachineBasicBlock::iterator MBBI);
   bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI);
+  bool expandAtomicStoreHintPseudo(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned Size);
   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                       unsigned ExtendImm, unsigned ZeroReg,
@@ -1308,6 +1311,51 @@ bool AArch64ExpandPseudoImpl::expandFormTuplePseudo(
   return true;
 }
 
+bool AArch64ExpandPseudoImpl::expandAtomicStoreHintPseudo(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Size) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  // Expands an atomic-store-with-hint pseudo into an STSHH plus a store.
+  unsigned StOpc;
+  unsigned Order = MI.getOperand(2).getImm();
+  bool Relaxed = Order == 0;
+  // Order 0 selects the STR*ui opcodes; any other order uses the STLR* forms.
+  switch (Size) {
+  case 8:
+    StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
+    break;
+  case 16:
+    StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
+    break;
+  case 32:
+    StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
+    break;
+  case 64:
+    StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
+    break;
+  default:
+    llvm_unreachable("Unexpected atomic hint size.");
+  }
+  // Emit the STSHH first, carrying the pseudo's hint immediate (operand 3).
+  auto *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
+                   .addImm(MI.getOperand(3).getImm())
+                   .getInstr();
+  // The store takes operand 1 and then the register of operand 0.
+  auto Store = BuildMI(MBB, MBBI, DL, TII->get(StOpc))
+                   .add(MI.getOperand(1))
+                   .addReg(MI.getOperand(0).getReg())
+                   .setMemRefs(MI.memoperands())
+                   .setMIFlags(MI.getFlags());
+  // Relaxed opcodes get an extra zero immediate (presumably their offset).
+  if (Relaxed)
+    Store.addImm(0);
+  // Bundle the instructions from the hint up to the pseudo, then erase it.
+  transferImpOps(MI, Store, Store);
+  finalizeBundle(MBB, Hint->getIterator(), MBBI->getIterator());
+  MI.eraseFromParent();
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true.  Otherwise return false.
 bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
@@ -1948,6 +1996,14 @@ bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
   case AArch64::NAND_ZZZ:
   case AArch64::NOR_ZZZ:
     return expandSVEBitwisePseudo(MI, MBB, MBBI);
+  case AArch64::ATOMIC_STORE_HINT_B:
+    return expandAtomicStoreHintPseudo(MBB, MBBI, 8);
+  case AArch64::ATOMIC_STORE_HINT_H:
+    return expandAtomicStoreHintPseudo(MBB, MBBI, 16);
+  case AArch64::ATOMIC_STORE_HINT_S:
+    return expandAtomicStoreHintPseudo(MBB, MBBI, 32);
+  case AArch64::ATOMIC_STORE_HINT_D:
+    return expandAtomicStoreHintPseudo(MBB, MBBI, 64);
   }
   return false;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ade160de983b1..ac02cc7ca017a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
+#include "AArch64InstrInfo.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
@@ -21,6 +22,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
@@ -511,6 +513,10 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
 
   bool SelectCMP_SWAP(SDNode *N);
 
+  bool isAtomicHintInst(SDNode *N, AArch64AtomicStoreHint Hint) const;
+  bool isAtomicSTSHH_KEEP(SDNode *N) const;
+  bool isAtomicSTSHH_STRM(SDNode *N) const;
+
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                           bool Negate);
   bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
@@ -4533,6 +4539,20 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   return true;
 }
 
+bool AArch64DAGToDAGISel::isAtomicHintInst(SDNode *N,
+                                           AArch64AtomicStoreHint Hint) const {
+  auto Flags = cast<MemSDNode>(N)->getMemOperand()->getFlags(); // MMO flags
+  return Hint == AArch64InstrInfo::decodeAtomicHintFlags(Flags);
+}
+
+bool AArch64DAGToDAGISel::isAtomicSTSHH_KEEP(SDNode *N) const {
+  return isAtomicHintInst(N, AArch64AtomicStoreHint::HINT_STSHH_KEEP);
+}
+
+bool AArch64DAGToDAGISel::isAtomicSTSHH_STRM(SDNode *N) const {
+  return isAtomicHintInst(N, AArch64AtomicStoreHint::HINT_STSHH_STRM);
+}
+
 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
                                              SDValue &Shift, bool Negate) {
   if (!isa<ConstantSDNode>(N))
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 63182d31bfd7b..49d99f6f8e80d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -78,6 +78,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
@@ -18662,7 +18663,25 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
   if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
       I.hasMetadata(FALKOR_STRIDED_ACCESS_MD))
     return MOStridedAccess;
-  return MachineMemOperand::MONone;
+
+  auto Flags = MachineMemOperand::MONone;
+  const MDNode *AtomicStHint = I.getMetadata(AARCH64_ATOMIC_STORE_HINT_MD);
+  if (AtomicStHint) {
+    unsigned HintVal =
+        cast<ConstantInt>(
+            cast<ConstantAsMetadata>(AtomicStHint->getOperand(0))->getValue())
+            ->getZExtValue();
+    AArch64AtomicStoreHint Hint = getAtomicStoreHintFromMD(HintVal);
+    assert(Hint != AArch64AtomicStoreHint::HINT_NONE &&
+           "Unrecognised atomic hint value requested.");
+
+    if (static_cast<unsigned>(Hint) & 0b1)
+      Flags |= MOAtomicHintBit0;
+    if (static_cast<unsigned>(Hint) & 0b10)
+      Flags |= MOAtomicHintBit1;
+  }
+
+  return Flags;
 }
 
 bool AArch64TargetLowering::isLegalInterleavedAccessType(
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 2187f21abb70f..7d719239ecc02 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -282,6 +282,115 @@ def : Pat<(relaxed_store<atomic_store_64>
                (am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
           (STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
 
+//===----------------------------------
+// Atomic store with hint pseudos
+//===----------------------------------
+
+class seq_cst_store<PatFrags base>
+  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingSequentiallyConsistent = 1;
+}
+
+class atomic_hint_stshh_keep_relaxed<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (relaxed_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_KEEP(N); }]>;
+
+class atomic_hint_stshh_keep_release<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (releasing_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_KEEP(N); }]>;
+
+class atomic_hint_stshh_keep_seqcst<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (seq_cst_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_KEEP(N); }]>;
+
+class atomic_hint_stshh_strm_relaxed<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (relaxed_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_STRM(N); }]>;
+
+class atomic_hint_stshh_strm_release<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (releasing_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_STRM(N); }]>;
+
+class atomic_hint_stshh_strm_seqcst<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (seq_cst_store<base> node:$ptr, node:$val),
+            [{ return isAtomicSTSHH_STRM(N); }]>;
+
+class BaseStoreHintPseudo<RegisterClass regtype>
+      : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
+                        i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]> {
+  let isCodeGenOnly = 1;
+  let hasSideEffects = 1;
+  let mayStore = 1;
+}
+
+def ATOMIC_STORE_HINT_B : BaseStoreHintPseudo<GPR32>;
+def ATOMIC_STORE_HINT_H : BaseStoreHintPseudo<GPR32>;
+def ATOMIC_STORE_HINT_S : BaseStoreHintPseudo<GPR32>;
+def ATOMIC_STORE_HINT_D : BaseStoreHintPseudo<GPR64>;
+
+let AddedComplexity = 15 in {
+  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
+  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
+  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
+  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 0))>;
+
+  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
+  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
+  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
+  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 0))>;
+
+  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
+  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
+  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
+  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 0))>;
+
+  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
+  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
+  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
+  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 1))>;
+
+  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
+  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
+  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
+  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 1))>;
+
+  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
+  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
+  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
+  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 1))>;
+}
+
 //===----------------------------------
 // Low-level exclusive operations
 //===----------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 57ea2d2f2f992..1d75d3aa89f77 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2885,6 +2885,19 @@ bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
   });
 }
 
+AArch64AtomicStoreHint
+AArch64InstrInfo::decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags) {
+  // Rebuild the two-bit hint encoding from the target-specific MMO flags.
+  const unsigned LowBit = (MMOFlags & MOAtomicHintBit0) ? 0b01u : 0u;
+  const unsigned HighBit = (MMOFlags & MOAtomicHintBit1) ? 0b10u : 0u;
+  const unsigned Encoded = LowBit | HighBit;
+
+  // Encodings rejected by the validity check decode to HINT_NONE.
+  return isValidAArch64AtomicHintValue(Encoded)
+             ? static_cast<AArch64AtomicStoreHint>(Encoded)
+             : AArch64AtomicStoreHint::HINT_NONE;
+}
+
 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
   switch (Opc) {
   default:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 15bd832de8d25..06fb6cbbabe5a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -16,6 +16,7 @@
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/Support/TypeSize.h"
 #include <optional>
 
@@ -30,8 +31,13 @@ static const MachineMemOperand::Flags MOSuppressPair =
     MachineMemOperand::MOTargetFlag1;
 static const MachineMemOperand::Flags MOStridedAccess =
     MachineMemOperand::MOTargetFlag2;
+static const MachineMemOperand::Flags MOAtomicHintBit0 =
+    MachineMemOperand::MOTargetFlag3;
+static const MachineMemOperand::Flags MOAtomicHintBit1 =
+    MachineMemOperand::MOTargetFlag4;
 
 #define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
+#define AARCH64_ATOMIC_STORE_HINT_MD "aarch64.atomic.hint"
 
 // AArch64 MachineCombiner patterns
 enum AArch64MachineCombinerPattern : unsigned {
@@ -230,6 +236,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   /// Return true if the given load or store is a strided memory access.
   static bool isStridedAccess(const MachineInstr &MI);
 
+  static AArch64AtomicStoreHint
+  decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags);
+
   /// Return true if it has an unscaled load/store offset.
   static bool hasUnscaledLdStOffset(unsigned Opc);
   static bool hasUnscaledLdStOffset(MachineInstr &MI) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 4f4c999ab244d..fe98659b6900e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -45,6 +45,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <optional>
@@ -2541,6 +2542,66 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
     I.eraseFromParent();
     return true;
   }
+  case TargetOpcode::G_STORE: {
+    GStore &St = cast<GStore>(I);
+    auto MMO = St.getMMO();
+    LLT PtrTy = MRI.getType(St.getPointerReg());
+
+    // Only for handling atomic store with hint.
+    // Can only handle AddressSpace 0, 64-bit pointers.
+    if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64)) {
+      return false;
+    }
+
+    AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags());
+    if (Hint == AArch64AtomicStoreHint::HINT_NONE)
+      return false;
+
+    unsigned HintOpc;
+    unsigned StoreSize = St.getMemSizeInBits().getValue();
+    Register ValueReg = St.getValueReg();
+    switch (StoreSize) {
+    case 8:
+      HintOpc = AArch64::ATOMIC_STORE_HINT_B;
+      break;
+    case 16: {
+      Register CastReg;
+      if (mi_match(ValueReg, MRI, m_GBitcast(m_Reg(CastReg)))) {
+        auto Undef = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF,
+                                    {&AArch64::FPR32RegClass}, {});
+        auto Ins = MIB.buildInstr(TargetOpcode::INSERT_SUBREG,
+                                  {&AArch64::FPR32RegClass}, {Undef, ValueReg})
+                       .addImm(AArch64::hsub);
+        constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
+        constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+        ValueReg = Ins.getReg(0);
+      }
+      HintOpc = AArch64::ATOMIC_STORE_HINT_H;
+      break;
+    }
+    case 32:
+      HintOpc = AArch64::ATOMIC_STORE_HINT_S;
+      break;
+    case 64:
+      HintOpc = AArch64::ATOMIC_STORE_HINT_D;
+      break;
+    default:
+      llvm_unreachable("Unexpected getMemSizeInBits() value for atomic hint.");
+    }
+
+    unsigned HintImm = Hint == AArch64AtomicStoreHint::HINT_STSHH_KEEP ? 0 : 1;
+
+    auto StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
+                         .addReg(St.getPointerReg())
+                         .addReg(ValueReg)
+                         .addImm((int)toCABI(St.getMMO().getSuccessOrdering()))
+                         .addImm(static_cast<unsigned>(HintImm));
+
+    StrPseudo.cloneMemRefs(I);
+    I.eraseFromParent();
+    constrainSelectedInstRegOperands(*StrPseudo, TII, TRI, RBI);
+    return true;
+  }
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
new file mode 100644
index 0000000000000..dfcfa92cbc6c8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel=1 -verify-machineinstrs < %s | FileCheck %s
+
+;
+; STSHH: Keep, Relaxed
+;
+
+define dso_local void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strb w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i8 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strh w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i16 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i32 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str x1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i64 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+;
+; STSHH: Keep, Release
+;
+
+define dso_local void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlrh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic bfloat %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_release_half(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlrh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic half %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_release_float(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlr w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic float %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_release_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlr x8, [x0]
+; CHECK-NEXT:    ret
+  store atomic double %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+;
+; STSHH: Keep, SequentiallyConsistent
+;
+
+define dso_local void @test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlrb w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i8 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlrh w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i16 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlr w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i32 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+define dso_local void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stlr x1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i64 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+  ret void
+}
+
+;
+; STSHH: Stream, Relaxed
+;
+
+define dso_local void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic bfloat %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_strm_relaxed_half(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic half %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_strm_relaxed_float(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic float %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_strm_relaxed_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str x8, [x0]
+; CHECK-NEXT:    ret
+  store atomic double %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+;
+; STSHH: Stream, Release
+;
+
+define dso_local void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrb w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_release_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrh w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i16 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_release_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i32 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_release_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr x1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i64 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+;
+; STSHH: Stream, SequentiallyConsistent
+;
+
+define dso_local void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic bfloat %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_seqcst_half(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlrh w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic half %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_seqcst_float(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr w8, [x0]
+; CHECK-NEXT:    ret
+  store atomic float %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+define dso_local void @test_atomic_store_stream_seqcst_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stlr x8, [x0]
+; CHECK-NEXT:    ret
+  store atomic double  %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+!0 = !{i32 0}
+!1 = !{i32 1}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
new file mode 100644
index 0000000000000..ed69efafb04c3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
@@ -0,0 +1,30 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  define void @test_atomic_store_keep_release_i8(ptr %ptr, i8 %val) {
+    store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+    ret void
+  }
+
+  !0 = !{i32 0}
+...
+
+---
+name:            test_atomic_store_keep_release_i8
+liveins:
+  - { reg: '$x0', virtual-reg: '' }
+  - { reg: '$w1', virtual-reg: '' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: $w1, $x0
+
+    ; CHECK-LABEL: name: test_atomic_store_keep_release_i8
+    ; CHECK: BUNDLE implicit killed $w1, implicit $x0 :: (store release (s8) into %ir.ptr, align 8) {
+    ; CHECK-NEXT: STSHH 0
+    ; CHECK-NEXT: STRBBui killed renamable $w1, $x0, 0 :: (store release (s8) into %ir.ptr, align 8)
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: RET undef $lr
+
+    ATOMIC_STORE_HINT_B killed renamable $x0, killed renamable $w1, 0, 0 :: (store release (s8) into %ir.ptr, align 8)
+    RET_ReallyLR
+...

>From 189353e701d9e647684c22107c462512713ff4c9 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 19 May 2026 10:43:46 +0000
Subject: [PATCH 2/4] Move expandAtomicStoreHintPseudo to AArch64AsmPrinter.cpp

---
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 51 +++++++++++++++++
 .../AArch64/AArch64ExpandPseudoInsts.cpp      | 56 -------------------
 .../lib/Target/AArch64/AArch64InstrAtomics.td |  1 +
 .../Atomics/aarch64-atomic-store-hint.mir     | 30 ----------
 4 files changed, 52 insertions(+), 86 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index b16c0460adf38..11c26bb42d423 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -278,6 +278,9 @@ class AArch64AsmPrinter : public AsmPrinter {
   // Emit expansion of Compare-and-branch pseudo instructions
   void emitCBPseudoExpansion(const MachineInstr *MI);
 
+  // Emit expansion of atomic store with hint pseudo instructions
+  void emitAtomicHintPseudoExpansion(const MachineInstr *MI, unsigned Size);
+
   void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
   void EmitToStreamer(const MCInst &Inst) {
     EmitToStreamer(*OutStreamer, Inst);
@@ -3126,6 +3129,42 @@ void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, Inst);
 }
 
+void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
+                                                      unsigned Size) {
+
+  unsigned StOpc;
+  unsigned Order = MI->getOperand(2).getImm();
+  bool Relaxed = Order == 0;
+  switch (Size) {
+  case 8:
+    StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
+    break;
+  case 16:
+    StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
+    break;
+  case 32:
+    StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
+    break;
+  case 64:
+    StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
+    break;
+  default:
+    llvm_unreachable("Unexpected atomic hint size.");
+  }
+
+  EmitToStreamer(
+      MCInstBuilder(AArch64::STSHH).addImm(MI->getOperand(3).getImm()));
+
+  MCInst Store;
+  Store.setOpcode(StOpc);
+  Store.addOperand(MCOperand::createReg(MI->getOperand(1).getReg()));
+  Store.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+  Store.setFlags(MI->getFlags());
+  if (Relaxed)
+    Store.addOperand(MCOperand::createImm(0));
+  EmitToStreamer(*OutStreamer, Store);
+}
+
 // Simple pseudo-instructions have their lowering (with expansion to real
 // instructions) auto-generated.
 #include "AArch64GenMCPseudoLowering.inc"
@@ -3813,6 +3852,18 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
   case AArch64::CBXPrr:
     emitCBPseudoExpansion(MI);
     return;
+  case AArch64::ATOMIC_STORE_HINT_B:
+    emitAtomicHintPseudoExpansion(MI, 8);
+    return;
+  case AArch64::ATOMIC_STORE_HINT_H:
+    emitAtomicHintPseudoExpansion(MI, 16);
+    return;
+  case AArch64::ATOMIC_STORE_HINT_S:
+    emitAtomicHintPseudoExpansion(MI, 32);
+    return;
+  case AArch64::ATOMIC_STORE_HINT_D:
+    emitAtomicHintPseudoExpansion(MI, 64);
+    return;
   }
 
   if (emitDeactivationSymbolRelocation(MI->getDeactivationSymbol()))
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 81fb5619f57b0..5fa93da1544fc 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -70,9 +70,6 @@ class AArch64ExpandPseudoImpl {
                             MachineBasicBlock::iterator MBBI);
   bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI);
-  bool expandAtomicStoreHintPseudo(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   unsigned Size);
   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                       unsigned ExtendImm, unsigned ZeroReg,
@@ -1311,51 +1308,6 @@ bool AArch64ExpandPseudoImpl::expandFormTuplePseudo(
   return true;
 }
 
-bool AArch64ExpandPseudoImpl::expandAtomicStoreHintPseudo(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Size) {
-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-
-  unsigned StOpc;
-  unsigned Order = MI.getOperand(2).getImm();
-  bool Relaxed = Order == 0;
-
-  switch (Size) {
-  case 8:
-    StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
-    break;
-  case 16:
-    StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
-    break;
-  case 32:
-    StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
-    break;
-  case 64:
-    StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
-    break;
-  default:
-    llvm_unreachable("Unexpected atomic hint size.");
-  }
-
-  auto *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
-                   .addImm(MI.getOperand(3).getImm())
-                   .getInstr();
-
-  auto Store = BuildMI(MBB, MBBI, DL, TII->get(StOpc))
-                   .add(MI.getOperand(1))
-                   .addReg(MI.getOperand(0).getReg())
-                   .setMemRefs(MI.memoperands())
-                   .setMIFlags(MI.getFlags());
-
-  if (Relaxed)
-    Store.addImm(0);
-
-  transferImpOps(MI, Store, Store);
-  finalizeBundle(MBB, Hint->getIterator(), MBBI->getIterator());
-  MI.eraseFromParent();
-  return true;
-}
-
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true.  Otherwise return false.
 bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
@@ -1996,14 +1948,6 @@ bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
   case AArch64::NAND_ZZZ:
   case AArch64::NOR_ZZZ:
     return expandSVEBitwisePseudo(MI, MBB, MBBI);
-  case AArch64::ATOMIC_STORE_HINT_B:
-    return expandAtomicStoreHintPseudo(MBB, MBBI, 8);
-  case AArch64::ATOMIC_STORE_HINT_H:
-    return expandAtomicStoreHintPseudo(MBB, MBBI, 16);
-  case AArch64::ATOMIC_STORE_HINT_S:
-    return expandAtomicStoreHintPseudo(MBB, MBBI, 32);
-  case AArch64::ATOMIC_STORE_HINT_D:
-    return expandAtomicStoreHintPseudo(MBB, MBBI, 64);
   }
   return false;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 7d719239ecc02..af57d471e0411 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -325,6 +325,7 @@ class atomic_hint_stshh_strm_seqcst<PatFrag base>
 class BaseStoreHintPseudo<RegisterClass regtype>
       : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
                         i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]> {
+  let Size = 8;
   let isCodeGenOnly = 1;
   let hasSideEffects = 1;
   let mayStore = 1;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
deleted file mode 100644
index ed69efafb04c3..0000000000000
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
+++ /dev/null
@@ -1,30 +0,0 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s
-
---- |
-  define void @test_atomic_store_keep_release_i8(ptr %ptr, i8 %val) {
-    store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
-    ret void
-  }
-
-  !0 = !{i32 0}
-...
-
----
-name:            test_atomic_store_keep_release_i8
-liveins:
-  - { reg: '$x0', virtual-reg: '' }
-  - { reg: '$w1', virtual-reg: '' }
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: $w1, $x0
-
-    ; CHECK-LABEL: name: test_atomic_store_keep_release_i8
-    ; CHECK: BUNDLE implicit killed $w1, implicit $x0 :: (store release (s8) into %ir.ptr, align 8) {
-    ; CHECK-NEXT: STSHH 0
-    ; CHECK-NEXT: STRBBui killed renamable $w1, $x0, 0 :: (store release (s8) into %ir.ptr, align 8)
-    ; CHECK-NEXT: }
-    ; CHECK-NEXT: RET undef $lr
-
-    ATOMIC_STORE_HINT_B killed renamable $x0, killed renamable $w1, 0, 0 :: (store release (s8) into %ir.ptr, align 8)
-    RET_ReallyLR
-...

>From d9a3b6dfe3925ef1206fcd822d0c64b46fc1ba62 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Fri, 12 Jun 2026 10:38:00 +0000
Subject: [PATCH 3/4] - Document aarch64.atomic.hint in LangRef - Rewrite
 patterns - Use LLVM ordering in patterns & expansion - Add Clang tests for
 more types - Remove AARCH64_ATOMIC_STORE_HINT_MD

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  10 +-
 clang/lib/Sema/SemaARM.cpp                    |   9 +-
 clang/test/CodeGen/arm_acle.c                 |  90 ++++++++++++++
 clang/test/Sema/builtins-arm64.c              |   6 +-
 llvm/docs/LangRef.rst                         |  34 +++++-
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |   2 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |   2 +-
 .../lib/Target/AArch64/AArch64InstrAtomics.td | 115 ++++--------------
 llvm/lib/Target/AArch64/AArch64InstrInfo.h    |   1 -
 .../GISel/AArch64InstructionSelector.cpp      |  32 ++++-
 10 files changed, 192 insertions(+), 109 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 11ddd5b61e4cb..975f37fb07a7d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9605,6 +9605,11 @@ def err_atomic_builtin_must_be_pointer : Error<
 def err_atomic_builtin_must_be_pointer_intptr : Error<
   "address argument to atomic builtin must be a pointer to integer or pointer"
   " (%0 invalid)">;
+
+def err_atomic_hint_builtin_must_be_pointer : Error<
+  "address argument to atomic hint builtin must be a pointer to a scalar "
+  "integral or floating-point type of 8, 16, 32, or 64 bits (%0 invalid)">;
+
 def err_atomic_builtin_cannot_be_const : Error<
   "address argument to atomic builtin cannot be const-qualified (%0 invalid)">;
 def err_atomic_builtin_must_be_pointer_intfltptr : Error<
@@ -9671,8 +9676,9 @@ def err_atomic_op_hint_data_size : Error<
   "address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits">;
 def err_atomic_hint_has_invalid_memory_order : Error<
    "invalid memory order argument to atomic hint operation (%0 invalid)">;
-def err_atomic_hint_has_invalid_hint_type : Error<
-   "invalid hint type argument to atomic hint operation (%0 invalid)">;
+def warn_atomic_hint_has_invalid_hint_type : Warning<
+   "unrecognised hint type argument to atomic hint operation (%0)">,
+  InGroup<DiagGroup<"atomic-hint-type">>;
 def warn_atomic_op_has_invalid_memory_order : Warning<
   "%select{|success |failure }0memory order argument to atomic operation is invalid">,
   InGroup<DiagGroup<"atomic-memory-ordering">>;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 78f83d18deab8..33d1750287b03 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -338,7 +338,7 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
   auto *PtrTy = PtrArg->getType()->getAs<PointerType>();
   if (!PtrTy)
     return Diag(TheCall->getBeginLoc(),
-                diag::err_atomic_builtin_must_be_pointer)
+                diag::err_atomic_hint_builtin_must_be_pointer)
            << PtrArg->getType() << 0 << PtrArg->getSourceRange();
   QualType PtrQT = PtrTy->getPointeeType();
 
@@ -348,8 +348,7 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
                 diag::err_atomic_op_needs_atomic_int_or_fp)
            << 0 << PtrQT << PtrArg->getSourceRange();
 
-  unsigned TySize =
-      Context.getTypeSize(Context.getCanonicalType(PtrQT).getUnqualifiedType());
+  unsigned TySize = Context.getTypeSize(PtrQT);
   if (TySize != 8 && TySize != 16 && TySize != 32 && TySize != 64)
     return Diag(TheCall->getBeginLoc(), diag::err_atomic_op_hint_data_size)
            << PtrArg->getSourceRange();
@@ -398,14 +397,14 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
   std::optional<llvm::APSInt> HintAP = HintArg->getIntegerConstantExpr(Context);
   if (!HintAP)
     return Diag(TheCall->getBeginLoc(),
-                diag::err_atomic_hint_has_invalid_hint_type)
+                diag::warn_atomic_hint_has_invalid_hint_type)
            << HintArg->getType() << HintArg->getSourceRange();
 
   unsigned Hint = HintAP->getZExtValue();
   if (llvm::getAtomicStoreHintFromMD(Hint) ==
       llvm::AArch64AtomicStoreHint::HINT_NONE)
     return Diag(TheCall->getBeginLoc(),
-                diag::err_atomic_hint_has_invalid_hint_type)
+                diag::warn_atomic_hint_has_invalid_hint_type)
            << *HintAP << HintArg->getSourceRange();
 
   return false;
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index a8aa0916a8a4c..69ad674193b9d 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1833,6 +1833,24 @@ void test_atomic_store_hint_char(char *ptr, char data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_uchar(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 1, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_uchar(unsigned char *ptr, unsigned char data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_schar(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 1, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_schar(signed char *ptr, signed char data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_bfloat(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic bfloat [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META4:![0-9]+]]
@@ -1842,6 +1860,15 @@ void test_atomic_store_hint_bfloat(__bf16 *ptr, __bf16 data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_half(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic half [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META4:![0-9]+]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_half(__fp16 *ptr, __fp16 data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_short(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META3]]
@@ -1851,6 +1878,33 @@ void test_atomic_store_hint_short(short *ptr, short data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_ushort(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_ushort(unsigned short *ptr, unsigned short data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_int(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_int(int *ptr, int data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_unsigned(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_unsigned(unsigned *ptr, unsigned data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_u32(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
@@ -1860,6 +1914,15 @@ void test_atomic_store_hint_u32(uint32_t *ptr, uint32_t data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_s32(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_s32(int32_t *ptr, int32_t data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_float(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic float [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
@@ -1878,6 +1941,24 @@ void test_atomic_store_hint_s64(int64_t *ptr, int64_t data) {
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_long(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_long(long *ptr, long data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_ulong(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_ulong(unsigned long *ptr, unsigned long data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_long_long_int(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
@@ -1887,6 +1968,15 @@ void test_atomic_store_hint_long_long_int(long long int *ptr, long long int data
   __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
 }
 
+// AArch64-LABEL: @test_atomic_store_hint_long_long_uint(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT:    ret void
+//
+void test_atomic_store_hint_long_long_uint(unsigned long long int *ptr, unsigned long long int data) {
+  __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
 // AArch64-LABEL: @test_atomic_store_hint_double(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    store atomic double [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 8, !aarch64.atomic.hint [[META4]]
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index fb4718a1bd1f4..5372aff24b739 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -58,13 +58,13 @@ void test_atomic_store_hint(char *c_ptr, __int128 *inv_ptr, float *f_ptr,
   __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0); // expected-error {{too few arguments to function call, expected 4, have 3}}
   __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}}
 
-  __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+  __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error {{address argument to atomic hint builtin must be a pointer to a scalar integral or floating-point type of 8, 16, 32, or 64 bits ('int' invalid)}}
   __builtin_arm_atomic_store_with_hint(c_ptr, f_data, 0, 0); // expected-error {{arguments are of different types ('char' vs 'float')}}
   __builtin_arm_atomic_store_with_hint(inv_ptr, inv_data, 0, 0); // expected-error {{address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits}}
 
   __builtin_arm_atomic_store_with_hint(c_ptr, c_data, inv_int, 0); // expected-error {{invalid memory order argument to atomic hint operation ('int' invalid)}}
   __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 2, 0); // expected-error {{invalid memory order argument to atomic hint operation (2 invalid)}}
 
-  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, inv_int); // expected-error {{invalid hint type argument to atomic hint operation ('int' invalid)}}
-  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // expected-error {{invalid hint type argument to atomic hint operation (3 invalid)}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, inv_int); // expected-warning {{unrecognised hint type argument to atomic hint operation ('int')}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // expected-warning {{unrecognised hint type argument to atomic hint operation (3)}}
 }
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 92cbfab50c8ef..1b459b35709c8 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -8683,6 +8683,33 @@ to the SSA value of the pointer operand.
 Note that this is an experimental feature, which means that its semantics might
 change in the future.
 
+.. _md_aarch64.atomic.hint:
+
+'``aarch64.atomic.hint``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``aarch64.atomic.hint`` metadata may be attached to an atomic store
+instruction, referencing a single metadata node containing a single ``i32``
+entry:
+
+.. code-block:: llvm
+
+  store atomic i64 %x, ptr %y seq_cst, align 8, !aarch64.atomic.hint !0
+
+  ...
+  !0 = !{i32 1}
+
+On AArch64 targets, this metadata may be used to emit an atomic store together
+with a hint instruction. The hint is only a suggestion: the compiler may use it
+when selecting code sequences, but is not required to emit a specific hint
+instruction. The following hint values are currently recognised:
+
+  * ``0``: ``stshh keep`` hint.
+  * ``1``: ``stshh strm`` hint.
+
+If the compiler does not recognise the hint value provided, it may ignore the
+metadata. Targets that do not support this metadata may also ignore it.
+
 '``type``' Metadata
 ^^^^^^^^^^^^^^^^^^^
 
@@ -12154,9 +12181,10 @@ Syntax:
 ::
 
       store [volatile] <ty> <value>, ptr <pointer>[, align <alignment>][, !nontemporal !<nontemp_node>][, !invariant.group !<empty_node>]        ; yields void
-      store atomic [volatile] <ty> <value>, ptr <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>] ; yields void
+      store atomic [volatile] <ty> <value>, ptr <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>][, !aarch64.atomic.hint !<aarch64_hint_value>] ; yields void
       !<nontemp_node> = !{ i32 1 }
       !<empty_node> = !{}
+      !<aarch64_hint_value> = !{ i32 <hint> }
 
 Overview:
 """""""""
@@ -12212,6 +12240,10 @@ x86.
 The optional ``!invariant.group`` metadata must reference a
 single metadata name ``<empty_node>``. See ``invariant.group`` metadata.
 
+The optional ``!aarch64.atomic.hint`` metadata must reference a single metadata
+name ``<aarch64_hint_value>``. See :ref:`aarch64.atomic.hint
+<md_aarch64.atomic.hint>` metadata.
+
 Semantics:
 """"""""""
 
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 11c26bb42d423..e16a6ac3c9aca 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -3134,7 +3134,7 @@ void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
 
   unsigned StOpc;
   unsigned Order = MI->getOperand(2).getImm();
-  bool Relaxed = Order == 0;
+  bool Relaxed = Order == 2;
   switch (Size) {
   case 8:
     StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 49d99f6f8e80d..4a4b711ee7448 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18665,7 +18665,7 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
     return MOStridedAccess;
 
   auto Flags = MachineMemOperand::MONone;
-  const MDNode *AtomicStHint = I.getMetadata(AARCH64_ATOMIC_STORE_HINT_MD);
+  const MDNode *AtomicStHint = I.getMetadata("aarch64.atomic.hint");
   if (AtomicStHint) {
     unsigned HintVal =
         cast<ConstantInt>(
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index af57d471e0411..af06ef9014031 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -286,42 +286,6 @@ def : Pat<(relaxed_store<atomic_store_64>
 // Atomic store with hint pseudos
 //===----------------------------------
 
-class seq_cst_store<PatFrags base>
-  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
-  let IsAtomic = 1;
-  let IsAtomicOrderingSequentiallyConsistent = 1;
-}
-
-class atomic_hint_stshh_keep_relaxed<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (relaxed_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_KEEP(N); }]>;
-
-class atomic_hint_stshh_keep_release<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (releasing_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_KEEP(N); }]>;
-
-class atomic_hint_stshh_keep_seqcst<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (seq_cst_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_KEEP(N); }]>;
-
-class atomic_hint_stshh_strm_relaxed<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (relaxed_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_STRM(N); }]>;
-
-class atomic_hint_stshh_strm_release<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (releasing_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_STRM(N); }]>;
-
-class atomic_hint_stshh_strm_seqcst<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val),
-            (seq_cst_store<base> node:$ptr, node:$val),
-            [{ return isAtomicSTSHH_STRM(N); }]>;
-
 class BaseStoreHintPseudo<RegisterClass regtype>
       : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
                         i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]> {
@@ -336,60 +300,33 @@ def ATOMIC_STORE_HINT_H : BaseStoreHintPseudo<GPR32>;
 def ATOMIC_STORE_HINT_S : BaseStoreHintPseudo<GPR32>;
 def ATOMIC_STORE_HINT_D : BaseStoreHintPseudo<GPR64>;
 
+class atomic_hint_store<PatFrag Base, bit Rel, bit SeqCst, code Pred, code GIPred>
+  : PatFrag<(ops node:$ptr, node:$val),
+            (Base node:$val, node:$ptr), Pred> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingReleaseOrStronger = Rel;
+  let IsAtomicOrderingSequentiallyConsistent = SeqCst;
+  let GISelPredicateCode = GIPred;
+}
+
+multiclass AtomicHintPatterns<int Order, int Hint, bit Rel, bit SeqCst, code Pred, code GIPred> {
+  def : Pat<(atomic_hint_store<atomic_store_8, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_16, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_32, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_64, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR64:$data),
+            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 Order), (i32 Hint))>;
+}
+
 let AddedComplexity = 15 in {
-  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_8> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
-  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_16> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
-  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_32> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
-  def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_64> GPR64sp:$addr, GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 0))>;
-
-  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_8> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
-  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_16> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
-  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_32> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
-  def : Pat<(atomic_hint_stshh_keep_release<atomic_store_64> GPR64sp:$addr, GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 0))>;
-
-  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_8> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
-  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_16> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
-  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_32> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
-  def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_64> GPR64sp:$addr, GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 0))>;
-
-  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_8> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
-  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_16> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
-  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_32> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
-  def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_64> GPR64sp:$addr, GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 1))>;
-
-  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_8> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
-  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_16> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
-  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_32> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
-  def : Pat<(atomic_hint_stshh_strm_release<atomic_store_64> GPR64sp:$addr, GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 1))>;
-
-  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_8> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
-  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_16> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
-  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_32> GPR64sp:$addr, GPR32:$data),
-            (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
-  def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_64> GPR64sp:$addr, GPR64:$data),
-            (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 1))>;
+  defm : AtomicHintPatterns<2, 0, 0, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatterns<5, 0, 1, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatterns<7, 0, 0, 1, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatterns<2, 1, 0, 0, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+  defm : AtomicHintPatterns<5, 1, 1, 0, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+  defm : AtomicHintPatterns<7, 1, 0, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
 }
 
 //===----------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 06fb6cbbabe5a..1eb5dac67ea2a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -37,7 +37,6 @@ static const MachineMemOperand::Flags MOAtomicHintBit1 =
     MachineMemOperand::MOTargetFlag4;
 
 #define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
-#define AARCH64_ATOMIC_STORE_HINT_MD "aarch64.atomic.hint"
 
 // AArch64 MachineCombiner patterns
 enum AArch64MachineCombinerPattern : unsigned {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index fe98659b6900e..3c1f7e6213a36 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -522,6 +522,11 @@ class AArch64InstructionSelector : public InstructionSelector {
                                       MachineOperand &Predicate,
                                       MachineIRBuilder &MIRBuilder) const;
 
+  bool isAtomicHintInst(const MachineInstr &MI,
+                        AArch64AtomicStoreHint Hint) const;
+  bool isAtomicSTSHH_KEEP(const MachineInstr &MI) const;
+  bool isAtomicSTSHH_STRM(const MachineInstr &MI) const;
+
   /// Return true if \p MI is a load or store of \p NumBytes bytes.
   bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
 
@@ -2546,17 +2551,15 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
     GStore &St = cast<GStore>(I);
     auto MMO = St.getMMO();
     LLT PtrTy = MRI.getType(St.getPointerReg());
+    AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags());
 
     // Only for handling atomic store with hint.
     // Can only handle AddressSpace 0, 64-bit pointers.
-    if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64)) {
+    if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64) ||
+        Hint == AArch64AtomicStoreHint::HINT_NONE) {
       return false;
     }
 
-    AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags());
-    if (Hint == AArch64AtomicStoreHint::HINT_NONE)
-      return false;
-
     unsigned HintOpc;
     unsigned StoreSize = St.getMemSizeInBits().getValue();
     Register ValueReg = St.getValueReg();
@@ -2594,7 +2597,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
     auto StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
                          .addReg(St.getPointerReg())
                          .addReg(ValueReg)
-                         .addImm((int)toCABI(St.getMMO().getSuccessOrdering()))
+                         .addImm((int)MMO.getSuccessOrdering())
                          .addImm(static_cast<unsigned>(HintImm));
 
     StrPseudo.cloneMemRefs(I);
@@ -8089,6 +8092,23 @@ void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
                                                       .getZExtValue()));
 }
 
+bool AArch64InstructionSelector::isAtomicHintInst(
+    const MachineInstr &MI, AArch64AtomicStoreHint Hint) const {
+  const GStore &St = cast<GStore>(MI);
+  auto MMO = St.getMMO();
+  return AArch64InstrInfo::decodeAtomicHintFlags(MMO.getFlags()) == Hint;
+}
+
+bool AArch64InstructionSelector::isAtomicSTSHH_KEEP(
+    const MachineInstr &MI) const {
+  return isAtomicHintInst(MI, AArch64AtomicStoreHint::HINT_STSHH_KEEP);
+}
+
+bool AArch64InstructionSelector::isAtomicSTSHH_STRM(
+    const MachineInstr &MI) const {
+  return isAtomicHintInst(MI, AArch64AtomicStoreHint::HINT_STSHH_STRM);
+}
+
 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
     const MachineInstr &MI, unsigned NumBytes) const {
   if (!MI.mayLoadOrStore())

>From 5c39cc14b76da6e90a17d6f441fb6354012e9a79 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 23 Jun 2026 10:28:36 +0000
Subject: [PATCH 4/4] - Add support for different addressing modes when the
 ordering is relaxed - Set IsAtomicOrderingMonotonic & IsAtomicOrderingRelease
 in atomic_hint_store - Do not attach invalid hints to atomic_store & ignore
 any invalid hints found

---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |  18 +-
 clang/lib/Sema/SemaARM.cpp                    |  19 +-
 clang/test/CodeGen/builtins-arm64.c           |   4 +
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 143 +++++-
 .../Target/AArch64/AArch64ISelLowering.cpp    |   2 -
 .../lib/Target/AArch64/AArch64InstrAtomics.td | 137 ++++--
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |   2 -
 .../GISel/AArch64InstructionSelector.cpp      |  92 ++--
 .../Atomics/aarch64-atomic-store-hint.ll      |  62 ++-
 .../Atomics/aarch64-relaxed-store-hint.ll     | 416 ++++++++++++++++++
 10 files changed, 780 insertions(+), 115 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-relaxed-store-hint.ll

diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 4a13767268f96..555c0a9f33c5d 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -2167,15 +2167,15 @@ static Value *EmitAtomicStoreWithHintBuiltin(CodeGenFunction &CGF,
     llvm_unreachable(
         "Expected integer hint argument to atomic store with hint.");
   unsigned HintArg = Result.Val.getInt().getExtValue();
-  assert((getAtomicStoreHintFromMD(HintArg) !=
-          AArch64AtomicStoreHint::HINT_NONE) &&
-         "Invalid hint type");
-
-  MDNode *HintMDVal =
-      MDNode::get(CGM.getLLVMContext(),
-                  llvm::ConstantAsMetadata::get(Builder.getInt32(HintArg)));
-  Store->setMetadata(CGM.getModule().getMDKindID("aarch64.atomic.hint"),
-                     HintMDVal);
+
+  // Attach the hint if valid
+  if (getAtomicStoreHintFromMD(HintArg) != AArch64AtomicStoreHint::HINT_NONE) {
+    MDNode *HintMDVal =
+        MDNode::get(CGM.getLLVMContext(),
+                    llvm::ConstantAsMetadata::get(Builder.getInt32(HintArg)));
+    Store->setMetadata(CGM.getModule().getMDKindID("aarch64.atomic.hint"),
+                       HintMDVal);
+  }
 
   return Store;
 }
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 33d1750287b03..11078d0578240 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -331,10 +331,11 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
   // Arg 0 should be the pointer type. The pointee type must be a
   // scalar integral or floating-point type of 8, 16, 32 or 64 bits.
   ASTContext &Context = getASTContext();
-  Expr *PtrArg = TheCall->getArg(0);
-  auto PtrArgRes = SemaRef.DefaultFunctionArrayLvalueConversion(PtrArg);
+  auto PtrArgRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
   if (PtrArgRes.isInvalid())
     return true;
+  auto *PtrArg = PtrArgRes.get();
   auto *PtrTy = PtrArg->getType()->getAs<PointerType>();
   if (!PtrTy)
     return Diag(TheCall->getBeginLoc(),
@@ -395,17 +396,17 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
   auto HintArg =
       SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(3)).get();
   std::optional<llvm::APSInt> HintAP = HintArg->getIntegerConstantExpr(Context);
-  if (!HintAP)
-    return Diag(TheCall->getBeginLoc(),
-                diag::warn_atomic_hint_has_invalid_hint_type)
-           << HintArg->getType() << HintArg->getSourceRange();
+  if (!HintAP) {
+    Diag(TheCall->getBeginLoc(), diag::warn_atomic_hint_has_invalid_hint_type)
+        << HintArg->getType() << HintArg->getSourceRange();
+    return false;
+  }
 
   unsigned Hint = HintAP->getZExtValue();
   if (llvm::getAtomicStoreHintFromMD(Hint) ==
       llvm::AArch64AtomicStoreHint::HINT_NONE)
-    return Diag(TheCall->getBeginLoc(),
-                diag::warn_atomic_hint_has_invalid_hint_type)
-           << *HintAP << HintArg->getSourceRange();
+    Diag(TheCall->getBeginLoc(), diag::warn_atomic_hint_has_invalid_hint_type)
+        << *HintAP << HintArg->getSourceRange();
 
   return false;
 }
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index ad9ba7feca671..01332f8114a11 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -225,6 +225,10 @@ void atomic_store_with_hint(int64_t *a, int64_t b) {
 
   __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELEASE, 1); // HINT_STSHH_STRM
   // CHECK: store atomic i64 {{.*}}, ptr {{.*}} release, align 8, !aarch64.atomic.hint ![[M2:[0-9]]]
+
+  // Invalid hint should be dropped
+  __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELAXED, 2); // Invalid Hint
+  // CHECK: store atomic i64 {{.*}}, ptr {{.*}} monotonic, align 8
 }
 
 // CHECK: ![[M0]] = !{!"1:2:3:4:5"}
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index e16a6ac3c9aca..1f5814529200d 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -279,7 +279,9 @@ class AArch64AsmPrinter : public AsmPrinter {
   void emitCBPseudoExpansion(const MachineInstr *MI);
 
   // Emit expansion of atomic store with hint pseudo instructions
-  void emitAtomicHintPseudoExpansion(const MachineInstr *MI, unsigned Size);
+  void emitAtomicHintPseudoExpansion(const MachineInstr *MI);
+  void emitAtomicHintPseudoExpansionRO(const MachineInstr *MI);
+  void emitAtomicHintPseudoExpansionImm(const MachineInstr *MI);
 
   void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
   void EmitToStreamer(const MCInst &Inst) {
@@ -3129,23 +3131,22 @@ void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, Inst);
 }
 
-void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
-                                                      unsigned Size) {
+void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI) {
 
   unsigned StOpc;
   unsigned Order = MI->getOperand(2).getImm();
   bool Relaxed = Order == 2;
-  switch (Size) {
-  case 8:
+  switch (MI->getOpcode()) {
+  case AArch64::ATOMIC_STORE_HINT_B:
     StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
     break;
-  case 16:
+  case AArch64::ATOMIC_STORE_HINT_H:
     StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
     break;
-  case 32:
+  case AArch64::ATOMIC_STORE_HINT_S:
     StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
     break;
-  case 64:
+  case AArch64::ATOMIC_STORE_HINT_D:
     StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
     break;
   default:
@@ -3165,6 +3166,104 @@ void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
   EmitToStreamer(*OutStreamer, Store);
 }
 
+// Expand an ATOMIC_STORE_HINT_*ro{W,X} pseudo into STSHH + STR*ro{W,X}.
+void AArch64AsmPrinter::emitAtomicHintPseudoExpansionRO(
+    const MachineInstr *MI) {
+  // Ordering (2 == relaxed) is read in the assert: no unused local in NDEBUG.
+  assert(MI->getOperand(5).getImm() == 2 &&
+         "Atomic store addressing mode only supports relaxed stores");
+  unsigned StOpc;
+  switch (MI->getOpcode()) {
+  case AArch64::ATOMIC_STORE_HINT_BroW:
+    StOpc = AArch64::STRBBroW;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_HroW:
+    StOpc = AArch64::STRHHroW;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_SroW:
+    StOpc = AArch64::STRWroW;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_DroW:
+    StOpc = AArch64::STRXroW;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_BroX:
+    StOpc = AArch64::STRBBroX;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_HroX:
+    StOpc = AArch64::STRHHroX;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_SroX:
+    StOpc = AArch64::STRWroX;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_DroX:
+    StOpc = AArch64::STRXroX;
+    break;
+  default:
+    llvm_unreachable("Unexpected atomic hint opcode.");
+  }
+  // Operand 6 is the hint immediate for the STSHH emitted ahead of the store.
+  EmitToStreamer(
+      MCInstBuilder(AArch64::STSHH).addImm(MI->getOperand(6).getImm()));
+
+  MCInst Store;
+  Store.setOpcode(StOpc);
+  Store.addOperand(MCOperand::createReg(MI->getOperand(2).getReg())); // Data
+  Store.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); // Rn
+  Store.addOperand(MCOperand::createReg(MI->getOperand(1).getReg())); // Rm
+  Store.addOperand(MCOperand::createImm(MI->getOperand(3).getImm())); // Signed
+  Store.addOperand(MCOperand::createImm(MI->getOperand(4).getImm())); // Shift
+  Store.setFlags(MI->getFlags());
+  EmitToStreamer(*OutStreamer, Store);
+}
+
+// Expand an ATOMIC_STORE_HINT_*{ui,i} pseudo into STSHH + STR*ui / STUR*i.
+void AArch64AsmPrinter::emitAtomicHintPseudoExpansionImm(
+    const MachineInstr *MI) {
+  // Ordering (2 == relaxed) is read in the assert: no unused local in NDEBUG.
+  assert(MI->getOperand(3).getImm() == 2 &&
+         "Atomic store addressing mode only supports relaxed stores");
+  unsigned StOpc;
+  switch (MI->getOpcode()) {
+  case AArch64::ATOMIC_STORE_HINT_Bui:
+    StOpc = AArch64::STRBBui;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_Hui:
+    StOpc = AArch64::STRHHui;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_Sui:
+    StOpc = AArch64::STRWui;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_Dui:
+    StOpc = AArch64::STRXui;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_Bi:
+    StOpc = AArch64::STURBBi;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_Hi:
+    StOpc = AArch64::STURHHi;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_Si:
+    StOpc = AArch64::STURWi;
+    break;
+  case AArch64::ATOMIC_STORE_HINT_Di:
+    StOpc = AArch64::STURXi;
+    break;
+  default:
+    llvm_unreachable("Unexpected atomic hint opcode.");
+  }
+  // Operand 4 is the hint immediate for the STSHH emitted ahead of the store.
+  EmitToStreamer(
+      MCInstBuilder(AArch64::STSHH).addImm(MI->getOperand(4).getImm()));
+
+  MCInst Store;
+  Store.setOpcode(StOpc);
+  Store.addOperand(MCOperand::createReg(MI->getOperand(1).getReg())); // Data
+  Store.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); // Rn
+  Store.addOperand(MCOperand::createImm(MI->getOperand(2).getImm())); // Imm
+  Store.setFlags(MI->getFlags());
+  EmitToStreamer(*OutStreamer, Store);
+}
+
 // Simple pseudo-instructions have their lowering (with expansion to real
 // instructions) auto-generated.
 #include "AArch64GenMCPseudoLowering.inc"
@@ -3853,16 +3952,30 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
     emitCBPseudoExpansion(MI);
     return;
   case AArch64::ATOMIC_STORE_HINT_B:
-    emitAtomicHintPseudoExpansion(MI, 8);
-    return;
   case AArch64::ATOMIC_STORE_HINT_H:
-    emitAtomicHintPseudoExpansion(MI, 16);
-    return;
   case AArch64::ATOMIC_STORE_HINT_S:
-    emitAtomicHintPseudoExpansion(MI, 32);
-    return;
   case AArch64::ATOMIC_STORE_HINT_D:
-    emitAtomicHintPseudoExpansion(MI, 64);
+    emitAtomicHintPseudoExpansion(MI);
+    return;
+  case AArch64::ATOMIC_STORE_HINT_BroW:
+  case AArch64::ATOMIC_STORE_HINT_HroW:
+  case AArch64::ATOMIC_STORE_HINT_SroW:
+  case AArch64::ATOMIC_STORE_HINT_DroW:
+  case AArch64::ATOMIC_STORE_HINT_BroX:
+  case AArch64::ATOMIC_STORE_HINT_HroX:
+  case AArch64::ATOMIC_STORE_HINT_SroX:
+  case AArch64::ATOMIC_STORE_HINT_DroX:
+    emitAtomicHintPseudoExpansionRO(MI);
+    return;
+  case AArch64::ATOMIC_STORE_HINT_Bui:
+  case AArch64::ATOMIC_STORE_HINT_Hui:
+  case AArch64::ATOMIC_STORE_HINT_Sui:
+  case AArch64::ATOMIC_STORE_HINT_Dui:
+  case AArch64::ATOMIC_STORE_HINT_Bi:
+  case AArch64::ATOMIC_STORE_HINT_Hi:
+  case AArch64::ATOMIC_STORE_HINT_Si:
+  case AArch64::ATOMIC_STORE_HINT_Di:
+    emitAtomicHintPseudoExpansionImm(MI);
     return;
   }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4a4b711ee7448..5f2687875dba6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18672,8 +18672,6 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
             cast<ConstantAsMetadata>(AtomicStHint->getOperand(0))->getValue())
             ->getZExtValue();
     AArch64AtomicStoreHint Hint = getAtomicStoreHintFromMD(HintVal);
-    assert(Hint != AArch64AtomicStoreHint::HINT_NONE &&
-           "Unrecognised atomic hint value requested.");
 
     if (static_cast<unsigned>(Hint) & 0b1)
       Flags |= MOAtomicHintBit0;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index af06ef9014031..e0dc12bc62de6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -286,47 +286,126 @@ def : Pat<(relaxed_store<atomic_store_64>
 // Atomic store with hint pseudos
 //===----------------------------------
 
-class BaseStoreHintPseudo<RegisterClass regtype>
-      : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
-                        i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]> {
-  let Size = 8;
-  let isCodeGenOnly = 1;
-  let hasSideEffects = 1;
-  let mayStore = 1;
-}
-
-def ATOMIC_STORE_HINT_B : BaseStoreHintPseudo<GPR32>;
-def ATOMIC_STORE_HINT_H : BaseStoreHintPseudo<GPR32>;
-def ATOMIC_STORE_HINT_S : BaseStoreHintPseudo<GPR32>;
-def ATOMIC_STORE_HINT_D : BaseStoreHintPseudo<GPR64>;
-
-class atomic_hint_store<PatFrag Base, bit Rel, bit SeqCst, code Pred, code GIPred>
-  : PatFrag<(ops node:$ptr, node:$val),
+let Size = 8, isCodeGenOnly = 1, hasSideEffects = 1, mayStore = 1 in { // shared by all hint pseudos; Size 8 presumably = STSHH + store (TODO confirm)
+  class BaseStoreHintPseudo<RegisterClass regtype> // address is a single base register
+        : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
+                          i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+  // Base register plus a 32-bit index register with a WExt extend operand.
+  class BaseStoreHintPseudoROW<RegisterClass regtype, Operand WExt>
+        : Pseudo<(outs), (ins GPR64sp:$Rn, GPR32:$Rm, regtype:$data, WExt:$extend,
+                          i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+  // Base register plus a 64-bit index register with an XExt extend operand.
+  class BaseStoreHintPseudoROX<RegisterClass regtype, Operand XExt>
+        : Pseudo<(outs), (ins GPR64sp:$Rn, GPR64:$Rm, regtype:$data, XExt:$extend,
+                          i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+  // Base register plus an immediate offset whose operand type is UImmOp.
+  class BaseStoreHintPseudoUImm<RegisterClass regtype, Operand UImmOp>
+        : Pseudo<(outs), (ins GPR64sp:$Rn, regtype:$data, UImmOp:$offset,
+                          i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+  // Base register plus a simm9 immediate offset.
+  class BaseStoreHintPseudoImm<RegisterClass regtype>
+        : Pseudo<(outs), (ins GPR64sp:$Rn, regtype:$data, simm9:$offset,
+                          i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+}
+
+multiclass AtomicStoreHintPseudos<RegisterClass regtype, Operand WExt, Operand XExt, Operand UImmOp> {
+  def NAME : BaseStoreHintPseudo<regtype>;                   // base register only
+  def NAME # roW : BaseStoreHintPseudoROW<regtype, WExt>;    // base + 32-bit index register
+  def NAME # roX : BaseStoreHintPseudoROX<regtype, XExt>;    // base + 64-bit index register
+  def NAME # ui  : BaseStoreHintPseudoUImm<regtype, UImmOp>; // base + UImmOp offset
+  def NAME # i   : BaseStoreHintPseudoImm<regtype>;          // base + simm9 offset
+}
+// Instantiate the five pseudos for each of the B, H, S and D store variants.
+defm ATOMIC_STORE_HINT_B : AtomicStoreHintPseudos<GPR32, ro_Wextend8,  ro_Xextend8,  uimm12s1>;
+defm ATOMIC_STORE_HINT_H : AtomicStoreHintPseudos<GPR32, ro_Wextend16, ro_Xextend16, uimm12s2>;
+defm ATOMIC_STORE_HINT_S : AtomicStoreHintPseudos<GPR32, ro_Wextend32, ro_Xextend32, uimm12s4>;
+defm ATOMIC_STORE_HINT_D : AtomicStoreHintPseudos<GPR64, ro_Wextend64, ro_Xextend64, uimm12s8>;
+
+class atomic_hint_store<PatFrag Base, int Order, code Pred, code GIPred>
+  : PatFrag<(ops node:$val, node:$ptr),
             (Base node:$val, node:$ptr), Pred> {
   let IsAtomic = 1;
-  let IsAtomicOrderingReleaseOrStronger = Rel;
-  let IsAtomicOrderingSequentiallyConsistent = SeqCst;
+  let IsAtomicOrderingMonotonic = !eq(Order, 2);
+  let IsAtomicOrderingRelease = !eq(Order, 5);
+  let IsAtomicOrderingSequentiallyConsistent = !eq(Order, 7);
   let GISelPredicateCode = GIPred;
 }
 
-multiclass AtomicHintPatterns<int Order, int Hint, bit Rel, bit SeqCst, code Pred, code GIPred> {
-  def : Pat<(atomic_hint_store<atomic_store_8, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+multiclass AtomicHintPatternsAddrMode<int Hint, code Pred, code GIPred> { // order is fixed to 2 in every pattern; roW (32-bit index register) forms first
+  def : Pat<(atomic_hint_store<atomic_store_8, 2, Pred, GIPred>
+             GPR32:$data, (ro_Windexed8 GPR64sp:$addr, GPR32:$Rm, ro_Wextend8:$extend)),
+            (ATOMIC_STORE_HINT_BroW GPR64sp:$addr, GPR32:$Rm, GPR32:$data, ro_Wextend8:$extend, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_16, 2, Pred, GIPred>
+             GPR32:$data, (ro_Windexed16 GPR64sp:$addr, GPR32:$Rm, ro_Wextend16:$extend)),
+            (ATOMIC_STORE_HINT_HroW GPR64sp:$addr, GPR32:$Rm, GPR32:$data, ro_Wextend16:$extend, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_32, 2, Pred, GIPred>
+             GPR32:$data, (ro_Windexed32 GPR64sp:$addr, GPR32:$Rm, ro_Wextend32:$extend)),
+            (ATOMIC_STORE_HINT_SroW GPR64sp:$addr, GPR32:$Rm, GPR32:$data, ro_Wextend32:$extend, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_64, 2, Pred, GIPred>
+             GPR64:$data, (ro_Windexed64 GPR64sp:$addr, GPR32:$Rm, ro_Wextend64:$extend)),
+            (ATOMIC_STORE_HINT_DroW GPR64sp:$addr, GPR32:$Rm, GPR64:$data, ro_Wextend64:$extend, (i32 2), (i32 Hint))>;
+  // Register offset with a 64-bit index register (roX pseudos).
+  def : Pat<(atomic_hint_store<atomic_store_8, 2, Pred, GIPred>
+             GPR32:$data, (ro_Xindexed8 GPR64sp:$addr, GPR64:$Rm, ro_Xextend8:$extend)),
+            (ATOMIC_STORE_HINT_BroX GPR64sp:$addr, GPR64:$Rm, GPR32:$data, ro_Xextend8:$extend, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_16, 2, Pred, GIPred>
+             GPR32:$data, (ro_Xindexed16 GPR64sp:$addr, GPR64:$Rm, ro_Xextend16:$extend)),
+            (ATOMIC_STORE_HINT_HroX GPR64sp:$addr, GPR64:$Rm, GPR32:$data, ro_Xextend16:$extend, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_32, 2, Pred, GIPred>
+             GPR32:$data, (ro_Xindexed32 GPR64sp:$addr, GPR64:$Rm, ro_Xextend32:$extend)),
+            (ATOMIC_STORE_HINT_SroX GPR64sp:$addr, GPR64:$Rm, GPR32:$data, ro_Xextend32:$extend, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_64, 2, Pred, GIPred>
+             GPR64:$data, (ro_Xindexed64 GPR64sp:$addr, GPR64:$Rm, ro_Xextend64:$extend)),
+            (ATOMIC_STORE_HINT_DroX GPR64sp:$addr, GPR64:$Rm, GPR64:$data, ro_Xextend64:$extend, (i32 2), (i32 Hint))>;
+  // Base plus uimm12s* immediate offset (am_indexed*, ui pseudos).
+  def : Pat<(atomic_hint_store<atomic_store_8, 2, Pred, GIPred>
+             GPR32:$data, (am_indexed8 GPR64sp:$addr, uimm12s1:$offset)),
+            (ATOMIC_STORE_HINT_Bui GPR64sp:$addr, GPR32:$data, uimm12s1:$offset, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_16, 2, Pred, GIPred>
+             GPR32:$data, (am_indexed16 GPR64sp:$addr, uimm12s2:$offset)),
+            (ATOMIC_STORE_HINT_Hui GPR64sp:$addr, GPR32:$data, uimm12s2:$offset, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_32, 2, Pred, GIPred>
+             GPR32:$data, (am_indexed32 GPR64sp:$addr, uimm12s4:$offset)),
+            (ATOMIC_STORE_HINT_Sui GPR64sp:$addr, GPR32:$data, uimm12s4:$offset, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_64, 2, Pred, GIPred>
+             GPR64:$data, (am_indexed64 GPR64sp:$addr, uimm12s8:$offset)),
+            (ATOMIC_STORE_HINT_Dui GPR64sp:$addr, GPR64:$data, uimm12s8:$offset, (i32 2), (i32 Hint))>;
+  // Base plus simm9 immediate offset (am_unscaled*, i pseudos).
+  def : Pat<(atomic_hint_store<atomic_store_8, 2, Pred, GIPred>
+             GPR32:$data, (am_unscaled8 GPR64sp:$addr, simm9:$offset)),
+            (ATOMIC_STORE_HINT_Bi GPR64sp:$addr, GPR32:$data, simm9:$offset, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_16, 2, Pred, GIPred>
+             GPR32:$data, (am_unscaled16 GPR64sp:$addr, simm9:$offset)),
+            (ATOMIC_STORE_HINT_Hi GPR64sp:$addr, GPR32:$data, simm9:$offset, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_32, 2, Pred, GIPred>
+             GPR32:$data, (am_unscaled32 GPR64sp:$addr, simm9:$offset)),
+            (ATOMIC_STORE_HINT_Si GPR64sp:$addr, GPR32:$data, simm9:$offset, (i32 2), (i32 Hint))>;
+  def : Pat<(atomic_hint_store<atomic_store_64, 2, Pred, GIPred>
+             GPR64:$data, (am_unscaled64 GPR64sp:$addr, simm9:$offset)),
+            (ATOMIC_STORE_HINT_Di GPR64sp:$addr, GPR64:$data, simm9:$offset, (i32 2), (i32 Hint))>;
+}
+
+multiclass AtomicHintPatterns<int Order, int Hint, code Pred, code GIPred> {
+  def : Pat<(atomic_hint_store<atomic_store_8,  Order, Pred, GIPred> GPR32:$data, GPR64sp:$addr),
             (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
-  def : Pat<(atomic_hint_store<atomic_store_16, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+  def : Pat<(atomic_hint_store<atomic_store_16, Order, Pred, GIPred> GPR32:$data, GPR64sp:$addr),
             (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
-  def : Pat<(atomic_hint_store<atomic_store_32, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+  def : Pat<(atomic_hint_store<atomic_store_32, Order, Pred, GIPred> GPR32:$data, GPR64sp:$addr),
             (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
-  def : Pat<(atomic_hint_store<atomic_store_64, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR64:$data),
+  def : Pat<(atomic_hint_store<atomic_store_64, Order, Pred, GIPred> GPR64:$data, GPR64sp:$addr),
             (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 Order), (i32 Hint))>;
 }
 
 let AddedComplexity = 15 in {
-  defm : AtomicHintPatterns<2, 0, 0, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
-  defm : AtomicHintPatterns<5, 0, 1, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
-  defm : AtomicHintPatterns<7, 0, 0, 1, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
-  defm : AtomicHintPatterns<2, 1, 0, 0, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
-  defm : AtomicHintPatterns<5, 1, 1, 0, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
-  defm : AtomicHintPatterns<7, 1, 0, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+  defm : AtomicHintPatternsAddrMode<0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatternsAddrMode<1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+
+  defm : AtomicHintPatterns<2, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatterns<5, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatterns<7, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+  defm : AtomicHintPatterns<2, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+  defm : AtomicHintPatterns<5, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+  defm : AtomicHintPatterns<7, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
 }
 
 //===----------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 1d75d3aa89f77..8184e0150e964 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2893,8 +2893,6 @@ AArch64InstrInfo::decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags) {
   if (MMOFlags & MOAtomicHintBit1)
     AtomicHint += 0b10;
 
-  if (!isValidAArch64AtomicHintValue(AtomicHint))
-    return AArch64AtomicStoreHint::HINT_NONE;
   return static_cast<AArch64AtomicStoreHint>(AtomicHint);
 }
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 3c1f7e6213a36..525ead663ff84 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2560,14 +2560,11 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
       return false;
     }
 
-    unsigned HintOpc;
-    unsigned StoreSize = St.getMemSizeInBits().getValue();
+    // AtomicExpandPass converts FP stores to integer stores of the equivalent
+    // bitwidth. Widen the register here if the original type was f16.
+    unsigned StoreSize = St.getMemSize().getValue();
     Register ValueReg = St.getValueReg();
-    switch (StoreSize) {
-    case 8:
-      HintOpc = AArch64::ATOMIC_STORE_HINT_B;
-      break;
-    case 16: {
+    if (StoreSize == 2) {
       Register CastReg;
       if (mi_match(ValueReg, MRI, m_GBitcast(m_Reg(CastReg)))) {
         auto Undef = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF,
@@ -2579,26 +2576,72 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
         constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
         ValueReg = Ins.getReg(0);
       }
-      HintOpc = AArch64::ATOMIC_STORE_HINT_H;
-      break;
     }
-    case 32:
-      HintOpc = AArch64::ATOMIC_STORE_HINT_S;
-      break;
-    case 64:
-      HintOpc = AArch64::ATOMIC_STORE_HINT_D;
-      break;
-    default:
-      llvm_unreachable("Unexpected getMemSizeInBits() value for atomic hint.");
+
+    static constexpr unsigned BaseOpcodes[] = {
+        AArch64::ATOMIC_STORE_HINT_B, AArch64::ATOMIC_STORE_HINT_H,
+        AArch64::ATOMIC_STORE_HINT_S, AArch64::ATOMIC_STORE_HINT_D};
+    static constexpr unsigned RegWOpcodes[] = {
+        AArch64::ATOMIC_STORE_HINT_BroW, AArch64::ATOMIC_STORE_HINT_HroW,
+        AArch64::ATOMIC_STORE_HINT_SroW, AArch64::ATOMIC_STORE_HINT_DroW};
+    static constexpr unsigned RegXOpcodes[] = {
+        AArch64::ATOMIC_STORE_HINT_BroX, AArch64::ATOMIC_STORE_HINT_HroX,
+        AArch64::ATOMIC_STORE_HINT_SroX, AArch64::ATOMIC_STORE_HINT_DroX};
+    static constexpr unsigned UImmOpcodes[] = {
+        AArch64::ATOMIC_STORE_HINT_Bui, AArch64::ATOMIC_STORE_HINT_Hui,
+        AArch64::ATOMIC_STORE_HINT_Sui, AArch64::ATOMIC_STORE_HINT_Dui};
+    static constexpr unsigned ImmOpcodes[] = {
+        AArch64::ATOMIC_STORE_HINT_Bi, AArch64::ATOMIC_STORE_HINT_Hi,
+        AArch64::ATOMIC_STORE_HINT_Si, AArch64::ATOMIC_STORE_HINT_Di};
+
+    AtomicOrdering Ordering = MMO.getSuccessOrdering();
+    MachineInstrBuilder StrPseudo;
+    unsigned HintOpc = 0;
+
+    // Other addressing modes can be used when the ordering is monotonic.
+    // Try to match these first, before falling back on the basic operands.
+    auto AddModeWRO = selectAddrModeWRO(St.getOperand(1), StoreSize);
+    auto AddModeXRO = selectAddrModeXRO(St.getOperand(1), StoreSize);
+    auto AddModeUImm = selectAddrModeIndexed(St.getOperand(1), StoreSize);
+    auto AddModeImm = selectAddrModeUnscaled(St.getOperand(1), StoreSize);
+
+    if (AddModeWRO && Ordering == AtomicOrdering::Monotonic) {
+      HintOpc = RegWOpcodes[Log2_32(StoreSize)];
+      StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc));
+      (*AddModeWRO)[0](StrPseudo);
+      (*AddModeWRO)[1](StrPseudo);
+      StrPseudo.addReg(ValueReg);
+      (*AddModeWRO)[2](StrPseudo);
+    } else if (AddModeXRO && Ordering == AtomicOrdering::Monotonic) {
+      HintOpc = RegXOpcodes[Log2_32(StoreSize)];
+      StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc));
+      (*AddModeXRO)[0](StrPseudo);
+      (*AddModeXRO)[1](StrPseudo);
+      StrPseudo.addReg(ValueReg);
+      (*AddModeXRO)[2](StrPseudo);
+    } else if (AddModeUImm && Ordering == AtomicOrdering::Monotonic) {
+      HintOpc = UImmOpcodes[Log2_32(StoreSize)];
+      StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc));
+      (*AddModeUImm)[0](StrPseudo);
+      StrPseudo.addReg(ValueReg);
+      (*AddModeUImm)[1](StrPseudo);
+    } else if (AddModeImm && Ordering == AtomicOrdering::Monotonic) {
+      HintOpc = ImmOpcodes[Log2_32(StoreSize)];
+      StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc));
+      (*AddModeImm)[0](StrPseudo);
+      StrPseudo.addReg(ValueReg);
+      (*AddModeImm)[1](StrPseudo);
+    } else {
+      HintOpc = BaseOpcodes[Log2_32(StoreSize)];
+      StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
+                      .addReg(St.getPointerReg())
+                      .addReg(ValueReg);
     }
 
+    // Add the ordering and hint operands, before erasing the store.
     unsigned HintImm = Hint == AArch64AtomicStoreHint::HINT_STSHH_KEEP ? 0 : 1;
-
-    auto StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
-                         .addReg(St.getPointerReg())
-                         .addReg(ValueReg)
-                         .addImm((int)MMO.getSuccessOrdering())
-                         .addImm(static_cast<unsigned>(HintImm));
+    StrPseudo.addImm((int)Ordering);
+    StrPseudo.addImm(HintImm);
 
     StrPseudo.cloneMemRefs(I);
     I.eraseFromParent();
@@ -8094,8 +8137,7 @@ void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
 
 bool AArch64InstructionSelector::isAtomicHintInst(
     const MachineInstr &MI, AArch64AtomicStoreHint Hint) const {
-  const GStore &St = cast<GStore>(MI);
-  auto MMO = St.getMMO();
+  auto MMO = cast<GStore>(MI).getMMO();
   return AArch64InstrInfo::decodeAtomicHintFlags(MMO.getFlags()) == Hint;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
index dfcfa92cbc6c8..21aec7dae322f 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
@@ -6,7 +6,7 @@
 ; STSHH: Keep, Relaxed
 ;
 
-define dso_local void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) nounwind {
+define void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_relaxed_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
@@ -16,7 +16,7 @@ define dso_local void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) noun
   ret void
 }
 
-define dso_local void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) nounwind {
+define void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_relaxed_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
@@ -26,7 +26,7 @@ define dso_local void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) no
   ret void
 }
 
-define dso_local void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) nounwind {
+define void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_relaxed_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
@@ -36,7 +36,7 @@ define dso_local void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) no
   ret void
 }
 
-define dso_local void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) nounwind {
+define void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_relaxed_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
@@ -50,7 +50,7 @@ define dso_local void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) no
 ; STSHH: Keep, Release
 ;
 
-define dso_local void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat %val) nounwind {
+define void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_release_bfloat:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
@@ -62,7 +62,7 @@ define dso_local void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat %v
   ret void
 }
 
-define dso_local void @test_atomic_store_keep_release_half(ptr %ptr, half %val) nounwind {
+define void @test_atomic_store_keep_release_half(ptr %ptr, half %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_release_half:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
@@ -74,7 +74,7 @@ define dso_local void @test_atomic_store_keep_release_half(ptr %ptr, half %val)
   ret void
 }
 
-define dso_local void @test_atomic_store_keep_release_float(ptr %ptr, float %val) nounwind {
+define void @test_atomic_store_keep_release_float(ptr %ptr, float %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_release_float:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov w8, s0
@@ -85,7 +85,7 @@ define dso_local void @test_atomic_store_keep_release_float(ptr %ptr, float %val
   ret void
 }
 
-define dso_local void @test_atomic_store_keep_release_double(ptr %ptr, double %val) nounwind {
+define void @test_atomic_store_keep_release_double(ptr %ptr, double %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_release_double:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov x8, d0
@@ -100,7 +100,7 @@ define dso_local void @test_atomic_store_keep_release_double(ptr %ptr, double %v
 ; STSHH: Keep, SequentiallyConsistent
 ;
 
-define dso_local void @test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) nounwind {
+define void @test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_seqcst_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
@@ -110,7 +110,7 @@ define dso_local void @test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) nounw
   ret void
 }
 
-define dso_local void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) nounwind {
+define void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_seqcst_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
@@ -120,7 +120,7 @@ define dso_local void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) nou
   ret void
 }
 
-define dso_local void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) nounwind {
+define void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_seqcst_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
@@ -130,7 +130,7 @@ define dso_local void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) nou
   ret void
 }
 
-define dso_local void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) nounwind {
+define void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_keep_seqcst_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh keep
@@ -144,7 +144,7 @@ define dso_local void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) nou
 ; STSHH: Stream, Relaxed
 ;
 
-define dso_local void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat %val) nounwind {
+define void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_strm_relaxed_bfloat:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
@@ -156,7 +156,7 @@ define dso_local void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat %v
   ret void
 }
 
-define dso_local void @test_atomic_store_strm_relaxed_half(ptr %ptr, half %val) nounwind {
+define void @test_atomic_store_strm_relaxed_half(ptr %ptr, half %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_strm_relaxed_half:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
@@ -168,7 +168,7 @@ define dso_local void @test_atomic_store_strm_relaxed_half(ptr %ptr, half %val)
   ret void
 }
 
-define dso_local void @test_atomic_store_strm_relaxed_float(ptr %ptr, float %val) nounwind {
+define void @test_atomic_store_strm_relaxed_float(ptr %ptr, float %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_strm_relaxed_float:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov w8, s0
@@ -179,7 +179,7 @@ define dso_local void @test_atomic_store_strm_relaxed_float(ptr %ptr, float %val
   ret void
 }
 
-define dso_local void @test_atomic_store_strm_relaxed_double(ptr %ptr, double %val) nounwind {
+define void @test_atomic_store_strm_relaxed_double(ptr %ptr, double %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_strm_relaxed_double:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov x8, d0
@@ -194,7 +194,7 @@ define dso_local void @test_atomic_store_strm_relaxed_double(ptr %ptr, double %v
 ; STSHH: Stream, Release
 ;
 
-define dso_local void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) nounwind {
+define void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_stream_release_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh strm
@@ -204,7 +204,7 @@ define dso_local void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) no
   ret void
 }
 
-define dso_local void @test_atomic_store_stream_release_i16(ptr %ptr, i16 %val) nounwind {
+define void @test_atomic_store_stream_release_i16(ptr %ptr, i16 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_stream_release_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh strm
@@ -214,7 +214,7 @@ define dso_local void @test_atomic_store_stream_release_i16(ptr %ptr, i16 %val)
   ret void
 }
 
-define dso_local void @test_atomic_store_stream_release_i32(ptr %ptr, i32 %val) nounwind {
+define void @test_atomic_store_stream_release_i32(ptr %ptr, i32 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_stream_release_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh strm
@@ -224,7 +224,7 @@ define dso_local void @test_atomic_store_stream_release_i32(ptr %ptr, i32 %val)
   ret void
 }
 
-define dso_local void @test_atomic_store_stream_release_i64(ptr %ptr, i64 %val) nounwind {
+define void @test_atomic_store_stream_release_i64(ptr %ptr, i64 %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_stream_release_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stshh strm
@@ -238,7 +238,7 @@ define dso_local void @test_atomic_store_stream_release_i64(ptr %ptr, i64 %val)
 ; STSHH: Stream, SequentiallyConsistent
 ;
 
-define dso_local void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat %val) nounwind {
+define void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_stream_seqcst_bfloat:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
@@ -250,7 +250,7 @@ define dso_local void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat %
   ret void
 }
 
-define dso_local void @test_atomic_store_stream_seqcst_i16(ptr %ptr, half %val) nounwind {
+define void @test_atomic_store_stream_seqcst_i16(ptr %ptr, half %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_stream_seqcst_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
@@ -262,7 +262,7 @@ define dso_local void @test_atomic_store_stream_seqcst_i16(ptr %ptr, half %val)
   ret void
 }
 
-define dso_local void @test_atomic_store_stream_seqcst_i32(ptr %ptr, float %val) nounwind {
+define void @test_atomic_store_stream_seqcst_i32(ptr %ptr, float %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_stream_seqcst_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov w8, s0
@@ -273,7 +273,7 @@ define dso_local void @test_atomic_store_stream_seqcst_i32(ptr %ptr, float %val)
   ret void
 }
 
-define dso_local void @test_atomic_store_stream_seqcst_double(ptr %ptr, double %val) nounwind {
+define void @test_atomic_store_stream_seqcst_double(ptr %ptr, double %val) nounwind {
 ; CHECK-LABEL: test_atomic_store_stream_seqcst_double:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov x8, d0
@@ -284,5 +284,19 @@ define dso_local void @test_atomic_store_stream_seqcst_double(ptr %ptr, double %
   ret void
 }
 
+;
+; Invalid Hint
+;
+
+define void @test_atomic_store_invalid_hint(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_invalid_hint:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stlrb w1, [x0]
+; CHECK-NEXT:    ret
+  store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !2 ; !2 is i32 2; the checks expect a plain stlrb with no stshh before it
+  ret void
+}
+
 !0 = !{i32 0}
 !1 = !{i32 1}
+!2 = !{i32 2} ; hint operand of test_atomic_store_invalid_hint, whose checks expect no stshh
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-relaxed-store-hint.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-relaxed-store-hint.ll
new file mode 100644
index 0000000000000..d80d9460c107c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-relaxed-store-hint.ll
@@ -0,0 +1,416 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -global-isel=1 < %s | FileCheck %s
+
+;
+; W register offset
+;
+
+; Int
+
+define void @relaxed_store_hint_roW_i8(ptr %ptr, i32 %offset, i8 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strb w2, [x0, w1, sxtw]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i8, ptr %ptr, i32 %offset ; i32 index; the checks expect a sign-extended W register offset
+  store atomic i8 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep"
+  ret void
+}
+
+define void @relaxed_store_hint_roW_i16(ptr %ptr, i32 %offset, i16 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strh w2, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i16, ptr %ptr, i32 %offset ; i32 index; the checks expect a sign-extended W register offset scaled by 2
+  store atomic i16 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep"
+  ret void
+}
+
+define void @relaxed_store_hint_roW_i32(ptr %ptr, i32 %offset, i32 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str w2, [x0, w1, sxtw #2]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i32, ptr %ptr, i32 %offset ; i32 index; the checks expect a sign-extended W register offset scaled by 4
+  store atomic i32 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep"
+  ret void
+}
+
+define void @relaxed_store_hint_roW_i64(ptr %ptr, i32 %offset, i64 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str x2, [x0, w1, sxtw #3]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i64, ptr %ptr, i32 %offset ; i32 index; the checks expect a sign-extended W register offset scaled by 8
+  store atomic i64 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep"
+  ret void
+}
+
+; FP
+
+define void @relaxed_store_hint_roW_bfloat(ptr %ptr, i32 %offset, bfloat %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strh w8, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %addr = getelementptr bfloat, ptr %ptr, i32 %offset ; i32 index; the checks expect a sign-extended W register offset scaled by 2
+  store atomic bfloat %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_roW_half(ptr %ptr, i32 %offset, half %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strh w8, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %addr = getelementptr half, ptr %ptr, i32 %offset ; i32 index; the checks expect a sign-extended W register offset scaled by 2
+  store atomic half %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_roW_float(ptr %ptr, i32 %offset, float %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str w8, [x0, w1, sxtw #2]
+; CHECK-NEXT:    ret
+  %addr = getelementptr float, ptr %ptr, i32 %offset ; i32 index; the checks expect a sign-extended W register offset scaled by 4
+  store atomic float %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_roW_double(ptr %ptr, i32 %offset, double %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str x8, [x0, w1, sxtw #3]
+; CHECK-NEXT:    ret
+  %addr = getelementptr double, ptr %ptr, i32 %offset ; i32 index; the checks expect a sign-extended W register offset scaled by 8
+  store atomic double %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm" after the fmov to a GPR
+  ret void
+}
+
+;
+; X register offset
+;
+
+; Int
+
+define void @relaxed_store_hint_roX_i8(ptr %ptr, i64 %offset, i8 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strb w2, [x0, x1]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i8, ptr %ptr, i64 %offset ; i64 index; the checks expect an X register offset
+  store atomic i8 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm"
+  ret void
+}
+
+define void @relaxed_store_hint_roX_i16(ptr %ptr, i64 %offset, i16 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strh w2, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i16, ptr %ptr, i64 %offset ; i64 index; the checks expect an X register offset shifted left by 1
+  store atomic i16 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm"
+  ret void
+}
+
+define void @relaxed_store_hint_roX_i32(ptr %ptr, i64 %offset, i32 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str w2, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i32, ptr %ptr, i64 %offset ; i64 index; the checks expect an X register offset shifted left by 2
+  store atomic i32 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm"
+  ret void
+}
+
+define void @relaxed_store_hint_roX_i64(ptr %ptr, i64 %offset, i64 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str x2, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i64, ptr %ptr, i64 %offset ; i64 index; the checks expect an X register offset shifted left by 3
+  store atomic i64 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm"
+  ret void
+}
+
+; FP
+
+define void @relaxed_store_hint_roX_bfloat(ptr %ptr, i64 %offset, bfloat %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strh w8, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %addr = getelementptr bfloat, ptr %ptr, i64 %offset ; i64 index; the checks expect an X register offset shifted left by 1
+  store atomic bfloat %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_roX_half(ptr %ptr, i64 %offset, half %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strh w8, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %addr = getelementptr half, ptr %ptr, i64 %offset ; i64 index; the checks expect an X register offset shifted left by 1
+  store atomic half %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_roX_float(ptr %ptr, i64 %offset, float %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str w8, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
+  %addr = getelementptr float, ptr %ptr, i64 %offset ; i64 index; the checks expect an X register offset shifted left by 2
+  store atomic float %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_roX_double(ptr %ptr, i64 %offset, double %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str x8, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %addr = getelementptr double, ptr %ptr, i64 %offset ; i64 index; the checks expect an X register offset shifted left by 3
+  store atomic double %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep" after the fmov to a GPR
+  ret void
+}
+
+;
+; Unsigned immediate offset
+;
+
+; Int
+
+define void @relaxed_store_hint_uimm_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strb w1, [x0, #4095]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i8, ptr %ptr, i32 4095 ; byte offset 4095; the checks expect it as the strb immediate
+  store atomic i8 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm"
+  ret void
+}
+
+define void @relaxed_store_hint_uimm_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    strh w1, [x0, #4096]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i16, ptr %ptr, i32 2048 ; byte offset 2048 * 2 = 4096; the checks expect it as the strh immediate
+  store atomic i16 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm"
+  ret void
+}
+
+define void @relaxed_store_hint_uimm_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str w1, [x0, #4096]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i32, ptr %ptr, i32 1024 ; byte offset 1024 * 4 = 4096; the checks expect it as the str immediate
+  store atomic i32 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm"
+  ret void
+}
+
+define void @relaxed_store_hint_uimm_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    str x1, [x0, #4096]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i64, ptr %ptr, i32 512 ; byte offset 512 * 8 = 4096; the checks expect it as the str immediate
+  store atomic i64 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm"
+  ret void
+}
+
+; FP
+
+define void @relaxed_store_hint_uimm_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strh w8, [x0, #4096]
+; CHECK-NEXT:    ret
+  %addr = getelementptr bfloat, ptr %ptr, i32 2048 ; byte offset 2048 * 2 = 4096; the checks expect it as the strh immediate
+  store atomic bfloat %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_uimm_half(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    strh w8, [x0, #4096]
+; CHECK-NEXT:    ret
+  %addr = getelementptr half, ptr %ptr, i32 2048 ; byte offset 2048 * 2 = 4096; the checks expect it as the strh immediate
+  store atomic half %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_uimm_float(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str w8, [x0, #8192]
+; CHECK-NEXT:    ret
+  %addr = getelementptr float, ptr %ptr, i32 2048 ; byte offset 2048 * 4 = 8192; the checks expect it as the str immediate
+  store atomic float %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_uimm_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    str x8, [x0, #16384]
+; CHECK-NEXT:    ret
+  %addr = getelementptr double, ptr %ptr, i32 2048 ; byte offset 2048 * 8 = 16384; the checks expect it as the str immediate
+  store atomic double %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep" after the fmov to a GPR
+  ret void
+}
+
+;
+; Signed immediate offset
+;
+
+; Int
+
+define void @relaxed_store_hint_imm_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    sturb w1, [x0, #-256]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i8, ptr %ptr, i32 -256 ; byte offset -256; the checks expect it as the sturb immediate
+  store atomic i8 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep"
+  ret void
+}
+
+define void @relaxed_store_hint_imm_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    sturh w1, [x0, #-256]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i16, ptr %ptr, i32 -128 ; byte offset -128 * 2 = -256; the checks expect it as the sturh immediate
+  store atomic i16 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep"
+  ret void
+}
+
+define void @relaxed_store_hint_imm_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stur w1, [x0, #-256]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i32, ptr %ptr, i32 -64 ; byte offset -64 * 4 = -256; the checks expect it as the stur immediate
+  store atomic i32 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep"
+  ret void
+}
+
+define void @relaxed_store_hint_imm_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stshh keep
+; CHECK-NEXT:    stur x1, [x0, #-256]
+; CHECK-NEXT:    ret
+  %addr = getelementptr i64, ptr %ptr, i32 -32 ; byte offset -32 * 8 = -256; the checks expect it as the stur immediate
+  store atomic i64 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0 ; !0 is i32 0; the checks expect "stshh keep"
+  ret void
+}
+
+; FP
+
+define void @relaxed_store_hint_imm_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_bfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    sturh w8, [x0, #-256]
+; CHECK-NEXT:    ret
+  %addr = getelementptr bfloat, ptr %ptr, i32 -128 ; byte offset -128 * 2 = -256; the checks expect it as the sturh immediate
+  store atomic bfloat %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm" after the fmov to a GPR
+  ret void
+}
+
+define void @relaxed_store_hint_imm_half(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    sturh w8, [x0, #-256]
+; CHECK-NEXT:    ret
+  %addr = getelementptr half, ptr %ptr, i32 -128 ; byte offset -128 * 2 = -256; the checks expect it as the sturh immediate
+  store atomic half %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1 ; !1 is i32 1; the checks expect "stshh strm" after the fmov to a GPR
+  ret void
+}
+
+; Index -64 gives byte offset -64 * 4 = -256, the lowest offset STUR's signed 9-bit immediate can encode.
+define void @relaxed_store_hint_imm_float(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stur w8, [x0, #-256]
+; CHECK-NEXT:    ret
+  %addr = getelementptr float, ptr %ptr, i32 -64
+  store atomic float %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+; Index -32 gives byte offset -32 * 8 = -256, the lowest offset STUR's signed 9-bit immediate can encode.
+define void @relaxed_store_hint_imm_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stshh strm
+; CHECK-NEXT:    stur x8, [x0, #-256]
+; CHECK-NEXT:    ret
+  %addr = getelementptr double, ptr %ptr, i32 -32
+  store atomic double %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+  ret void
+}
+
+!0 = !{ i32 0 } ; hint operand of the tests above whose checks expect "stshh keep"
+!1 = !{ i32 1 } ; hint operand of the tests above whose checks expect "stshh strm"



More information about the cfe-commits mailing list