[clang] [llvm] [AArch64] Implement the atomic store with hint intrinsic (PR #198316)
Kerry McLaughlin via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 26 08:16:02 PDT 2026
https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/198316
>From 106c21f129e83919e43489241b35c1378a16fb05 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Wed, 6 May 2026 13:08:02 +0000
Subject: [PATCH 1/4] [AArch64] Implement the atomic store with hint intrinsic
Adds the following ACLE intrinsic as described in [1]:
void __arm_atomic_store_with_hint(type *ptr, type data,
int memory_order, int hint);
A regular atomic store instruction is emitted in Clang for this builtin
with additional metadata (`!aarch64.atomic.hint`), which ensures the
instruction is recognised as atomic by passes in LLVM.
When an atomic store has this metadata, this lowers to the ATOMIC_STORE_HINT
pseudo which is later expanded by AArch64ExpandPseudoInsts into an STSHH
instruction plus an atomic store.
The hint value is represented using MOTargetFlag3 & MOTargetFlag4 flags,
which will need to be extended when new hints are added in future.
[1] https://github.com/ARM-software/acle/pull/432
---
clang/include/clang/Basic/BuiltinsAArch64.td | 4 +
.../clang/Basic/DiagnosticSemaKinds.td | 6 +
clang/include/clang/Sema/SemaARM.h | 1 +
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 54 ++++
clang/lib/Headers/arm_acle.h | 6 +
clang/lib/Sema/SemaARM.cpp | 92 ++++++
clang/test/CodeGen/arm_acle.c | 78 +++++
clang/test/CodeGen/builtins-arm64.c | 13 +
clang/test/Sema/builtins-arm64.c | 17 ++
.../include/llvm/Support/AArch64AtomicHints.h | 36 +++
llvm/lib/CodeGen/AtomicExpandPass.cpp | 3 +
.../AArch64/AArch64ExpandPseudoInsts.cpp | 56 ++++
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 20 ++
.../Target/AArch64/AArch64ISelLowering.cpp | 21 +-
.../lib/Target/AArch64/AArch64InstrAtomics.td | 109 +++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 13 +
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 9 +
.../GISel/AArch64InstructionSelector.cpp | 61 ++++
.../Atomics/aarch64-atomic-store-hint.ll | 288 ++++++++++++++++++
.../Atomics/aarch64-atomic-store-hint.mir | 30 ++
20 files changed, 916 insertions(+), 1 deletion(-)
create mode 100644 llvm/include/llvm/Support/AArch64AtomicHints.h
create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 15257f3db5b41..19a65e7bdf8fe 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -171,6 +171,10 @@ let Attributes = [NoThrow], Features = "ls64" in {
def st64bv0 : AArch64TargetBuiltin<"uint64_t (void *, uint64_t const *)">;
}
+let Attributes = [NoThrow, CustomTypeChecking] in {
+ def atomic_store_with_hint : AArch64Builtin<"void(...)">;
+}
+
// Armv9.3-A Guarded Control Stack
let Attributes = [NoThrow], Features = "gcs" in {
def gcspopm : AArch64TargetBuiltin<"uint64_t (uint64_t)">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f84cd8dca6d4c..11ddd5b61e4cb 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9667,6 +9667,12 @@ def err_atomic_op_needs_atomic_int_or_fp : Error<
def err_atomic_op_needs_atomic_int : Error<
"address argument to atomic operation must be a pointer to "
"%select{|atomic }0integer (%1 invalid)">;
+def err_atomic_op_hint_data_size : Error<
+ "address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits">;
+def err_atomic_hint_has_invalid_memory_order : Error<
+ "invalid memory order argument to atomic hint operation (%0 invalid)">;
+def err_atomic_hint_has_invalid_hint_type : Error<
+ "invalid hint type argument to atomic hint operation (%0 invalid)">;
def warn_atomic_op_has_invalid_memory_order : Warning<
"%select{|success |failure }0memory order argument to atomic operation is invalid">,
InGroup<DiagGroup<"atomic-memory-ordering">>;
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index af8e0e9047171..b0a01c40ffece 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -70,6 +70,7 @@ class SemaARM : public SemaBase {
bool BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, int ArgNum,
unsigned ExpectedFieldNum, bool AllowName);
bool BuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall);
+ bool BuiltinARMAtomicStoreHintCall(unsigned BuiltinID, CallExpr *TheCall);
bool MveAliasValid(unsigned BuiltinID, llvm::StringRef AliasName);
bool CdeAliasValid(unsigned BuiltinID, llvm::StringRef AliasName);
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 4c668dabd53dc..4a13767268f96 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/Support/AArch64AtomicHints.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include <numeric>
@@ -2129,6 +2130,56 @@ static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
Ops);
}
+static Value *EmitAtomicStoreWithHintBuiltin(CodeGenFunction &CGF,
+ unsigned BuiltinID,
+ const CallExpr *E) {
+ CodeGen::CGBuilderTy &Builder = CGF.Builder;
+ CodeGen::CodeGenModule &CGM = CGF.CGM;
+ Expr::EvalResult Result;
+ if (!E->getArg(2)->EvaluateAsInt(Result, CGM.getContext()))
+ llvm_unreachable(
+ "Expected integer policy argument to atomic store with hint.");
+
+ StoreInst *Store =
+ Builder.CreateStore(CGF.EmitScalarExpr(E->getArg(1)), // Value
+ CGF.EmitPointerWithAlignment(E->getArg(0))); // Ptr;
+
+ AtomicOrdering Ordering;
+ unsigned OrderingArg = Result.Val.getInt().getExtValue();
+ assert(isValidAtomicOrderingCABI(OrderingArg) && "Invalid atomic ordering");
+
+ switch (static_cast<AtomicOrderingCABI>(OrderingArg)) {
+ default:
+ llvm_unreachable("Unsupported atomic ordering found.");
+ case AtomicOrderingCABI::relaxed:
+ Ordering = AtomicOrdering::Monotonic;
+ break;
+ case AtomicOrderingCABI::release:
+ Ordering = AtomicOrdering::Release;
+ break;
+ case AtomicOrderingCABI::seq_cst:
+ Ordering = AtomicOrdering::SequentiallyConsistent;
+ break;
+ }
+ Store->setAtomic(Ordering);
+
+ if (!E->getArg(3)->EvaluateAsInt(Result, CGM.getContext()))
+ llvm_unreachable(
+ "Expected integer hint argument to atomic store with hint.");
+ unsigned HintArg = Result.Val.getInt().getExtValue();
+ assert((getAtomicStoreHintFromMD(HintArg) !=
+ AArch64AtomicStoreHint::HINT_NONE) &&
+ "Invalid hint type");
+
+ MDNode *HintMDVal =
+ MDNode::get(CGM.getLLVMContext(),
+ llvm::ConstantAsMetadata::get(Builder.getInt32(HintArg)));
+ Store->setMetadata(CGM.getModule().getMDKindID("aarch64.atomic.hint"),
+ HintMDVal);
+
+ return Store;
+}
+
/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
/// argument that specifies the vector type. The additional argument is meant
/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
@@ -4927,6 +4978,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
+ if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_hint)
+ return EmitAtomicStoreWithHintBuiltin(*this, BuiltinID, E);
+
// Memory Tagging Extensions (MTE) Intrinsics
Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 9a6b6a837fa5a..bd99527dc5fa8 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -741,6 +741,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
}
#endif
+/* Atomic store with hints */
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+#define __arm_atomic_store_with_hint(ptr, data, memory_order, hint) \
+ __builtin_arm_atomic_store_with_hint(ptr, data, memory_order, hint)
+#endif
+
/* 11.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 5e7504fab416d..78f83d18deab8 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -17,6 +17,7 @@
#include "clang/Sema/Initialization.h"
#include "clang/Sema/ParsedAttr.h"
#include "clang/Sema/Sema.h"
+#include "llvm/Support/AArch64AtomicHints.h"
namespace clang {
@@ -322,6 +323,94 @@ bool SemaARM::BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
return false;
}
+bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
+ CallExpr *TheCall) {
+ if (SemaRef.checkArgCount(TheCall, 4))
+ return true;
+
+ // Arg 0 should be the pointer type. The pointee type must be a
+ // scalar integral or floating-point type of 8, 16, 32 or 64 bits.
+ ASTContext &Context = getASTContext();
+ Expr *PtrArg = TheCall->getArg(0);
+ auto PtrArgRes = SemaRef.DefaultFunctionArrayLvalueConversion(PtrArg);
+ if (PtrArgRes.isInvalid())
+ return true;
+ auto *PtrTy = PtrArg->getType()->getAs<PointerType>();
+ if (!PtrTy)
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_atomic_builtin_must_be_pointer)
+ << PtrArg->getType() << 0 << PtrArg->getSourceRange();
+ QualType PtrQT = PtrTy->getPointeeType();
+
+ // TODO: Allow MFloat8 types when supported by atomic store
+ if (!PtrQT->isIntegralType(getASTContext()) && !PtrQT->isFloatingType())
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_atomic_op_needs_atomic_int_or_fp)
+ << 0 << PtrQT << PtrArg->getSourceRange();
+
+ unsigned TySize =
+ Context.getTypeSize(Context.getCanonicalType(PtrQT).getUnqualifiedType());
+ if (TySize != 8 && TySize != 16 && TySize != 32 && TySize != 64)
+ return Diag(TheCall->getBeginLoc(), diag::err_atomic_op_hint_data_size)
+ << PtrArg->getSourceRange();
+
+ // Arg 1 is the data to be stored. The type must match the pointee
+ // type found above.
+ auto DataArgRes =
+ SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+ if (DataArgRes.isInvalid())
+ return true;
+ QualType DataQT = DataArgRes.get()->getType();
+
+ if (PtrQT != DataQT)
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_typecheck_call_different_arg_types)
+ << PtrQT << DataQT;
+
+ // Arg 2 is the memory order, which must be relaxed, release or seq_cst
+ auto MemOrdArg =
+ SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(2)).get();
+ std::optional<llvm::APSInt> MemOrdAP =
+ MemOrdArg->getIntegerConstantExpr(Context);
+ if (!MemOrdAP)
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_atomic_hint_has_invalid_memory_order)
+ << MemOrdArg->getType() << MemOrdArg->getSourceRange();
+
+ unsigned Ordering = MemOrdAP->getZExtValue();
+ if (!llvm::isValidAtomicOrderingCABI(Ordering))
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_atomic_hint_has_invalid_memory_order)
+ << *MemOrdAP << MemOrdArg->getSourceRange();
+
+ auto AtomicOrdering = static_cast<llvm::AtomicOrderingCABI>(Ordering);
+ if (AtomicOrdering != llvm::AtomicOrderingCABI::relaxed &&
+ AtomicOrdering != llvm::AtomicOrderingCABI::release &&
+ AtomicOrdering != llvm::AtomicOrderingCABI::seq_cst)
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_atomic_hint_has_invalid_memory_order)
+ << *MemOrdAP << MemOrdArg->getSourceRange();
+
+ // Arg 3 is the hint type. Only values represented by AArch64AtomicStoreHint
+ // are valid.
+ auto HintArg =
+ SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(3)).get();
+ std::optional<llvm::APSInt> HintAP = HintArg->getIntegerConstantExpr(Context);
+ if (!HintAP)
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_atomic_hint_has_invalid_hint_type)
+ << HintArg->getType() << HintArg->getSourceRange();
+
+ unsigned Hint = HintAP->getZExtValue();
+ if (llvm::getAtomicStoreHintFromMD(Hint) ==
+ llvm::AArch64AtomicStoreHint::HINT_NONE)
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_atomic_hint_has_invalid_hint_type)
+ << *HintAP << HintArg->getSourceRange();
+
+ return false;
+}
+
/// getNeonEltType - Return the QualType corresponding to the elements of
/// the vector type specified by the NeonTypeFlags. This is used to check
/// the pointer arguments for Neon load/store intrinsics.
@@ -1166,6 +1255,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
BuiltinID == AArch64::BI__builtin_arm_wsrp)
return BuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true);
+ if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_hint)
+ return BuiltinARMAtomicStoreHintCall(BuiltinID, TheCall);
+
// Only check the valid encoding range. Any constant in this range would be
// converted to a register of the form S2_2_C3_C4_5. Let the hardware throw
// an exception for incorrect registers. This matches MSVC behavior.
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index cd18fa63bfdbd..a8aa0916a8a4c 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1821,3 +1821,81 @@ int test_rndrrs(uint64_t *__addr) {
return __rndrrs(__addr);
}
#endif
+
+#if defined(__ARM_64BIT_STATE)
+
+// AArch64-LABEL: @test_atomic_store_hint_char(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 1, !aarch64.atomic.hint [[META3:![0-9]+]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_char(char *ptr, char data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_bfloat(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic bfloat [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META4:![0-9]+]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_bfloat(__bf16 *ptr, __bf16 data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_short(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_short(short *ptr, short data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_u32(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_u32(uint32_t *ptr, uint32_t data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_float(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic float [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_float(float *ptr, float data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_s64(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 8, !aarch64.atomic.hint [[META4]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_s64(int64_t *ptr, int64_t data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_long_long_int(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_long_long_int(long long int *ptr, long long int data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_double(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic double [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 8, !aarch64.atomic.hint [[META4]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_double(double *ptr, double data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1);
+}
+
+// AArch64: [[META3]] = !{i32 0}
+// AArch64-NEXT: [[META4]] = !{i32 1}
+#endif
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 3d054c79f1777..ad9ba7feca671 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -216,4 +216,17 @@ void trap() {
__builtin_arm_trap(42);
}
+void atomic_store_with_hint(int64_t *a, int64_t b) {
+ __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELAXED, 0); // HINT_STSHH_KEEP
+ // CHECK: store atomic i64 {{.*}}, ptr {{.*}} monotonic, align 8, !aarch64.atomic.hint ![[M1:[0-9]]]
+
+ __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_SEQ_CST, 0);
+ // CHECK: store atomic i64 {{.*}}, ptr {{.*}} seq_cst, align 8, !aarch64.atomic.hint ![[M1]]
+
+ __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELEASE, 1); // HINT_STSHH_STRM
+ // CHECK: store atomic i64 {{.*}}, ptr {{.*}} release, align 8, !aarch64.atomic.hint ![[M2:[0-9]]]
+}
+
// CHECK: ![[M0]] = !{!"1:2:3:4:5"}
+// CHECK: ![[M1]] = !{i32 0}
+// CHECK: ![[M2]] = !{i32 1}
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index 41cffd7ebb1a0..fb4718a1bd1f4 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -51,3 +51,20 @@ void test_trap(short s, unsigned short us) {
__builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
__builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
}
+
+void test_atomic_store_hint(char *c_ptr, __int128 *inv_ptr, float *f_ptr,
+ char c_data, __int128 inv_data, float f_data,
+ int inv_int) {
+ __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0); // expected-error {{too few arguments to function call, expected 4, have 3}}
+ __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}}
+
+ __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+ __builtin_arm_atomic_store_with_hint(c_ptr, f_data, 0, 0); // expected-error {{arguments are of different types ('char' vs 'float')}}
+ __builtin_arm_atomic_store_with_hint(inv_ptr, inv_data, 0, 0); // expected-error {{address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits}}
+
+ __builtin_arm_atomic_store_with_hint(c_ptr, c_data, inv_int, 0); // expected-error {{invalid memory order argument to atomic hint operation ('int' invalid)}}
+ __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 2, 0); // expected-error {{invalid memory order argument to atomic hint operation (2 invalid)}}
+
+ __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, inv_int); // expected-error {{invalid hint type argument to atomic hint operation ('int' invalid)}}
+ __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // expected-error {{invalid hint type argument to atomic hint operation (3 invalid)}}
+}
diff --git a/llvm/include/llvm/Support/AArch64AtomicHints.h b/llvm/include/llvm/Support/AArch64AtomicHints.h
new file mode 100644
index 0000000000000..8118f3e2df3ad
--- /dev/null
+++ b/llvm/include/llvm/Support/AArch64AtomicHints.h
@@ -0,0 +1,36 @@
+//===-- AArch64AtomicHints.h - AArch64 Atomic Hint Attributes ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AARCH64ATOMICHINTS_H
+#define LLVM_SUPPORT_AARCH64ATOMICHINTS_H
+
+namespace llvm {
+enum class AArch64AtomicStoreHint {
+ HINT_NONE = 0,
+ HINT_STSHH_KEEP = 1,
+ HINT_STSHH_STRM = 2,
+};
+
+template <typename Int> inline bool isValidAArch64AtomicHintValue(Int I) {
+ return (Int)AArch64AtomicStoreHint::HINT_STSHH_KEEP <= I &&
+ I <= (Int)AArch64AtomicStoreHint::HINT_STSHH_STRM;
+}
+
+template <typename Int>
+inline AArch64AtomicStoreHint getAtomicStoreHintFromMD(Int I) {
+ switch (I) {
+ case 0:
+ return AArch64AtomicStoreHint::HINT_STSHH_KEEP;
+ case 1:
+ return AArch64AtomicStoreHint::HINT_STSHH_STRM;
+ default:
+ return AArch64AtomicStoreHint::HINT_NONE;
+ }
+}
+} // namespace llvm
+#endif // LLVM_SUPPORT_AARCH64ATOMICHINTS_H
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 960d2492c2856..3b2d60fa4526c 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -254,6 +254,8 @@ static void copyMetadataForAtomic(Instruction &Dest,
Dest.setMetadata(ID, N);
else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
Dest.setMetadata(ID, N);
+ else if (ID == Ctx.getMDKindID("aarch64.atomic.hint"))
+ Dest.setMetadata(ID, N);
// Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
// uses.
@@ -719,6 +721,7 @@ StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
NewSI->setAlignment(SI->getAlign());
NewSI->setVolatile(SI->isVolatile());
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
+ copyMetadataForAtomic(*NewSI, *SI);
LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
SI->eraseFromParent();
return NewSI;
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 5fa93da1544fc..81fb5619f57b0 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -70,6 +70,9 @@ class AArch64ExpandPseudoImpl {
MachineBasicBlock::iterator MBBI);
bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
+ bool expandAtomicStoreHintPseudo(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned Size);
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
unsigned ExtendImm, unsigned ZeroReg,
@@ -1308,6 +1311,51 @@ bool AArch64ExpandPseudoImpl::expandFormTuplePseudo(
return true;
}
+bool AArch64ExpandPseudoImpl::expandAtomicStoreHintPseudo(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Size) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned StOpc;
+ unsigned Order = MI.getOperand(2).getImm();
+ bool Relaxed = Order == 0;
+
+ switch (Size) {
+ case 8:
+ StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
+ break;
+ case 16:
+ StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
+ break;
+ case 32:
+ StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
+ break;
+ case 64:
+ StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
+ break;
+ default:
+ llvm_unreachable("Unexpected atomic hint size.");
+ }
+
+ auto *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
+ .addImm(MI.getOperand(3).getImm())
+ .getInstr();
+
+ auto Store = BuildMI(MBB, MBBI, DL, TII->get(StOpc))
+ .add(MI.getOperand(1))
+ .addReg(MI.getOperand(0).getReg())
+ .setMemRefs(MI.memoperands())
+ .setMIFlags(MI.getFlags());
+
+ if (Relaxed)
+ Store.addImm(0);
+
+ transferImpOps(MI, Store, Store);
+ finalizeBundle(MBB, Hint->getIterator(), MBBI->getIterator());
+ MI.eraseFromParent();
+ return true;
+}
+
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
@@ -1948,6 +1996,14 @@ bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
case AArch64::NAND_ZZZ:
case AArch64::NOR_ZZZ:
return expandSVEBitwisePseudo(MI, MBB, MBBI);
+ case AArch64::ATOMIC_STORE_HINT_B:
+ return expandAtomicStoreHintPseudo(MBB, MBBI, 8);
+ case AArch64::ATOMIC_STORE_HINT_H:
+ return expandAtomicStoreHintPseudo(MBB, MBBI, 16);
+ case AArch64::ATOMIC_STORE_HINT_S:
+ return expandAtomicStoreHintPseudo(MBB, MBBI, 32);
+ case AArch64::ATOMIC_STORE_HINT_D:
+ return expandAtomicStoreHintPseudo(MBB, MBBI, 64);
}
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ade160de983b1..ac02cc7ca017a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -21,6 +22,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/Support/AArch64AtomicHints.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -511,6 +513,10 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
bool SelectCMP_SWAP(SDNode *N);
+ bool isAtomicHintInst(SDNode *N, AArch64AtomicStoreHint Hint) const;
+ bool isAtomicSTSHH_KEEP(SDNode *N) const;
+ bool isAtomicSTSHH_STRM(SDNode *N) const;
+
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
bool Negate);
bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
@@ -4533,6 +4539,20 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
return true;
}
+bool AArch64DAGToDAGISel::isAtomicHintInst(SDNode *N,
+ AArch64AtomicStoreHint Hint) const {
+ const MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+ return AArch64InstrInfo::decodeAtomicHintFlags(MMO->getFlags()) == Hint;
+}
+
+bool AArch64DAGToDAGISel::isAtomicSTSHH_KEEP(SDNode *N) const {
+ return isAtomicHintInst(N, AArch64AtomicStoreHint::HINT_STSHH_KEEP);
+}
+
+bool AArch64DAGToDAGISel::isAtomicSTSHH_STRM(SDNode *N) const {
+ return isAtomicHintInst(N, AArch64AtomicStoreHint::HINT_STSHH_STRM);
+}
+
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
SDValue &Shift, bool Negate) {
if (!isa<ConstantSDNode>(N))
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 63182d31bfd7b..49d99f6f8e80d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -78,6 +78,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/AArch64AtomicHints.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -18662,7 +18663,25 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
I.hasMetadata(FALKOR_STRIDED_ACCESS_MD))
return MOStridedAccess;
- return MachineMemOperand::MONone;
+
+ auto Flags = MachineMemOperand::MONone;
+ const MDNode *AtomicStHint = I.getMetadata(AARCH64_ATOMIC_STORE_HINT_MD);
+ if (AtomicStHint) {
+ unsigned HintVal =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(AtomicStHint->getOperand(0))->getValue())
+ ->getZExtValue();
+ AArch64AtomicStoreHint Hint = getAtomicStoreHintFromMD(HintVal);
+ assert(Hint != AArch64AtomicStoreHint::HINT_NONE &&
+ "Unrecognised atomic hint value requested.");
+
+ if (static_cast<unsigned>(Hint) & 0b1)
+ Flags |= MOAtomicHintBit0;
+ if (static_cast<unsigned>(Hint) & 0b10)
+ Flags |= MOAtomicHintBit1;
+ }
+
+ return Flags;
}
bool AArch64TargetLowering::isLegalInterleavedAccessType(
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 2187f21abb70f..7d719239ecc02 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -282,6 +282,115 @@ def : Pat<(relaxed_store<atomic_store_64>
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
(STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+//===----------------------------------
+// Atomic store with hint pseudos
+//===----------------------------------
+
+class seq_cst_store<PatFrags base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingSequentiallyConsistent = 1;
+}
+
+class atomic_hint_stshh_keep_relaxed<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val),
+ (relaxed_store<base> node:$ptr, node:$val),
+ [{ return isAtomicSTSHH_KEEP(N); }]>;
+
+class atomic_hint_stshh_keep_release<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val),
+ (releasing_store<base> node:$ptr, node:$val),
+ [{ return isAtomicSTSHH_KEEP(N); }]>;
+
+class atomic_hint_stshh_keep_seqcst<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val),
+ (seq_cst_store<base> node:$ptr, node:$val),
+ [{ return isAtomicSTSHH_KEEP(N); }]>;
+
+class atomic_hint_stshh_strm_relaxed<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val),
+ (relaxed_store<base> node:$ptr, node:$val),
+ [{ return isAtomicSTSHH_STRM(N); }]>;
+
+class atomic_hint_stshh_strm_release<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val),
+ (releasing_store<base> node:$ptr, node:$val),
+ [{ return isAtomicSTSHH_STRM(N); }]>;
+
+class atomic_hint_stshh_strm_seqcst<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val),
+ (seq_cst_store<base> node:$ptr, node:$val),
+ [{ return isAtomicSTSHH_STRM(N); }]>;
+
+class BaseStoreHintPseudo<RegisterClass regtype>
+ : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
+ i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]> {
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 1;
+ let mayStore = 1;
+}
+
+def ATOMIC_STORE_HINT_B : BaseStoreHintPseudo<GPR32>;
+def ATOMIC_STORE_HINT_H : BaseStoreHintPseudo<GPR32>;
+def ATOMIC_STORE_HINT_S : BaseStoreHintPseudo<GPR32>;
+def ATOMIC_STORE_HINT_D : BaseStoreHintPseudo<GPR64>;
+
+let AddedComplexity = 15 in {
+ def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
+ def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
+ def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
+ def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+ (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 0))>;
+
+ def : Pat<(atomic_hint_stshh_keep_release<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
+ def : Pat<(atomic_hint_stshh_keep_release<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
+ def : Pat<(atomic_hint_stshh_keep_release<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
+ def : Pat<(atomic_hint_stshh_keep_release<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+ (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 0))>;
+
+ def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
+ def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
+ def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
+ def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+ (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 0))>;
+
+ def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
+ def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
+ def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
+ def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+ (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 1))>;
+
+ def : Pat<(atomic_hint_stshh_strm_release<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
+ def : Pat<(atomic_hint_stshh_strm_release<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
+ def : Pat<(atomic_hint_stshh_strm_release<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
+ def : Pat<(atomic_hint_stshh_strm_release<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+ (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 1))>;
+
+ def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_8> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
+ def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_16> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
+ def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_32> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
+ def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_64> GPR64sp:$addr, GPR64:$data),
+ (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 1))>;
+}
+
//===----------------------------------
// Low-level exclusive operations
//===----------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 57ea2d2f2f992..1d75d3aa89f77 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2885,6 +2885,19 @@ bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
});
}
+AArch64AtomicStoreHint
+AArch64InstrInfo::decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags) {
+ unsigned AtomicHint = 0;
+ if (MMOFlags & MOAtomicHintBit0)
+ AtomicHint += 0b1;
+ if (MMOFlags & MOAtomicHintBit1)
+ AtomicHint += 0b10;
+
+ if (!isValidAArch64AtomicHintValue(AtomicHint))
+ return AArch64AtomicStoreHint::HINT_NONE;
+ return static_cast<AArch64AtomicStoreHint>(AtomicHint);
+}
+
bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
switch (Opc) {
default:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 15bd832de8d25..06fb6cbbabe5a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -16,6 +16,7 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/AArch64AtomicHints.h"
#include "llvm/Support/TypeSize.h"
#include <optional>
@@ -30,8 +31,13 @@ static const MachineMemOperand::Flags MOSuppressPair =
MachineMemOperand::MOTargetFlag1;
static const MachineMemOperand::Flags MOStridedAccess =
MachineMemOperand::MOTargetFlag2;
+static const MachineMemOperand::Flags MOAtomicHintBit0 =
+ MachineMemOperand::MOTargetFlag3;
+static const MachineMemOperand::Flags MOAtomicHintBit1 =
+ MachineMemOperand::MOTargetFlag4;
#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
+#define AARCH64_ATOMIC_STORE_HINT_MD "aarch64.atomic.hint"
// AArch64 MachineCombiner patterns
enum AArch64MachineCombinerPattern : unsigned {
@@ -230,6 +236,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
/// Return true if the given load or store is a strided memory access.
static bool isStridedAccess(const MachineInstr &MI);
+ static AArch64AtomicStoreHint
+ decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags);
+
/// Return true if it has an unscaled load/store offset.
static bool hasUnscaledLdStOffset(unsigned Opc);
static bool hasUnscaledLdStOffset(MachineInstr &MI) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 4f4c999ab244d..fe98659b6900e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include "llvm/Support/AArch64AtomicHints.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
@@ -2541,6 +2542,66 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
I.eraseFromParent();
return true;
}
+ case TargetOpcode::G_STORE: {
+ GStore &St = cast<GStore>(I);
+ auto MMO = St.getMMO();
+ LLT PtrTy = MRI.getType(St.getPointerReg());
+
+ // Only for handling atomic store with hint.
+ // Can only handle AddressSpace 0, 64-bit pointers.
+ if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64)) {
+ return false;
+ }
+
+ AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags());
+ if (Hint == AArch64AtomicStoreHint::HINT_NONE)
+ return false;
+
+ unsigned HintOpc;
+ unsigned StoreSize = St.getMemSizeInBits().getValue();
+ Register ValueReg = St.getValueReg();
+ switch (StoreSize) {
+ case 8:
+ HintOpc = AArch64::ATOMIC_STORE_HINT_B;
+ break;
+ case 16: {
+ Register CastReg;
+ if (mi_match(ValueReg, MRI, m_GBitcast(m_Reg(CastReg)))) {
+ auto Undef = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF,
+ {&AArch64::FPR32RegClass}, {});
+ auto Ins = MIB.buildInstr(TargetOpcode::INSERT_SUBREG,
+ {&AArch64::FPR32RegClass}, {Undef, ValueReg})
+ .addImm(AArch64::hsub);
+ constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+ ValueReg = Ins.getReg(0);
+ }
+ HintOpc = AArch64::ATOMIC_STORE_HINT_H;
+ break;
+ }
+ case 32:
+ HintOpc = AArch64::ATOMIC_STORE_HINT_S;
+ break;
+ case 64:
+ HintOpc = AArch64::ATOMIC_STORE_HINT_D;
+ break;
+ default:
+ llvm_unreachable("Unexpected getMemSizeInBits() value for atomic hint.");
+ }
+
+ unsigned HintImm = Hint == AArch64AtomicStoreHint::HINT_STSHH_KEEP ? 0 : 1;
+
+ auto StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
+ .addReg(St.getPointerReg())
+ .addReg(ValueReg)
+ .addImm((int)toCABI(St.getMMO().getSuccessOrdering()))
+ .addImm(static_cast<unsigned>(HintImm));
+
+ StrPseudo.cloneMemRefs(I);
+ I.eraseFromParent();
+ constrainSelectedInstRegOperands(*StrPseudo, TII, TRI, RBI);
+ return true;
+ }
default:
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
new file mode 100644
index 0000000000000..dfcfa92cbc6c8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel=1 -verify-machineinstrs < %s | FileCheck %s
+
+;
+; STSHH: Keep, Relaxed
+;
+
+define dso_local void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: strb w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i8 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define dso_local void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: strh w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i16 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define dso_local void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: str w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i32 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define dso_local void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_relaxed_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: str x1, [x0]
+; CHECK-NEXT: ret
+ store atomic i64 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+;
+; STSHH: Keep, Release
+;
+
+define dso_local void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_bfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stlrh w8, [x0]
+; CHECK-NEXT: ret
+ store atomic bfloat %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define dso_local void @test_atomic_store_keep_release_half(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stlrh w8, [x0]
+; CHECK-NEXT: ret
+ store atomic half %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define dso_local void @test_atomic_store_keep_release_float(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stlr w8, [x0]
+; CHECK-NEXT: ret
+ store atomic float %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define dso_local void @test_atomic_store_keep_release_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_release_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stlr x8, [x0]
+; CHECK-NEXT: ret
+ store atomic double %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+;
+; STSHH: Keep, SequentiallyConsistent
+;
+
+define dso_local void @test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stlrb w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i8 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define dso_local void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stlrh w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i16 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define dso_local void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stlr w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i32 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define dso_local void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_keep_seqcst_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stlr x1, [x0]
+; CHECK-NEXT: ret
+ store atomic i64 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+;
+; STSHH: Stream, Relaxed
+;
+
+define dso_local void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_bfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: strh w8, [x0]
+; CHECK-NEXT: ret
+ store atomic bfloat %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define dso_local void @test_atomic_store_strm_relaxed_half(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: strh w8, [x0]
+; CHECK-NEXT: ret
+ store atomic half %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define dso_local void @test_atomic_store_strm_relaxed_float(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str w8, [x0]
+; CHECK-NEXT: ret
+ store atomic float %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define dso_local void @test_atomic_store_strm_relaxed_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: test_atomic_store_strm_relaxed_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store atomic double %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+;
+; STSHH: Stream, Release
+;
+
+define dso_local void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: stlrb w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define dso_local void @test_atomic_store_stream_release_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: stlrh w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i16 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define dso_local void @test_atomic_store_stream_release_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: stlr w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i32 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define dso_local void @test_atomic_store_stream_release_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_release_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: stlr x1, [x0]
+; CHECK-NEXT: ret
+ store atomic i64 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+;
+; STSHH: Stream, SequentiallyConsistent
+;
+
+define dso_local void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_bfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: stlrh w8, [x0]
+; CHECK-NEXT: ret
+ store atomic bfloat %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define dso_local void @test_atomic_store_stream_seqcst_i16(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: stlrh w8, [x0]
+; CHECK-NEXT: ret
+ store atomic half %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define dso_local void @test_atomic_store_stream_seqcst_i32(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: stlr w8, [x0]
+; CHECK-NEXT: ret
+ store atomic float %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define dso_local void @test_atomic_store_stream_seqcst_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: test_atomic_store_stream_seqcst_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: stlr x8, [x0]
+; CHECK-NEXT: ret
+ store atomic double %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+!0 = !{i32 0}
+!1 = !{i32 1}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
new file mode 100644
index 0000000000000..ed69efafb04c3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
@@ -0,0 +1,30 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+ define void @test_atomic_store_keep_release_i8(ptr %ptr, i8 %val) {
+ store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
+ ret void
+ }
+
+ !0 = !{i32 0}
+...
+
+---
+name: test_atomic_store_keep_release_i8
+liveins:
+ - { reg: '$x0', virtual-reg: '' }
+ - { reg: '$w1', virtual-reg: '' }
+body: |
+ bb.0 (%ir-block.0):
+ liveins: $w1, $x0
+
+ ; CHECK-LABEL: name: test_atomic_store_keep_release_i8
+ ; CHECK: BUNDLE implicit killed $w1, implicit $x0 :: (store release (s8) into %ir.ptr, align 8) {
+ ; CHECK-NEXT: STSHH 0
+ ; CHECK-NEXT: STRBBui killed renamable $w1, $x0, 0 :: (store release (s8) into %ir.ptr, align 8)
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: RET undef $lr
+
+ ATOMIC_STORE_HINT_B killed renamable $x0, killed renamable $w1, 0, 0 :: (store release (s8) into %ir.ptr, align 8)
+ RET_ReallyLR
+...
>From 189353e701d9e647684c22107c462512713ff4c9 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 19 May 2026 10:43:46 +0000
Subject: [PATCH 2/4] Move expandAtomicStoreHintPseudo to AArch64AsmPrinter.cpp
---
llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 51 +++++++++++++++++
.../AArch64/AArch64ExpandPseudoInsts.cpp | 56 -------------------
.../lib/Target/AArch64/AArch64InstrAtomics.td | 1 +
.../Atomics/aarch64-atomic-store-hint.mir | 30 ----------
4 files changed, 52 insertions(+), 86 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index b16c0460adf38..11c26bb42d423 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -278,6 +278,9 @@ class AArch64AsmPrinter : public AsmPrinter {
// Emit expansion of Compare-and-branch pseudo instructions
void emitCBPseudoExpansion(const MachineInstr *MI);
+ // Emit expansion of atomic store with hint pseudo instructions
+ void emitAtomicHintPseudoExpansion(const MachineInstr *MI, unsigned Size);
+
void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
void EmitToStreamer(const MCInst &Inst) {
EmitToStreamer(*OutStreamer, Inst);
@@ -3126,6 +3129,42 @@ void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, Inst);
}
+void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
+ unsigned Size) {
+
+ unsigned StOpc;
+ unsigned Order = MI->getOperand(2).getImm();
+ bool Relaxed = Order == 0;
+ switch (Size) {
+ case 8:
+ StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
+ break;
+ case 16:
+ StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
+ break;
+ case 32:
+ StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
+ break;
+ case 64:
+ StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
+ break;
+ default:
+ llvm_unreachable("Unexpected atomic hint size.");
+ }
+
+ EmitToStreamer(
+ MCInstBuilder(AArch64::STSHH).addImm(MI->getOperand(3).getImm()));
+
+ MCInst Store;
+ Store.setOpcode(StOpc);
+ Store.addOperand(MCOperand::createReg(MI->getOperand(1).getReg()));
+ Store.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ Store.setFlags(MI->getFlags());
+ if (Relaxed)
+ Store.addOperand(MCOperand::createImm(0));
+ EmitToStreamer(*OutStreamer, Store);
+}
+
// Simple pseudo-instructions have their lowering (with expansion to real
// instructions) auto-generated.
#include "AArch64GenMCPseudoLowering.inc"
@@ -3813,6 +3852,18 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
case AArch64::CBXPrr:
emitCBPseudoExpansion(MI);
return;
+ case AArch64::ATOMIC_STORE_HINT_B:
+ emitAtomicHintPseudoExpansion(MI, 8);
+ return;
+ case AArch64::ATOMIC_STORE_HINT_H:
+ emitAtomicHintPseudoExpansion(MI, 16);
+ return;
+ case AArch64::ATOMIC_STORE_HINT_S:
+ emitAtomicHintPseudoExpansion(MI, 32);
+ return;
+ case AArch64::ATOMIC_STORE_HINT_D:
+ emitAtomicHintPseudoExpansion(MI, 64);
+ return;
}
if (emitDeactivationSymbolRelocation(MI->getDeactivationSymbol()))
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 81fb5619f57b0..5fa93da1544fc 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -70,9 +70,6 @@ class AArch64ExpandPseudoImpl {
MachineBasicBlock::iterator MBBI);
bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
- bool expandAtomicStoreHintPseudo(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned Size);
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
unsigned ExtendImm, unsigned ZeroReg,
@@ -1311,51 +1308,6 @@ bool AArch64ExpandPseudoImpl::expandFormTuplePseudo(
return true;
}
-bool AArch64ExpandPseudoImpl::expandAtomicStoreHintPseudo(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Size) {
- MachineInstr &MI = *MBBI;
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned StOpc;
- unsigned Order = MI.getOperand(2).getImm();
- bool Relaxed = Order == 0;
-
- switch (Size) {
- case 8:
- StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
- break;
- case 16:
- StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
- break;
- case 32:
- StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
- break;
- case 64:
- StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
- break;
- default:
- llvm_unreachable("Unexpected atomic hint size.");
- }
-
- auto *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
- .addImm(MI.getOperand(3).getImm())
- .getInstr();
-
- auto Store = BuildMI(MBB, MBBI, DL, TII->get(StOpc))
- .add(MI.getOperand(1))
- .addReg(MI.getOperand(0).getReg())
- .setMemRefs(MI.memoperands())
- .setMIFlags(MI.getFlags());
-
- if (Relaxed)
- Store.addImm(0);
-
- transferImpOps(MI, Store, Store);
- finalizeBundle(MBB, Hint->getIterator(), MBBI->getIterator());
- MI.eraseFromParent();
- return true;
-}
-
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
@@ -1996,14 +1948,6 @@ bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
case AArch64::NAND_ZZZ:
case AArch64::NOR_ZZZ:
return expandSVEBitwisePseudo(MI, MBB, MBBI);
- case AArch64::ATOMIC_STORE_HINT_B:
- return expandAtomicStoreHintPseudo(MBB, MBBI, 8);
- case AArch64::ATOMIC_STORE_HINT_H:
- return expandAtomicStoreHintPseudo(MBB, MBBI, 16);
- case AArch64::ATOMIC_STORE_HINT_S:
- return expandAtomicStoreHintPseudo(MBB, MBBI, 32);
- case AArch64::ATOMIC_STORE_HINT_D:
- return expandAtomicStoreHintPseudo(MBB, MBBI, 64);
}
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 7d719239ecc02..af57d471e0411 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -325,6 +325,7 @@ class atomic_hint_stshh_strm_seqcst<PatFrag base>
class BaseStoreHintPseudo<RegisterClass regtype>
: Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]> {
+ let Size = 8;
let isCodeGenOnly = 1;
let hasSideEffects = 1;
let mayStore = 1;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
deleted file mode 100644
index ed69efafb04c3..0000000000000
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.mir
+++ /dev/null
@@ -1,30 +0,0 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s
-
---- |
- define void @test_atomic_store_keep_release_i8(ptr %ptr, i8 %val) {
- store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0
- ret void
- }
-
- !0 = !{i32 0}
-...
-
----
-name: test_atomic_store_keep_release_i8
-liveins:
- - { reg: '$x0', virtual-reg: '' }
- - { reg: '$w1', virtual-reg: '' }
-body: |
- bb.0 (%ir-block.0):
- liveins: $w1, $x0
-
- ; CHECK-LABEL: name: test_atomic_store_keep_release_i8
- ; CHECK: BUNDLE implicit killed $w1, implicit $x0 :: (store release (s8) into %ir.ptr, align 8) {
- ; CHECK-NEXT: STSHH 0
- ; CHECK-NEXT: STRBBui killed renamable $w1, $x0, 0 :: (store release (s8) into %ir.ptr, align 8)
- ; CHECK-NEXT: }
- ; CHECK-NEXT: RET undef $lr
-
- ATOMIC_STORE_HINT_B killed renamable $x0, killed renamable $w1, 0, 0 :: (store release (s8) into %ir.ptr, align 8)
- RET_ReallyLR
-...
>From d9a3b6dfe3925ef1206fcd822d0c64b46fc1ba62 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Fri, 12 Jun 2026 10:38:00 +0000
Subject: [PATCH 3/4] - Document aarch64.atomic.hint in LangRef - Rewrite
patterns - Use LLVM ordering in patterns & expansion - Add Clang tests for
more types - Remove AARCH64_ATOMIC_STORE_HINT_MD
---
.../clang/Basic/DiagnosticSemaKinds.td | 10 +-
clang/lib/Sema/SemaARM.cpp | 9 +-
clang/test/CodeGen/arm_acle.c | 90 ++++++++++++++
clang/test/Sema/builtins-arm64.c | 6 +-
llvm/docs/LangRef.rst | 34 +++++-
llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 2 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 2 +-
.../lib/Target/AArch64/AArch64InstrAtomics.td | 115 ++++--------------
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 1 -
.../GISel/AArch64InstructionSelector.cpp | 32 ++++-
10 files changed, 192 insertions(+), 109 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 11ddd5b61e4cb..975f37fb07a7d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9605,6 +9605,11 @@ def err_atomic_builtin_must_be_pointer : Error<
def err_atomic_builtin_must_be_pointer_intptr : Error<
"address argument to atomic builtin must be a pointer to integer or pointer"
" (%0 invalid)">;
+
+def err_atomic_hint_builtin_must_be_pointer : Error<
+ "address argument to atomic hint builtin must be a pointer to a scalar "
+ "integral or floating-point type of 8, 16, 32, or 64 bits (%0 invalid)">;
+
def err_atomic_builtin_cannot_be_const : Error<
"address argument to atomic builtin cannot be const-qualified (%0 invalid)">;
def err_atomic_builtin_must_be_pointer_intfltptr : Error<
@@ -9671,8 +9676,9 @@ def err_atomic_op_hint_data_size : Error<
"address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits">;
def err_atomic_hint_has_invalid_memory_order : Error<
"invalid memory order argument to atomic hint operation (%0 invalid)">;
-def err_atomic_hint_has_invalid_hint_type : Error<
- "invalid hint type argument to atomic hint operation (%0 invalid)">;
+def warn_atomic_hint_has_invalid_hint_type : Warning<
+ "unrecognised hint type argument to atomic hint operation (%0)">,
+ InGroup<DiagGroup<"atomic-hint-type">>;
def warn_atomic_op_has_invalid_memory_order : Warning<
"%select{|success |failure }0memory order argument to atomic operation is invalid">,
InGroup<DiagGroup<"atomic-memory-ordering">>;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 78f83d18deab8..33d1750287b03 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -338,7 +338,7 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
auto *PtrTy = PtrArg->getType()->getAs<PointerType>();
if (!PtrTy)
return Diag(TheCall->getBeginLoc(),
- diag::err_atomic_builtin_must_be_pointer)
+ diag::err_atomic_hint_builtin_must_be_pointer)
<< PtrArg->getType() << 0 << PtrArg->getSourceRange();
QualType PtrQT = PtrTy->getPointeeType();
@@ -348,8 +348,7 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
diag::err_atomic_op_needs_atomic_int_or_fp)
<< 0 << PtrQT << PtrArg->getSourceRange();
- unsigned TySize =
- Context.getTypeSize(Context.getCanonicalType(PtrQT).getUnqualifiedType());
+ unsigned TySize = Context.getTypeSize(PtrQT);
if (TySize != 8 && TySize != 16 && TySize != 32 && TySize != 64)
return Diag(TheCall->getBeginLoc(), diag::err_atomic_op_hint_data_size)
<< PtrArg->getSourceRange();
@@ -398,14 +397,14 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
std::optional<llvm::APSInt> HintAP = HintArg->getIntegerConstantExpr(Context);
if (!HintAP)
return Diag(TheCall->getBeginLoc(),
- diag::err_atomic_hint_has_invalid_hint_type)
+ diag::warn_atomic_hint_has_invalid_hint_type)
<< HintArg->getType() << HintArg->getSourceRange();
unsigned Hint = HintAP->getZExtValue();
if (llvm::getAtomicStoreHintFromMD(Hint) ==
llvm::AArch64AtomicStoreHint::HINT_NONE)
return Diag(TheCall->getBeginLoc(),
- diag::err_atomic_hint_has_invalid_hint_type)
+ diag::warn_atomic_hint_has_invalid_hint_type)
<< *HintAP << HintArg->getSourceRange();
return false;
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index a8aa0916a8a4c..69ad674193b9d 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1833,6 +1833,24 @@ void test_atomic_store_hint_char(char *ptr, char data) {
__arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
}
+// AArch64-LABEL: @test_atomic_store_hint_uchar(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 1, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_uchar(unsigned char *ptr, unsigned char data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_schar(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 1, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_schar(signed char *ptr, signed char data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0);
+}
+
// AArch64-LABEL: @test_atomic_store_hint_bfloat(
// AArch64-NEXT: entry:
// AArch64-NEXT: store atomic bfloat [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META4:![0-9]+]]
@@ -1842,6 +1860,15 @@ void test_atomic_store_hint_bfloat(__bf16 *ptr, __bf16 data) {
__arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1);
}
+// AArch64-LABEL: @test_atomic_store_hint_half(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic half [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META4:![0-9]+]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_half(__fp16 *ptr, __fp16 data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1);
+}
+
// AArch64-LABEL: @test_atomic_store_hint_short(
// AArch64-NEXT: entry:
// AArch64-NEXT: store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META3]]
@@ -1851,6 +1878,33 @@ void test_atomic_store_hint_short(short *ptr, short data) {
__arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
}
+// AArch64-LABEL: @test_atomic_store_hint_ushort(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_ushort(unsigned short *ptr, unsigned short data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_int(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_int(int *ptr, int data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_unsigned(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_unsigned(unsigned *ptr, unsigned data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
// AArch64-LABEL: @test_atomic_store_hint_u32(
// AArch64-NEXT: entry:
// AArch64-NEXT: store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
@@ -1860,6 +1914,15 @@ void test_atomic_store_hint_u32(uint32_t *ptr, uint32_t data) {
__arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
}
+// AArch64-LABEL: @test_atomic_store_hint_s32(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_s32(int32_t *ptr, int32_t data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0);
+}
+
// AArch64-LABEL: @test_atomic_store_hint_float(
// AArch64-NEXT: entry:
// AArch64-NEXT: store atomic float [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]]
@@ -1878,6 +1941,24 @@ void test_atomic_store_hint_s64(int64_t *ptr, int64_t data) {
__arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1);
}
+// AArch64-LABEL: @test_atomic_store_hint_long(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_long(long *ptr, long data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
+// AArch64-LABEL: @test_atomic_store_hint_ulong(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_ulong(unsigned long *ptr, unsigned long data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
// AArch64-LABEL: @test_atomic_store_hint_long_long_int(
// AArch64-NEXT: entry:
// AArch64-NEXT: store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
@@ -1887,6 +1968,15 @@ void test_atomic_store_hint_long_long_int(long long int *ptr, long long int data
__arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
}
+// AArch64-LABEL: @test_atomic_store_hint_long_long_uint(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]]
+// AArch64-NEXT: ret void
+//
+void test_atomic_store_hint_long_long_uint(unsigned long long int *ptr, unsigned long long int data) {
+ __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0);
+}
+
// AArch64-LABEL: @test_atomic_store_hint_double(
// AArch64-NEXT: entry:
// AArch64-NEXT: store atomic double [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 8, !aarch64.atomic.hint [[META4]]
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index fb4718a1bd1f4..5372aff24b739 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -58,13 +58,13 @@ void test_atomic_store_hint(char *c_ptr, __int128 *inv_ptr, float *f_ptr,
__builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0); // expected-error {{too few arguments to function call, expected 4, have 3}}
__builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}}
- __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
+ __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error {{address argument to atomic hint builtin must be a pointer to a scalar integral or floating-point type of 8, 16, 32, or 64 bits ('int' invalid)}}
__builtin_arm_atomic_store_with_hint(c_ptr, f_data, 0, 0); // expected-error {{arguments are of different types ('char' vs 'float')}}
__builtin_arm_atomic_store_with_hint(inv_ptr, inv_data, 0, 0); // expected-error {{address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits}}
__builtin_arm_atomic_store_with_hint(c_ptr, c_data, inv_int, 0); // expected-error {{invalid memory order argument to atomic hint operation ('int' invalid)}}
__builtin_arm_atomic_store_with_hint(c_ptr, c_data, 2, 0); // expected-error {{invalid memory order argument to atomic hint operation (2 invalid)}}
- __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, inv_int); // expected-error {{invalid hint type argument to atomic hint operation ('int' invalid)}}
- __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // expected-error {{invalid hint type argument to atomic hint operation (3 invalid)}}
+ __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, inv_int); // expected-warning {{unrecognised hint type argument to atomic hint operation ('int')}}
+ __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // expected-warning {{unrecognised hint type argument to atomic hint operation (3)}}
}
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 92cbfab50c8ef..1b459b35709c8 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -8683,6 +8683,33 @@ to the SSA value of the pointer operand.
Note that this is an experimental feature, which means that its semantics might
change in the future.
+.. _md_aarch64.atomic.hint:
+
+'``aarch64.atomic.hint``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``aarch64.atomic.hint`` metadata may be attached to an atomic store
+instruction, referencing a single metadata node containing a single ``i32``
+entry:
+
+.. code-block:: llvm
+
+ store atomic i64 %x, ptr %y seq_cst, align 8, !aarch64.atomic.hint !0
+
+ ...
+ !0 = !{i32 1}
+
+On AArch64 targets, this metadata may be used to emit an atomic store together
+with a hint instruction. The hint is a suggestion to the compiler which may be
+used when selecting code sequences, but it is not required to emit a specific
+hint instruction. The following hint values are currently recognised:
+
+ * ``0``: ``stshh keep`` hint.
+ * ``1``: ``stshh strm`` hint.
+
+If the compiler does not recognise the hint value provided, it may ignore the
+metadata. Targets that do not support this metadata may also ignore it.
+
'``type``' Metadata
^^^^^^^^^^^^^^^^^^^
@@ -12154,9 +12181,10 @@ Syntax:
::
store [volatile] <ty> <value>, ptr <pointer>[, align <alignment>][, !nontemporal !<nontemp_node>][, !invariant.group !<empty_node>] ; yields void
- store atomic [volatile] <ty> <value>, ptr <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>] ; yields void
+ store atomic [volatile] <ty> <value>, ptr <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>][, !aarch64.atomic.hint !<aarch64_hint_value>]; yields void
!<nontemp_node> = !{ i32 1 }
!<empty_node> = !{}
+ !<aarch64_hint_value> = !{ i32 <hint> }
Overview:
"""""""""
@@ -12212,6 +12240,10 @@ x86.
The optional ``!invariant.group`` metadata must reference a
single metadata name ``<empty_node>``. See ``invariant.group`` metadata.
+The optional ``!aarch64.atomic.hint`` metadata must reference a single metadata
+name ``<aarch64_hint_value>``. See ``aarch64.atomic.hint`` metadata.
+
+
Semantics:
""""""""""
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 11c26bb42d423..e16a6ac3c9aca 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -3134,7 +3134,7 @@ void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
unsigned StOpc;
unsigned Order = MI->getOperand(2).getImm();
- bool Relaxed = Order == 0;
+ bool Relaxed = Order == 2;
switch (Size) {
case 8:
StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 49d99f6f8e80d..4a4b711ee7448 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18665,7 +18665,7 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
return MOStridedAccess;
auto Flags = MachineMemOperand::MONone;
- const MDNode *AtomicStHint = I.getMetadata(AARCH64_ATOMIC_STORE_HINT_MD);
+ const MDNode *AtomicStHint = I.getMetadata("aarch64.atomic.hint");
if (AtomicStHint) {
unsigned HintVal =
cast<ConstantInt>(
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index af57d471e0411..af06ef9014031 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -286,42 +286,6 @@ def : Pat<(relaxed_store<atomic_store_64>
// Atomic store with hint pseudos
//===----------------------------------
-class seq_cst_store<PatFrags base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
- let IsAtomic = 1;
- let IsAtomicOrderingSequentiallyConsistent = 1;
-}
-
-class atomic_hint_stshh_keep_relaxed<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val),
- (relaxed_store<base> node:$ptr, node:$val),
- [{ return isAtomicSTSHH_KEEP(N); }]>;
-
-class atomic_hint_stshh_keep_release<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val),
- (releasing_store<base> node:$ptr, node:$val),
- [{ return isAtomicSTSHH_KEEP(N); }]>;
-
-class atomic_hint_stshh_keep_seqcst<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val),
- (seq_cst_store<base> node:$ptr, node:$val),
- [{ return isAtomicSTSHH_KEEP(N); }]>;
-
-class atomic_hint_stshh_strm_relaxed<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val),
- (relaxed_store<base> node:$ptr, node:$val),
- [{ return isAtomicSTSHH_STRM(N); }]>;
-
-class atomic_hint_stshh_strm_release<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val),
- (releasing_store<base> node:$ptr, node:$val),
- [{ return isAtomicSTSHH_STRM(N); }]>;
-
-class atomic_hint_stshh_strm_seqcst<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val),
- (seq_cst_store<base> node:$ptr, node:$val),
- [{ return isAtomicSTSHH_STRM(N); }]>;
-
class BaseStoreHintPseudo<RegisterClass regtype>
: Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]> {
@@ -336,60 +300,33 @@ def ATOMIC_STORE_HINT_H : BaseStoreHintPseudo<GPR32>;
def ATOMIC_STORE_HINT_S : BaseStoreHintPseudo<GPR32>;
def ATOMIC_STORE_HINT_D : BaseStoreHintPseudo<GPR64>;
+class atomic_hint_store<PatFrag Base, bit Rel, bit SeqCst, code Pred, code GIPred>
+ : PatFrag<(ops node:$ptr, node:$val),
+ (Base node:$val, node:$ptr), Pred> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingReleaseOrStronger = Rel;
+ let IsAtomicOrderingSequentiallyConsistent = SeqCst;
+ let GISelPredicateCode = GIPred;
+}
+
+multiclass AtomicHintPatterns<int Order, int Hint, bit Rel, bit SeqCst, code Pred, code GIPred> {
+ def : Pat<(atomic_hint_store<atomic_store_8, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_16, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_32, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+ (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_64, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR64:$data),
+ (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 Order), (i32 Hint))>;
+}
+
let AddedComplexity = 15 in {
- def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_8> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
- def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_16> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
- def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_32> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>;
- def : Pat<(atomic_hint_stshh_keep_relaxed<atomic_store_64> GPR64sp:$addr, GPR64:$data),
- (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 0))>;
-
- def : Pat<(atomic_hint_stshh_keep_release<atomic_store_8> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
- def : Pat<(atomic_hint_stshh_keep_release<atomic_store_16> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
- def : Pat<(atomic_hint_stshh_keep_release<atomic_store_32> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>;
- def : Pat<(atomic_hint_stshh_keep_release<atomic_store_64> GPR64sp:$addr, GPR64:$data),
- (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 0))>;
-
- def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_8> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
- def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_16> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
- def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_32> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>;
- def : Pat<(atomic_hint_stshh_keep_seqcst<atomic_store_64> GPR64sp:$addr, GPR64:$data),
- (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 0))>;
-
- def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_8> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
- def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_16> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
- def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_32> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>;
- def : Pat<(atomic_hint_stshh_strm_relaxed<atomic_store_64> GPR64sp:$addr, GPR64:$data),
- (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 1))>;
-
- def : Pat<(atomic_hint_stshh_strm_release<atomic_store_8> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
- def : Pat<(atomic_hint_stshh_strm_release<atomic_store_16> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
- def : Pat<(atomic_hint_stshh_strm_release<atomic_store_32> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>;
- def : Pat<(atomic_hint_stshh_strm_release<atomic_store_64> GPR64sp:$addr, GPR64:$data),
- (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 1))>;
-
- def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_8> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
- def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_16> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
- def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_32> GPR64sp:$addr, GPR32:$data),
- (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>;
- def : Pat<(atomic_hint_stshh_strm_seqcst<atomic_store_64> GPR64sp:$addr, GPR64:$data),
- (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 1))>;
+ defm : AtomicHintPatterns<2, 0, 0, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+ defm : AtomicHintPatterns<5, 0, 1, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+ defm : AtomicHintPatterns<7, 0, 0, 1, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+ defm : AtomicHintPatterns<2, 1, 0, 0, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+ defm : AtomicHintPatterns<5, 1, 1, 0, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+ defm : AtomicHintPatterns<7, 1, 0, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
}
//===----------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 06fb6cbbabe5a..1eb5dac67ea2a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -37,7 +37,6 @@ static const MachineMemOperand::Flags MOAtomicHintBit1 =
MachineMemOperand::MOTargetFlag4;
#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
-#define AARCH64_ATOMIC_STORE_HINT_MD "aarch64.atomic.hint"
// AArch64 MachineCombiner patterns
enum AArch64MachineCombinerPattern : unsigned {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index fe98659b6900e..3c1f7e6213a36 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -522,6 +522,11 @@ class AArch64InstructionSelector : public InstructionSelector {
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
+ bool isAtomicHintInst(const MachineInstr &MI,
+ AArch64AtomicStoreHint Hint) const;
+ bool isAtomicSTSHH_KEEP(const MachineInstr &MI) const;
+ bool isAtomicSTSHH_STRM(const MachineInstr &MI) const;
+
/// Return true if \p MI is a load or store of \p NumBytes bytes.
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
@@ -2546,17 +2551,15 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
GStore &St = cast<GStore>(I);
auto MMO = St.getMMO();
LLT PtrTy = MRI.getType(St.getPointerReg());
+ AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags());
// Only for handling atomic store with hint.
// Can only handle AddressSpace 0, 64-bit pointers.
- if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64)) {
+ if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64) ||
+ Hint == AArch64AtomicStoreHint::HINT_NONE) {
return false;
}
- AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags());
- if (Hint == AArch64AtomicStoreHint::HINT_NONE)
- return false;
-
unsigned HintOpc;
unsigned StoreSize = St.getMemSizeInBits().getValue();
Register ValueReg = St.getValueReg();
@@ -2594,7 +2597,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
auto StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
.addReg(St.getPointerReg())
.addReg(ValueReg)
- .addImm((int)toCABI(St.getMMO().getSuccessOrdering()))
+ .addImm((int)MMO.getSuccessOrdering())
.addImm(static_cast<unsigned>(HintImm));
StrPseudo.cloneMemRefs(I);
@@ -8089,6 +8092,23 @@ void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
.getZExtValue()));
}
+bool AArch64InstructionSelector::isAtomicHintInst(
+ const MachineInstr &MI, AArch64AtomicStoreHint Hint) const {
+ const GStore &St = cast<GStore>(MI);
+ auto MMO = St.getMMO();
+ return AArch64InstrInfo::decodeAtomicHintFlags(MMO.getFlags()) == Hint;
+}
+
+bool AArch64InstructionSelector::isAtomicSTSHH_KEEP(
+ const MachineInstr &MI) const {
+ return isAtomicHintInst(MI, AArch64AtomicStoreHint::HINT_STSHH_KEEP);
+}
+
+bool AArch64InstructionSelector::isAtomicSTSHH_STRM(
+ const MachineInstr &MI) const {
+ return isAtomicHintInst(MI, AArch64AtomicStoreHint::HINT_STSHH_STRM);
+}
+
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
const MachineInstr &MI, unsigned NumBytes) const {
if (!MI.mayLoadOrStore())
>From 5c39cc14b76da6e90a17d6f441fb6354012e9a79 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 23 Jun 2026 10:28:36 +0000
Subject: [PATCH 4/4] - Add support for different addressing modes when the
ordering is relaxed - Set IsAtomicOrderingMonotonic & IsAtomicOrderingRelease
in atomic_hint_store - Do not attach invalid hints to atomic_store & ignore
any invalid hints found
---
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 18 +-
clang/lib/Sema/SemaARM.cpp | 19 +-
clang/test/CodeGen/builtins-arm64.c | 4 +
llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 143 +++++-
.../Target/AArch64/AArch64ISelLowering.cpp | 2 -
.../lib/Target/AArch64/AArch64InstrAtomics.td | 137 ++++--
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 -
.../GISel/AArch64InstructionSelector.cpp | 92 ++--
.../Atomics/aarch64-atomic-store-hint.ll | 62 ++-
.../Atomics/aarch64-relaxed-store-hint.ll | 416 ++++++++++++++++++
10 files changed, 780 insertions(+), 115 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-relaxed-store-hint.ll
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 4a13767268f96..555c0a9f33c5d 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -2167,15 +2167,15 @@ static Value *EmitAtomicStoreWithHintBuiltin(CodeGenFunction &CGF,
llvm_unreachable(
"Expected integer hint argument to atomic store with hint.");
unsigned HintArg = Result.Val.getInt().getExtValue();
- assert((getAtomicStoreHintFromMD(HintArg) !=
- AArch64AtomicStoreHint::HINT_NONE) &&
- "Invalid hint type");
-
- MDNode *HintMDVal =
- MDNode::get(CGM.getLLVMContext(),
- llvm::ConstantAsMetadata::get(Builder.getInt32(HintArg)));
- Store->setMetadata(CGM.getModule().getMDKindID("aarch64.atomic.hint"),
- HintMDVal);
+
+ // Attach the hint if valid
+ if (getAtomicStoreHintFromMD(HintArg) != AArch64AtomicStoreHint::HINT_NONE) {
+ MDNode *HintMDVal =
+ MDNode::get(CGM.getLLVMContext(),
+ llvm::ConstantAsMetadata::get(Builder.getInt32(HintArg)));
+ Store->setMetadata(CGM.getModule().getMDKindID("aarch64.atomic.hint"),
+ HintMDVal);
+ }
return Store;
}
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 33d1750287b03..11078d0578240 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -331,10 +331,11 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
// Arg 0 should be the pointer type. The pointee type must be a
// scalar integral or floating-point type of 8, 16, 32 or 64 bits.
ASTContext &Context = getASTContext();
- Expr *PtrArg = TheCall->getArg(0);
- auto PtrArgRes = SemaRef.DefaultFunctionArrayLvalueConversion(PtrArg);
+ auto PtrArgRes =
+ SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
if (PtrArgRes.isInvalid())
return true;
+ auto *PtrArg = PtrArgRes.get();
auto *PtrTy = PtrArg->getType()->getAs<PointerType>();
if (!PtrTy)
return Diag(TheCall->getBeginLoc(),
@@ -395,17 +396,17 @@ bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
auto HintArg =
SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(3)).get();
std::optional<llvm::APSInt> HintAP = HintArg->getIntegerConstantExpr(Context);
- if (!HintAP)
- return Diag(TheCall->getBeginLoc(),
- diag::warn_atomic_hint_has_invalid_hint_type)
- << HintArg->getType() << HintArg->getSourceRange();
+ if (!HintAP) {
+ Diag(TheCall->getBeginLoc(), diag::warn_atomic_hint_has_invalid_hint_type)
+ << HintArg->getType() << HintArg->getSourceRange();
+ return false;
+ }
unsigned Hint = HintAP->getZExtValue();
if (llvm::getAtomicStoreHintFromMD(Hint) ==
llvm::AArch64AtomicStoreHint::HINT_NONE)
- return Diag(TheCall->getBeginLoc(),
- diag::warn_atomic_hint_has_invalid_hint_type)
- << *HintAP << HintArg->getSourceRange();
+ Diag(TheCall->getBeginLoc(), diag::warn_atomic_hint_has_invalid_hint_type)
+ << *HintAP << HintArg->getSourceRange();
return false;
}
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index ad9ba7feca671..01332f8114a11 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -225,6 +225,10 @@ void atomic_store_with_hint(int64_t *a, int64_t b) {
__builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELEASE, 1); // HINT_STSHH_STRM
// CHECK: store atomic i64 {{.*}}, ptr {{.*}} release, align 8, !aarch64.atomic.hint ![[M2:[0-9]]]
+
+ // Invalid hint should be dropped
+ __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELAXED, 2); // Invalid Hint
+ // CHECK: store atomic i64 {{.*}}, ptr {{.*}} monotonic, align 8
}
// CHECK: ![[M0]] = !{!"1:2:3:4:5"}
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index e16a6ac3c9aca..1f5814529200d 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -279,7 +279,9 @@ class AArch64AsmPrinter : public AsmPrinter {
void emitCBPseudoExpansion(const MachineInstr *MI);
// Emit expansion of atomic store with hint pseudo instructions
- void emitAtomicHintPseudoExpansion(const MachineInstr *MI, unsigned Size);
+ void emitAtomicHintPseudoExpansion(const MachineInstr *MI);
+ void emitAtomicHintPseudoExpansionRO(const MachineInstr *MI);
+ void emitAtomicHintPseudoExpansionImm(const MachineInstr *MI);
void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
void EmitToStreamer(const MCInst &Inst) {
@@ -3129,23 +3131,22 @@ void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, Inst);
}
-void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
- unsigned Size) {
+void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI) {
unsigned StOpc;
unsigned Order = MI->getOperand(2).getImm();
bool Relaxed = Order == 2;
- switch (Size) {
- case 8:
+ switch (MI->getOpcode()) {
+ case AArch64::ATOMIC_STORE_HINT_B:
StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB;
break;
- case 16:
+ case AArch64::ATOMIC_STORE_HINT_H:
StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH;
break;
- case 32:
+ case AArch64::ATOMIC_STORE_HINT_S:
StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW;
break;
- case 64:
+ case AArch64::ATOMIC_STORE_HINT_D:
StOpc = Relaxed ? AArch64::STRXui : AArch64::STLRX;
break;
default:
@@ -3165,6 +3166,104 @@ void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI,
EmitToStreamer(*OutStreamer, Store);
}
+void AArch64AsmPrinter::emitAtomicHintPseudoExpansionRO(
+ const MachineInstr *MI) {
+ unsigned StOpc;
+ unsigned Order = MI->getOperand(5).getImm();
+ assert(Order == 2 &&
+ "Atomic store addressing mode only supports relaxed stores");
+
+ switch (MI->getOpcode()) {
+ case AArch64::ATOMIC_STORE_HINT_BroW:
+ StOpc = AArch64::STRBBroW;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_HroW:
+ StOpc = AArch64::STRHHroW;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_SroW:
+ StOpc = AArch64::STRWroW;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_DroW:
+ StOpc = AArch64::STRXroW;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_BroX:
+ StOpc = AArch64::STRBBroX;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_HroX:
+ StOpc = AArch64::STRHHroX;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_SroX:
+ StOpc = AArch64::STRWroX;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_DroX:
+ StOpc = AArch64::STRXroX;
+ break;
+ default:
+ llvm_unreachable("Unexpected atomic hint opcode.");
+ }
+
+ EmitToStreamer(
+ MCInstBuilder(AArch64::STSHH).addImm(MI->getOperand(6).getImm()));
+
+ MCInst Store;
+ Store.setOpcode(StOpc);
+ Store.addOperand(MCOperand::createReg(MI->getOperand(2).getReg())); // Data
+ Store.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); // Rn
+ Store.addOperand(MCOperand::createReg(MI->getOperand(1).getReg())); // Rm
+ Store.addOperand(MCOperand::createImm(MI->getOperand(3).getImm())); // Signed
+ Store.addOperand(MCOperand::createImm(MI->getOperand(4).getImm())); // Shift
+ Store.setFlags(MI->getFlags());
+ EmitToStreamer(*OutStreamer, Store);
+}
+
+void AArch64AsmPrinter::emitAtomicHintPseudoExpansionImm(
+ const MachineInstr *MI) {
+ unsigned StOpc;
+ unsigned Order = MI->getOperand(3).getImm();
+ assert(Order == 2 &&
+ "Atomic store addressing mode only supports relaxed stores");
+
+ switch (MI->getOpcode()) {
+ case AArch64::ATOMIC_STORE_HINT_Bui:
+ StOpc = AArch64::STRBBui;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_Hui:
+ StOpc = AArch64::STRHHui;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_Sui:
+ StOpc = AArch64::STRWui;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_Dui:
+ StOpc = AArch64::STRXui;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_Bi:
+ StOpc = AArch64::STURBBi;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_Hi:
+ StOpc = AArch64::STURHHi;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_Si:
+ StOpc = AArch64::STURWi;
+ break;
+ case AArch64::ATOMIC_STORE_HINT_Di:
+ StOpc = AArch64::STURXi;
+ break;
+ default:
+ llvm_unreachable("Unexpected atomic hint opcode.");
+ }
+
+ EmitToStreamer(
+ MCInstBuilder(AArch64::STSHH).addImm(MI->getOperand(4).getImm()));
+
+ MCInst Store;
+ Store.setOpcode(StOpc);
+ Store.addOperand(MCOperand::createReg(MI->getOperand(1).getReg())); // Data
+ Store.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); // Rn
+ Store.addOperand(MCOperand::createImm(MI->getOperand(2).getImm())); // Imm
+ Store.setFlags(MI->getFlags());
+ EmitToStreamer(*OutStreamer, Store);
+}
+
// Simple pseudo-instructions have their lowering (with expansion to real
// instructions) auto-generated.
#include "AArch64GenMCPseudoLowering.inc"
@@ -3853,16 +3952,30 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
emitCBPseudoExpansion(MI);
return;
case AArch64::ATOMIC_STORE_HINT_B:
- emitAtomicHintPseudoExpansion(MI, 8);
- return;
case AArch64::ATOMIC_STORE_HINT_H:
- emitAtomicHintPseudoExpansion(MI, 16);
- return;
case AArch64::ATOMIC_STORE_HINT_S:
- emitAtomicHintPseudoExpansion(MI, 32);
- return;
case AArch64::ATOMIC_STORE_HINT_D:
- emitAtomicHintPseudoExpansion(MI, 64);
+ emitAtomicHintPseudoExpansion(MI);
+ return;
+ case AArch64::ATOMIC_STORE_HINT_BroW:
+ case AArch64::ATOMIC_STORE_HINT_HroW:
+ case AArch64::ATOMIC_STORE_HINT_SroW:
+ case AArch64::ATOMIC_STORE_HINT_DroW:
+ case AArch64::ATOMIC_STORE_HINT_BroX:
+ case AArch64::ATOMIC_STORE_HINT_HroX:
+ case AArch64::ATOMIC_STORE_HINT_SroX:
+ case AArch64::ATOMIC_STORE_HINT_DroX:
+ emitAtomicHintPseudoExpansionRO(MI);
+ return;
+ case AArch64::ATOMIC_STORE_HINT_Bui:
+ case AArch64::ATOMIC_STORE_HINT_Hui:
+ case AArch64::ATOMIC_STORE_HINT_Sui:
+ case AArch64::ATOMIC_STORE_HINT_Dui:
+ case AArch64::ATOMIC_STORE_HINT_Bi:
+ case AArch64::ATOMIC_STORE_HINT_Hi:
+ case AArch64::ATOMIC_STORE_HINT_Si:
+ case AArch64::ATOMIC_STORE_HINT_Di:
+ emitAtomicHintPseudoExpansionImm(MI);
return;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4a4b711ee7448..5f2687875dba6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18672,8 +18672,6 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
cast<ConstantAsMetadata>(AtomicStHint->getOperand(0))->getValue())
->getZExtValue();
AArch64AtomicStoreHint Hint = getAtomicStoreHintFromMD(HintVal);
- assert(Hint != AArch64AtomicStoreHint::HINT_NONE &&
- "Unrecognised atomic hint value requested.");
if (static_cast<unsigned>(Hint) & 0b1)
Flags |= MOAtomicHintBit0;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index af06ef9014031..e0dc12bc62de6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -286,47 +286,126 @@ def : Pat<(relaxed_store<atomic_store_64>
// Atomic store with hint pseudos
//===----------------------------------
-class BaseStoreHintPseudo<RegisterClass regtype>
- : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
- i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]> {
- let Size = 8;
- let isCodeGenOnly = 1;
- let hasSideEffects = 1;
- let mayStore = 1;
-}
-
-def ATOMIC_STORE_HINT_B : BaseStoreHintPseudo<GPR32>;
-def ATOMIC_STORE_HINT_H : BaseStoreHintPseudo<GPR32>;
-def ATOMIC_STORE_HINT_S : BaseStoreHintPseudo<GPR32>;
-def ATOMIC_STORE_HINT_D : BaseStoreHintPseudo<GPR64>;
-
-class atomic_hint_store<PatFrag Base, bit Rel, bit SeqCst, code Pred, code GIPred>
- : PatFrag<(ops node:$ptr, node:$val),
+let Size = 8, isCodeGenOnly = 1, hasSideEffects = 1, mayStore = 1 in {
+ class BaseStoreHintPseudo<RegisterClass regtype>
+ : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data,
+ i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+
+ class BaseStoreHintPseudoROW<RegisterClass regtype, Operand WExt>
+ : Pseudo<(outs), (ins GPR64sp:$Rn, GPR32:$Rm, regtype:$data, WExt:$extend,
+ i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+
+ class BaseStoreHintPseudoROX<RegisterClass regtype, Operand XExt>
+ : Pseudo<(outs), (ins GPR64sp:$Rn, GPR64:$Rm, regtype:$data, XExt:$extend,
+ i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+
+ class BaseStoreHintPseudoUImm<RegisterClass regtype, Operand UImmOp>
+ : Pseudo<(outs), (ins GPR64sp:$Rn, regtype:$data, UImmOp:$offset,
+ i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+
+ class BaseStoreHintPseudoImm<RegisterClass regtype>
+ : Pseudo<(outs), (ins GPR64sp:$Rn, regtype:$data, simm9:$offset,
+ i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]>;
+}
+
+multiclass AtomicStoreHintPseudos<RegisterClass regtype, Operand WExt, Operand XExt, Operand UImmOp> {
+ def NAME : BaseStoreHintPseudo<regtype>;
+ def NAME # roW : BaseStoreHintPseudoROW<regtype, WExt>;
+ def NAME # roX : BaseStoreHintPseudoROX<regtype, XExt>;
+ def NAME # ui : BaseStoreHintPseudoUImm<regtype, UImmOp>;
+ def NAME # i : BaseStoreHintPseudoImm<regtype>;
+}
+
+defm ATOMIC_STORE_HINT_B : AtomicStoreHintPseudos<GPR32, ro_Wextend8, ro_Xextend8, uimm12s1>;
+defm ATOMIC_STORE_HINT_H : AtomicStoreHintPseudos<GPR32, ro_Wextend16, ro_Xextend16, uimm12s2>;
+defm ATOMIC_STORE_HINT_S : AtomicStoreHintPseudos<GPR32, ro_Wextend32, ro_Xextend32, uimm12s4>;
+defm ATOMIC_STORE_HINT_D : AtomicStoreHintPseudos<GPR64, ro_Wextend64, ro_Xextend64, uimm12s8>;
+
+class atomic_hint_store<PatFrag Base, int Order, code Pred, code GIPred>
+ : PatFrag<(ops node:$val, node:$ptr),
(Base node:$val, node:$ptr), Pred> {
let IsAtomic = 1;
- let IsAtomicOrderingReleaseOrStronger = Rel;
- let IsAtomicOrderingSequentiallyConsistent = SeqCst;
+ let IsAtomicOrderingMonotonic = !eq(Order, 2);
+ let IsAtomicOrderingRelease = !eq(Order, 5);
+ let IsAtomicOrderingSequentiallyConsistent = !eq(Order, 7);
let GISelPredicateCode = GIPred;
}
-multiclass AtomicHintPatterns<int Order, int Hint, bit Rel, bit SeqCst, code Pred, code GIPred> {
- def : Pat<(atomic_hint_store<atomic_store_8, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+multiclass AtomicHintPatternsAddrMode<int Hint, code Pred, code GIPred> {
+ def : Pat<(atomic_hint_store<atomic_store_8, 2, Pred, GIPred>
+ GPR32:$data, (ro_Windexed8 GPR64sp:$addr, GPR32:$Rm, ro_Wextend8:$extend)),
+ (ATOMIC_STORE_HINT_BroW GPR64sp:$addr, GPR32:$Rm, GPR32:$data, ro_Wextend8:$extend, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_16, 2, Pred, GIPred>
+ GPR32:$data, (ro_Windexed16 GPR64sp:$addr, GPR32:$Rm, ro_Wextend16:$extend)),
+ (ATOMIC_STORE_HINT_HroW GPR64sp:$addr, GPR32:$Rm, GPR32:$data, ro_Wextend16:$extend, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_32, 2, Pred, GIPred>
+ GPR32:$data, (ro_Windexed32 GPR64sp:$addr, GPR32:$Rm, ro_Wextend32:$extend)),
+ (ATOMIC_STORE_HINT_SroW GPR64sp:$addr, GPR32:$Rm, GPR32:$data, ro_Wextend32:$extend, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_64, 2, Pred, GIPred>
+ GPR64:$data, (ro_Windexed64 GPR64sp:$addr, GPR32:$Rm, ro_Wextend64:$extend)),
+ (ATOMIC_STORE_HINT_DroW GPR64sp:$addr, GPR32:$Rm, GPR64:$data, ro_Wextend64:$extend, (i32 2), (i32 Hint))>;
+
+ def : Pat<(atomic_hint_store<atomic_store_8, 2, Pred, GIPred>
+ GPR32:$data, (ro_Xindexed8 GPR64sp:$addr, GPR64:$Rm, ro_Xextend8:$extend)),
+ (ATOMIC_STORE_HINT_BroX GPR64sp:$addr, GPR64:$Rm, GPR32:$data, ro_Xextend8:$extend, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_16, 2, Pred, GIPred>
+ GPR32:$data, (ro_Xindexed16 GPR64sp:$addr, GPR64:$Rm, ro_Xextend16:$extend)),
+ (ATOMIC_STORE_HINT_HroX GPR64sp:$addr, GPR64:$Rm, GPR32:$data, ro_Xextend16:$extend, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_32, 2, Pred, GIPred>
+ GPR32:$data, (ro_Xindexed32 GPR64sp:$addr, GPR64:$Rm, ro_Xextend32:$extend)),
+ (ATOMIC_STORE_HINT_SroX GPR64sp:$addr, GPR64:$Rm, GPR32:$data, ro_Xextend32:$extend, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_64, 2, Pred, GIPred>
+ GPR64:$data, (ro_Xindexed64 GPR64sp:$addr, GPR64:$Rm, ro_Xextend64:$extend)),
+ (ATOMIC_STORE_HINT_DroX GPR64sp:$addr, GPR64:$Rm, GPR64:$data, ro_Xextend64:$extend, (i32 2), (i32 Hint))>;
+
+ def : Pat<(atomic_hint_store<atomic_store_8, 2, Pred, GIPred>
+ GPR32:$data, (am_indexed8 GPR64sp:$addr, uimm12s1:$offset)),
+ (ATOMIC_STORE_HINT_Bui GPR64sp:$addr, GPR32:$data, uimm12s1:$offset, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_16, 2, Pred, GIPred>
+ GPR32:$data, (am_indexed16 GPR64sp:$addr, uimm12s2:$offset)),
+ (ATOMIC_STORE_HINT_Hui GPR64sp:$addr, GPR32:$data, uimm12s2:$offset, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_32, 2, Pred, GIPred>
+ GPR32:$data, (am_indexed32 GPR64sp:$addr, uimm12s4:$offset)),
+ (ATOMIC_STORE_HINT_Sui GPR64sp:$addr, GPR32:$data, uimm12s4:$offset, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_64, 2, Pred, GIPred>
+ GPR64:$data, (am_indexed64 GPR64sp:$addr, uimm12s8:$offset)),
+ (ATOMIC_STORE_HINT_Dui GPR64sp:$addr, GPR64:$data, uimm12s8:$offset, (i32 2), (i32 Hint))>;
+
+ def : Pat<(atomic_hint_store<atomic_store_8, 2, Pred, GIPred>
+ GPR32:$data, (am_unscaled8 GPR64sp:$addr, simm9:$offset)),
+ (ATOMIC_STORE_HINT_Bi GPR64sp:$addr, GPR32:$data, simm9:$offset, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_16, 2, Pred, GIPred>
+ GPR32:$data, (am_unscaled16 GPR64sp:$addr, simm9:$offset)),
+ (ATOMIC_STORE_HINT_Hi GPR64sp:$addr, GPR32:$data, simm9:$offset, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_32, 2, Pred, GIPred>
+ GPR32:$data, (am_unscaled32 GPR64sp:$addr, simm9:$offset)),
+ (ATOMIC_STORE_HINT_Si GPR64sp:$addr, GPR32:$data, simm9:$offset, (i32 2), (i32 Hint))>;
+ def : Pat<(atomic_hint_store<atomic_store_64, 2, Pred, GIPred>
+ GPR64:$data, (am_unscaled64 GPR64sp:$addr, simm9:$offset)),
+ (ATOMIC_STORE_HINT_Di GPR64sp:$addr, GPR64:$data, simm9:$offset, (i32 2), (i32 Hint))>;
+}
+
+multiclass AtomicHintPatterns<int Order, int Hint, code Pred, code GIPred> {
+ def : Pat<(atomic_hint_store<atomic_store_8, Order, Pred, GIPred> GPR32:$data, GPR64sp:$addr),
(ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
- def : Pat<(atomic_hint_store<atomic_store_16, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+ def : Pat<(atomic_hint_store<atomic_store_16, Order, Pred, GIPred> GPR32:$data, GPR64sp:$addr),
(ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
- def : Pat<(atomic_hint_store<atomic_store_32, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR32:$data),
+ def : Pat<(atomic_hint_store<atomic_store_32, Order, Pred, GIPred> GPR32:$data, GPR64sp:$addr),
(ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 Order), (i32 Hint))>;
- def : Pat<(atomic_hint_store<atomic_store_64, Rel, SeqCst, Pred, GIPred> GPR64sp:$addr, GPR64:$data),
+ def : Pat<(atomic_hint_store<atomic_store_64, Order, Pred, GIPred> GPR64:$data, GPR64sp:$addr),
(ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 Order), (i32 Hint))>;
}
let AddedComplexity = 15 in {
- defm : AtomicHintPatterns<2, 0, 0, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
- defm : AtomicHintPatterns<5, 0, 1, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
- defm : AtomicHintPatterns<7, 0, 0, 1, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
- defm : AtomicHintPatterns<2, 1, 0, 0, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
- defm : AtomicHintPatterns<5, 1, 1, 0, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
- defm : AtomicHintPatterns<7, 1, 0, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+ defm : AtomicHintPatternsAddrMode<0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+ defm : AtomicHintPatternsAddrMode<1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+
+ defm : AtomicHintPatterns<2, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+ defm : AtomicHintPatterns<5, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+ defm : AtomicHintPatterns<7, 0, [{ return isAtomicSTSHH_KEEP(N); }], [{ return isAtomicSTSHH_KEEP(MI); }]>;
+ defm : AtomicHintPatterns<2, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+ defm : AtomicHintPatterns<5, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
+ defm : AtomicHintPatterns<7, 1, [{ return isAtomicSTSHH_STRM(N); }], [{ return isAtomicSTSHH_STRM(MI); }]>;
}
//===----------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 1d75d3aa89f77..8184e0150e964 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2893,8 +2893,6 @@ AArch64InstrInfo::decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags) {
if (MMOFlags & MOAtomicHintBit1)
AtomicHint += 0b10;
- if (!isValidAArch64AtomicHintValue(AtomicHint))
- return AArch64AtomicStoreHint::HINT_NONE;
return static_cast<AArch64AtomicStoreHint>(AtomicHint);
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 3c1f7e6213a36..525ead663ff84 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2560,14 +2560,11 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
return false;
}
- unsigned HintOpc;
- unsigned StoreSize = St.getMemSizeInBits().getValue();
+ // AtomicExpandPass converts FP stores to integer stores of the equivalent
+ // bitwidth. Widen the register here if the original type was f16.
+ unsigned StoreSize = St.getMemSize().getValue();
Register ValueReg = St.getValueReg();
- switch (StoreSize) {
- case 8:
- HintOpc = AArch64::ATOMIC_STORE_HINT_B;
- break;
- case 16: {
+ if (StoreSize == 2) {
Register CastReg;
if (mi_match(ValueReg, MRI, m_GBitcast(m_Reg(CastReg)))) {
auto Undef = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF,
@@ -2579,26 +2576,72 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
ValueReg = Ins.getReg(0);
}
- HintOpc = AArch64::ATOMIC_STORE_HINT_H;
- break;
}
- case 32:
- HintOpc = AArch64::ATOMIC_STORE_HINT_S;
- break;
- case 64:
- HintOpc = AArch64::ATOMIC_STORE_HINT_D;
- break;
- default:
- llvm_unreachable("Unexpected getMemSizeInBits() value for atomic hint.");
+
+ static constexpr unsigned BaseOpcodes[] = {
+ AArch64::ATOMIC_STORE_HINT_B, AArch64::ATOMIC_STORE_HINT_H,
+ AArch64::ATOMIC_STORE_HINT_S, AArch64::ATOMIC_STORE_HINT_D};
+ static constexpr unsigned RegWOpcodes[] = {
+ AArch64::ATOMIC_STORE_HINT_BroW, AArch64::ATOMIC_STORE_HINT_HroW,
+ AArch64::ATOMIC_STORE_HINT_SroW, AArch64::ATOMIC_STORE_HINT_DroW};
+ static constexpr unsigned RegXOpcodes[] = {
+ AArch64::ATOMIC_STORE_HINT_BroX, AArch64::ATOMIC_STORE_HINT_HroX,
+ AArch64::ATOMIC_STORE_HINT_SroX, AArch64::ATOMIC_STORE_HINT_DroX};
+ static constexpr unsigned UImmOpcodes[] = {
+ AArch64::ATOMIC_STORE_HINT_Bui, AArch64::ATOMIC_STORE_HINT_Hui,
+ AArch64::ATOMIC_STORE_HINT_Sui, AArch64::ATOMIC_STORE_HINT_Dui};
+ static constexpr unsigned ImmOpcodes[] = {
+ AArch64::ATOMIC_STORE_HINT_Bi, AArch64::ATOMIC_STORE_HINT_Hi,
+ AArch64::ATOMIC_STORE_HINT_Si, AArch64::ATOMIC_STORE_HINT_Di};
+
+ AtomicOrdering Ordering = MMO.getSuccessOrdering();
+ MachineInstrBuilder StrPseudo;
+ unsigned HintOpc = 0;
+
+ // Other addressing modes can be used when the ordering is monotonic.
+ // Try to match these first, before falling back on the basic operands.
+ auto AddModeWRO = selectAddrModeWRO(St.getOperand(1), StoreSize);
+ auto AddModeXRO = selectAddrModeXRO(St.getOperand(1), StoreSize);
+ auto AddModeUImm = selectAddrModeIndexed(St.getOperand(1), StoreSize);
+ auto AddModeImm = selectAddrModeUnscaled(St.getOperand(1), StoreSize);
+
+ if (AddModeWRO && Ordering == AtomicOrdering::Monotonic) {
+ HintOpc = RegWOpcodes[Log2_32(StoreSize)];
+ StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc));
+ (*AddModeWRO)[0](StrPseudo);
+ (*AddModeWRO)[1](StrPseudo);
+ StrPseudo.addReg(ValueReg);
+ (*AddModeWRO)[2](StrPseudo);
+ } else if (AddModeXRO && Ordering == AtomicOrdering::Monotonic) {
+ HintOpc = RegXOpcodes[Log2_32(StoreSize)];
+ StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc));
+ (*AddModeXRO)[0](StrPseudo);
+ (*AddModeXRO)[1](StrPseudo);
+ StrPseudo.addReg(ValueReg);
+ (*AddModeXRO)[2](StrPseudo);
+ } else if (AddModeUImm && Ordering == AtomicOrdering::Monotonic) {
+ HintOpc = UImmOpcodes[Log2_32(StoreSize)];
+ StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc));
+ (*AddModeUImm)[0](StrPseudo);
+ StrPseudo.addReg(ValueReg);
+ (*AddModeUImm)[1](StrPseudo);
+ } else if (AddModeImm && Ordering == AtomicOrdering::Monotonic) {
+ HintOpc = ImmOpcodes[Log2_32(StoreSize)];
+ StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc));
+ (*AddModeImm)[0](StrPseudo);
+ StrPseudo.addReg(ValueReg);
+ (*AddModeImm)[1](StrPseudo);
+ } else {
+ HintOpc = BaseOpcodes[Log2_32(StoreSize)];
+ StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
+ .addReg(St.getPointerReg())
+ .addReg(ValueReg);
}
+ // Add the ordering and hint operands, before erasing the store.
unsigned HintImm = Hint == AArch64AtomicStoreHint::HINT_STSHH_KEEP ? 0 : 1;
-
- auto StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc))
- .addReg(St.getPointerReg())
- .addReg(ValueReg)
- .addImm((int)MMO.getSuccessOrdering())
- .addImm(static_cast<unsigned>(HintImm));
+ StrPseudo.addImm((int)Ordering);
+ StrPseudo.addImm(HintImm);
StrPseudo.cloneMemRefs(I);
I.eraseFromParent();
@@ -8094,8 +8137,7 @@ void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
bool AArch64InstructionSelector::isAtomicHintInst(
const MachineInstr &MI, AArch64AtomicStoreHint Hint) const {
- const GStore &St = cast<GStore>(MI);
- auto MMO = St.getMMO();
+ auto MMO = cast<GStore>(MI).getMMO();
return AArch64InstrInfo::decodeAtomicHintFlags(MMO.getFlags()) == Hint;
}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
index dfcfa92cbc6c8..21aec7dae322f 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll
@@ -6,7 +6,7 @@
; STSHH: Keep, Relaxed
;
-define dso_local void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) nounwind {
+define void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_relaxed_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh keep
@@ -16,7 +16,7 @@ define dso_local void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) noun
ret void
}
-define dso_local void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) nounwind {
+define void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_relaxed_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh keep
@@ -26,7 +26,7 @@ define dso_local void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) no
ret void
}
-define dso_local void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) nounwind {
+define void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_relaxed_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh keep
@@ -36,7 +36,7 @@ define dso_local void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) no
ret void
}
-define dso_local void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) nounwind {
+define void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_relaxed_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh keep
@@ -50,7 +50,7 @@ define dso_local void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) no
; STSHH: Keep, Release
;
-define dso_local void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat %val) nounwind {
+define void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_release_bfloat:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
@@ -62,7 +62,7 @@ define dso_local void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat %v
ret void
}
-define dso_local void @test_atomic_store_keep_release_half(ptr %ptr, half %val) nounwind {
+define void @test_atomic_store_keep_release_half(ptr %ptr, half %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_release_half:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
@@ -74,7 +74,7 @@ define dso_local void @test_atomic_store_keep_release_half(ptr %ptr, half %val)
ret void
}
-define dso_local void @test_atomic_store_keep_release_float(ptr %ptr, float %val) nounwind {
+define void @test_atomic_store_keep_release_float(ptr %ptr, float %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_release_float:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov w8, s0
@@ -85,7 +85,7 @@ define dso_local void @test_atomic_store_keep_release_float(ptr %ptr, float %val
ret void
}
-define dso_local void @test_atomic_store_keep_release_double(ptr %ptr, double %val) nounwind {
+define void @test_atomic_store_keep_release_double(ptr %ptr, double %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_release_double:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov x8, d0
@@ -100,7 +100,7 @@ define dso_local void @test_atomic_store_keep_release_double(ptr %ptr, double %v
; STSHH: Keep, SequentiallyConsistent
;
-define dso_local void @test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) nounwind {
+define void @test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_seqcst_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh keep
@@ -110,7 +110,7 @@ define dso_local void @test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) nounw
ret void
}
-define dso_local void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) nounwind {
+define void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_seqcst_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh keep
@@ -120,7 +120,7 @@ define dso_local void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) nou
ret void
}
-define dso_local void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) nounwind {
+define void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_seqcst_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh keep
@@ -130,7 +130,7 @@ define dso_local void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) nou
ret void
}
-define dso_local void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) nounwind {
+define void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) nounwind {
; CHECK-LABEL: test_atomic_store_keep_seqcst_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh keep
@@ -144,7 +144,7 @@ define dso_local void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) nou
; STSHH: Stream, Relaxed
;
-define dso_local void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat %val) nounwind {
+define void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat %val) nounwind {
; CHECK-LABEL: test_atomic_store_strm_relaxed_bfloat:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
@@ -156,7 +156,7 @@ define dso_local void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat %v
ret void
}
-define dso_local void @test_atomic_store_strm_relaxed_half(ptr %ptr, half %val) nounwind {
+define void @test_atomic_store_strm_relaxed_half(ptr %ptr, half %val) nounwind {
; CHECK-LABEL: test_atomic_store_strm_relaxed_half:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
@@ -168,7 +168,7 @@ define dso_local void @test_atomic_store_strm_relaxed_half(ptr %ptr, half %val)
ret void
}
-define dso_local void @test_atomic_store_strm_relaxed_float(ptr %ptr, float %val) nounwind {
+define void @test_atomic_store_strm_relaxed_float(ptr %ptr, float %val) nounwind {
; CHECK-LABEL: test_atomic_store_strm_relaxed_float:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov w8, s0
@@ -179,7 +179,7 @@ define dso_local void @test_atomic_store_strm_relaxed_float(ptr %ptr, float %val
ret void
}
-define dso_local void @test_atomic_store_strm_relaxed_double(ptr %ptr, double %val) nounwind {
+define void @test_atomic_store_strm_relaxed_double(ptr %ptr, double %val) nounwind {
; CHECK-LABEL: test_atomic_store_strm_relaxed_double:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov x8, d0
@@ -194,7 +194,7 @@ define dso_local void @test_atomic_store_strm_relaxed_double(ptr %ptr, double %v
; STSHH: Stream, Release
;
-define dso_local void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) nounwind {
+define void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) nounwind {
; CHECK-LABEL: test_atomic_store_stream_release_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh strm
@@ -204,7 +204,7 @@ define dso_local void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) no
ret void
}
-define dso_local void @test_atomic_store_stream_release_i16(ptr %ptr, i16 %val) nounwind {
+define void @test_atomic_store_stream_release_i16(ptr %ptr, i16 %val) nounwind {
; CHECK-LABEL: test_atomic_store_stream_release_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh strm
@@ -214,7 +214,7 @@ define dso_local void @test_atomic_store_stream_release_i16(ptr %ptr, i16 %val)
ret void
}
-define dso_local void @test_atomic_store_stream_release_i32(ptr %ptr, i32 %val) nounwind {
+define void @test_atomic_store_stream_release_i32(ptr %ptr, i32 %val) nounwind {
; CHECK-LABEL: test_atomic_store_stream_release_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh strm
@@ -224,7 +224,7 @@ define dso_local void @test_atomic_store_stream_release_i32(ptr %ptr, i32 %val)
ret void
}
-define dso_local void @test_atomic_store_stream_release_i64(ptr %ptr, i64 %val) nounwind {
+define void @test_atomic_store_stream_release_i64(ptr %ptr, i64 %val) nounwind {
; CHECK-LABEL: test_atomic_store_stream_release_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: stshh strm
@@ -238,7 +238,7 @@ define dso_local void @test_atomic_store_stream_release_i64(ptr %ptr, i64 %val)
; STSHH: Stream, SequentiallyConsistent
;
-define dso_local void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat %val) nounwind {
+define void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat %val) nounwind {
; CHECK-LABEL: test_atomic_store_stream_seqcst_bfloat:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
@@ -250,7 +250,7 @@ define dso_local void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat %
ret void
}
-define dso_local void @test_atomic_store_stream_seqcst_i16(ptr %ptr, half %val) nounwind {
+define void @test_atomic_store_stream_seqcst_i16(ptr %ptr, half %val) nounwind {
; CHECK-LABEL: test_atomic_store_stream_seqcst_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
@@ -262,7 +262,7 @@ define dso_local void @test_atomic_store_stream_seqcst_i16(ptr %ptr, half %val)
ret void
}
-define dso_local void @test_atomic_store_stream_seqcst_i32(ptr %ptr, float %val) nounwind {
+define void @test_atomic_store_stream_seqcst_i32(ptr %ptr, float %val) nounwind {
; CHECK-LABEL: test_atomic_store_stream_seqcst_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov w8, s0
@@ -273,7 +273,7 @@ define dso_local void @test_atomic_store_stream_seqcst_i32(ptr %ptr, float %val)
ret void
}
-define dso_local void @test_atomic_store_stream_seqcst_double(ptr %ptr, double %val) nounwind {
+define void @test_atomic_store_stream_seqcst_double(ptr %ptr, double %val) nounwind {
; CHECK-LABEL: test_atomic_store_stream_seqcst_double:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov x8, d0
@@ -284,5 +284,19 @@ define dso_local void @test_atomic_store_stream_seqcst_double(ptr %ptr, double %
ret void
}
+;
+; Invalid Hint
+;
+
+define void @test_atomic_store_invalid_hint(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_invalid_hint:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stlrb w1, [x0]
+; CHECK-NEXT: ret
+ store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !2
+ ret void
+}
+
!0 = !{i32 0}
!1 = !{i32 1}
+!2 = !{i32 2}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-relaxed-store-hint.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-relaxed-store-hint.ll
new file mode 100644
index 0000000000000..d80d9460c107c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-relaxed-store-hint.ll
@@ -0,0 +1,416 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -global-isel=1 < %s | FileCheck %s
+
+;
+; W register offset
+;
+
+; Int
+
+define void @relaxed_store_hint_roW_i8(ptr %ptr, i32 %offset, i8 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: strb w2, [x0, w1, sxtw]
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %ptr, i32 %offset
+ store atomic i8 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_roW_i16(ptr %ptr, i32 %offset, i16 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: strh w2, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %ptr, i32 %offset
+ store atomic i16 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_roW_i32(ptr %ptr, i32 %offset, i32 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: str w2, [x0, w1, sxtw #2]
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %ptr, i32 %offset
+ store atomic i32 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_roW_i64(ptr %ptr, i32 %offset, i64 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: str x2, [x0, w1, sxtw #3]
+; CHECK-NEXT: ret
+ %addr = getelementptr i64, ptr %ptr, i32 %offset
+ store atomic i64 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+; FP
+
+define void @relaxed_store_hint_roW_bfloat(ptr %ptr, i32 %offset, bfloat %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_bfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: strh w8, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %addr = getelementptr bfloat, ptr %ptr, i32 %offset
+ store atomic bfloat %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_roW_half(ptr %ptr, i32 %offset, half %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: strh w8, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %addr = getelementptr half, ptr %ptr, i32 %offset
+ store atomic half %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_roW_float(ptr %ptr, i32 %offset, float %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str w8, [x0, w1, sxtw #2]
+; CHECK-NEXT: ret
+ %addr = getelementptr float, ptr %ptr, i32 %offset
+ store atomic float %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_roW_double(ptr %ptr, i32 %offset, double %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roW_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str x8, [x0, w1, sxtw #3]
+; CHECK-NEXT: ret
+ %addr = getelementptr double, ptr %ptr, i32 %offset
+ store atomic double %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+;
+; X register offset
+;
+
+; Int
+
+define void @relaxed_store_hint_roX_i8(ptr %ptr, i64 %offset, i8 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: strb w2, [x0, x1]
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %ptr, i64 %offset
+ store atomic i8 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_roX_i16(ptr %ptr, i64 %offset, i16 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: strh w2, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %ptr, i64 %offset
+ store atomic i16 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_roX_i32(ptr %ptr, i64 %offset, i32 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str w2, [x0, x1, lsl #2]
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %ptr, i64 %offset
+ store atomic i32 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_roX_i64(ptr %ptr, i64 %offset, i64 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str x2, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+ %addr = getelementptr i64, ptr %ptr, i64 %offset
+ store atomic i64 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+; FP
+
+define void @relaxed_store_hint_roX_bfloat(ptr %ptr, i64 %offset, bfloat %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_bfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: strh w8, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %addr = getelementptr bfloat, ptr %ptr, i64 %offset
+ store atomic bfloat %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_roX_half(ptr %ptr, i64 %offset, half %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: strh w8, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %addr = getelementptr half, ptr %ptr, i64 %offset
+ store atomic half %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_roX_float(ptr %ptr, i64 %offset, float %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: str w8, [x0, x1, lsl #2]
+; CHECK-NEXT: ret
+ %addr = getelementptr float, ptr %ptr, i64 %offset
+ store atomic float %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_roX_double(ptr %ptr, i64 %offset, double %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_roX_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: str x8, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+ %addr = getelementptr double, ptr %ptr, i64 %offset
+ store atomic double %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+;
+; Unsigned immediate offset
+;
+
+; Int
+
+define void @relaxed_store_hint_uimm_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: strb w1, [x0, #4095]
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %ptr, i32 4095
+ store atomic i8 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_uimm_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: strh w1, [x0, #4096]
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %ptr, i32 2048
+ store atomic i16 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_uimm_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str w1, [x0, #4096]
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %ptr, i32 1024
+ store atomic i32 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_uimm_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str x1, [x0, #4096]
+; CHECK-NEXT: ret
+ %addr = getelementptr i64, ptr %ptr, i32 512
+ store atomic i64 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+; FP
+
+define void @relaxed_store_hint_uimm_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_bfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: strh w8, [x0, #4096]
+; CHECK-NEXT: ret
+ %addr = getelementptr bfloat, ptr %ptr, i32 2048
+ store atomic bfloat %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_uimm_half(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: strh w8, [x0, #4096]
+; CHECK-NEXT: ret
+ %addr = getelementptr half, ptr %ptr, i32 2048
+ store atomic half %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_uimm_float(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: str w8, [x0, #8192]
+; CHECK-NEXT: ret
+ %addr = getelementptr float, ptr %ptr, i32 2048
+ store atomic float %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_uimm_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_uimm_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: str x8, [x0, #16384]
+; CHECK-NEXT: ret
+ %addr = getelementptr double, ptr %ptr, i32 2048
+ store atomic double %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+;
+; Signed immediate offset
+;
+
+; Int
+
+define void @relaxed_store_hint_imm_i8(ptr %ptr, i8 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: sturb w1, [x0, #-256]
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %ptr, i32 -256
+ store atomic i8 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_imm_i16(ptr %ptr, i16 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: sturh w1, [x0, #-256]
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %ptr, i32 -128
+ store atomic i16 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_imm_i32(ptr %ptr, i32 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stur w1, [x0, #-256]
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %ptr, i32 -64
+ store atomic i32 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+define void @relaxed_store_hint_imm_i64(ptr %ptr, i64 %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stshh keep
+; CHECK-NEXT: stur x1, [x0, #-256]
+; CHECK-NEXT: ret
+ %addr = getelementptr i64, ptr %ptr, i32 -32
+ store atomic i64 %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !0
+ ret void
+}
+
+; FP
+
+define void @relaxed_store_hint_imm_bfloat(ptr %ptr, bfloat %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_bfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: sturh w8, [x0, #-256]
+; CHECK-NEXT: ret
+ %addr = getelementptr bfloat, ptr %ptr, i32 -128
+ store atomic bfloat %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_imm_half(ptr %ptr, half %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: sturh w8, [x0, #-256]
+; CHECK-NEXT: ret
+ %addr = getelementptr half, ptr %ptr, i32 -128
+ store atomic half %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_imm_float(ptr %ptr, float %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: sub x8, x0, #512
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str w9, [x8]
+; CHECK-NEXT: ret
+ %addr = getelementptr float, ptr %ptr, i32 -128
+ store atomic float %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+define void @relaxed_store_hint_imm_double(ptr %ptr, double %val) nounwind {
+; CHECK-LABEL: relaxed_store_hint_imm_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: sub x8, x0, #1024
+; CHECK-NEXT: stshh strm
+; CHECK-NEXT: str x9, [x8]
+; CHECK-NEXT: ret
+ %addr = getelementptr double, ptr %ptr, i32 -128
+ store atomic double %val, ptr %addr monotonic, align 8, !aarch64.atomic.hint !1
+ ret void
+}
+
+!0 = !{ i32 0 }
+!1 = !{ i32 1 }
More information about the cfe-commits
mailing list