[clang] [llvm] [AArch64][clang][llvm] Add ACLE `stshh` atomic store builtin (PR #181386)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 13 09:05:33 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Jonathan Thackray (jthackray)
<details>
<summary>Changes</summary>
Add an `__arm_atomic_store_with_stshh` implementation as defined
in the ACLE. Validate that the arguments passed are correct, and
lower it to the `stshh` intrinsic plus an atomic store with the
allowed orderings.
Gate this on `FEAT_PCDPHINT` so that availability matches
hardware support for the `STSHH` instruction. Also change
`phint_op` to use an `i64` immediate, and add a `DecodeUImm`
helper similar to the existing `DecodeSImm`.
---
Full diff: https://github.com/llvm/llvm-project/pull/181386.diff
10 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsAArch64.def (+3)
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+9)
- (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+7)
- (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+48)
- (modified) clang/lib/Headers/arm_acle.h (+6)
- (modified) clang/lib/Sema/SemaARM.cpp (+136)
- (added) clang/test/CodeGen/AArch64/pcdphint-atomic-store.c (+31)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+2)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+10-2)
- (modified) llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp (+13)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 5d7e956b73b87..5d747f4d9c4b2 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -135,6 +135,9 @@ TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
+// Atomic store with PCDPHINT
+TARGET_BUILTIN(__builtin_arm_atomic_store_with_stshh, "v.", "t", "pcdphint")
+
// Armv9.3-A Guarded Control Stack
TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
TARGET_BUILTIN(__builtin_arm_gcsss, "v*v*", "n", "gcs")
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 85a023435ba23..b17544a4a3daf 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9475,6 +9475,15 @@ def err_atomic_builtin_must_be_pointer_intfltptr : Error<
def err_atomic_builtin_pointer_size : Error<
"address argument to atomic builtin must be a pointer to 1,2,4,8 or 16 byte "
"type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_type : Error<
+ "address argument to '__arm_atomic_store_with_stshh' must be a pointer to an "
+ "8,16,32, or 64-bit integer type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_value_type : Error<
+ "value argument to '__arm_atomic_store_with_stshh' must be an integer of the "
+ "same size as the pointed-to type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_order : Error<
+ "memory order argument to '__arm_atomic_store_with_stshh' must be one of "
+ "__ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST">;
def err_atomic_exclusive_builtin_pointer_size : Error<
"address argument to load or store exclusive builtin must be a pointer to "
// Because the range of legal sizes for load/store exclusive varies with the
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 71cf896aede10..b2311e54415b8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1044,6 +1044,13 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
return mlir::Value{};
}
+ if (builtinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
+ }
+
if (builtinID == clang::AArch64::BI__builtin_arm_rndr ||
builtinID == clang::AArch64::BI__builtin_arm_rndrrs) {
cgm.errorNYI(expr->getSourceRange(),
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index cb6bbfe07538e..29cf378dbaeca 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5290,6 +5290,54 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, Args);
}
+ if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
+ const Expr *Arg0 = E->getArg(0);
+ const Expr *Arg1 = E->getArg(1);
+ const Expr *Arg2 = E->getArg(2);
+ const Expr *Arg3 = E->getArg(3);
+
+ Value *StoreAddr = EmitScalarExpr(Arg0);
+ Value *StoreValue = EmitScalarExpr(Arg1);
+
+ llvm::APSInt OrderVal = Arg2->EvaluateKnownConstInt(getContext());
+ llvm::APSInt RetVal = Arg3->EvaluateKnownConstInt(getContext());
+
+ llvm::AtomicOrdering Ordering;
+ switch (OrderVal.getZExtValue()) {
+ case 0: // __ATOMIC_RELAXED
+ Ordering = llvm::AtomicOrdering::Monotonic;
+ break;
+ case 3: // __ATOMIC_RELEASE
+ Ordering = llvm::AtomicOrdering::Release;
+ break;
+ case 5: // __ATOMIC_SEQ_CST
+ Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
+ break;
+ default:
+ llvm_unreachable(
+ "unexpected memory order for __arm_atomic_store_with_stshh");
+ }
+
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
+ llvm::Value *Arg = llvm::ConstantInt::get(Int64Ty, RetVal.getZExtValue());
+ CallInst *HintCall = Builder.CreateCall(F, Arg);
+
+ QualType ValQT = Arg0->IgnoreParenImpCasts()
+ ->getType()
+ ->castAs<PointerType>()
+ ->getPointeeType();
+ llvm::Type *ValTy = ConvertType(ValQT);
+
+ CharUnits ValAlign = getContext().getTypeAlignInChars(ValQT);
+ Address Addr = Address(StoreAddr, ValTy, ValAlign);
+ LValue LVal = MakeAddrLValue(Addr, ValQT);
+
+ EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
+ /* isVolatile= */ false,
+ /* isInit= */ false);
+ return HintCall;
+ }
+
if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 9a6b6a837fa5a..ec06072bcc4bf 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -840,6 +840,12 @@ __rndrrs(uint64_t *__p) {
}
#endif
+/* Atomic store with PCDPHINT */
+#if defined(__ARM_FEATURE_PCDPHINT)
+#define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret) \
+ __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
+#endif
+
/* 11.2 Guarded Control Stack intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 53e8c002a1962..e2b9000a6acea 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1105,6 +1105,139 @@ bool SemaARM::CheckARMBuiltinFunctionCall(const TargetInfo &TI,
}
}
+static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM &S,
+ CallExpr *TheCall) {
+ Sema &SemaRef = S.SemaRef;
+ ASTContext &Context = S.getASTContext();
+ DeclRefExpr *DRE =
+ cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
+ SourceLocation Loc = DRE->getBeginLoc();
+
+ // Ensure we have the proper number of arguments.
+ if (SemaRef.checkArgCount(TheCall, 4))
+ return true;
+
+ ExprResult PtrRes =
+ SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+
+ // Bail if conversion failed.
+ if (PtrRes.isInvalid())
+ return true;
+
+ TheCall->setArg(0, PtrRes.get());
+ Expr *PointerArg = PtrRes.get();
+
+ // Check arg 0 is a pointer type, err out if not
+ const PointerType *PointerTy = PointerArg->getType()->getAs<PointerType>();
+ if (!PointerTy) {
+ SemaRef.Diag(Loc, diag::err_atomic_builtin_must_be_pointer)
+ << PointerArg->getType() << 0 << PointerArg->getSourceRange();
+ return true;
+ }
+
+ // Reject const-qualified pointee types, with an error
+ QualType ValType = PointerTy->getPointeeType();
+ if (ValType.isConstQualified()) {
+ SemaRef.Diag(Loc, diag::err_atomic_builtin_cannot_be_const)
+ << PointerArg->getType() << PointerArg->getSourceRange();
+ return true;
+ }
+
+ // Only integer element types are supported.
+ ValType = ValType.getUnqualifiedType();
+ if (!ValType->isIntegerType()) {
+ SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_type)
+ << PointerArg->getType() << PointerArg->getSourceRange();
+ return true;
+ }
+
+ // Only 8/16/32/64-bit integers are supported.
+ unsigned Bits = Context.getTypeSize(ValType);
+ switch (Bits) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ break;
+ default:
+ SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_type)
+ << PointerArg->getType() << PointerArg->getSourceRange();
+ return true;
+ }
+
+ ExprResult ValRes =
+ SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+
+ // Bail if conversion failed.
+ if (ValRes.isInvalid())
+ return true;
+
+ // Check if value is an integer type.
+ Expr *ValArg = ValRes.get();
+ if (!ValArg->getType()->isIntegerType()) {
+ SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
+ << ValArg->getType() << ValArg->getSourceRange();
+ return true;
+ }
+
+ // Value width must match the pointee width.
+ if (Context.getTypeSize(ValArg->getType()) != Bits) {
+ SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
+ << ValArg->getType() << ValArg->getSourceRange();
+ return true;
+ }
+
+ // Prepare a cast if the value type differs
+ ExprResult ValArgRes;
+ CastKind CK =
+ ValArg->getType().getCanonicalType() == ValType.getCanonicalType()
+ ? CK_NoOp
+ : CK_IntegralCast;
+
+ // Apply cast to the pointee type.
+ ValArgRes = SemaRef.ImpCastExprToType(ValArg, ValType, CK);
+
+ // Bail if cast failed.
+ if (ValArgRes.isInvalid())
+ return true;
+
+ TheCall->setArg(1, ValArgRes.get());
+ Expr *OrderArg = TheCall->getArg(2);
+
+ // Defer validation for dependent memory_order arguments.
+ if (OrderArg->isValueDependent())
+ return false;
+
+ // Require an order value.
+ std::optional<llvm::APSInt> OrderValOpt =
+ OrderArg->getIntegerConstantExpr(Context);
+ if (!OrderValOpt) {
+ SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_order)
+ << OrderArg->getSourceRange();
+ return true;
+ }
+
+ // Validate order; not used here; used later in codegen.
+ llvm::APSInt OrderVal = *OrderValOpt;
+ int64_t Order = OrderVal.getSExtValue();
+ switch (Order) {
+ case 0:
+ case 3:
+ case 5:
+ break;
+ default:
+ SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_order)
+ << OrderArg->getSourceRange();
+ return true;
+ }
+
+ // Arg 3 (retention policy) must be between KEEP(0) and STRM(1).
+ if (SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 1))
+ return true;
+
+ return false;
+}
+
bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
unsigned BuiltinID,
CallExpr *TheCall) {
@@ -1115,6 +1248,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
return CheckARMBuiltinExclusiveCall(TI, BuiltinID, TheCall);
}
+ if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_stshh)
+ return CheckAArch64AtomicStoreWithStshhCall(*this, TheCall);
+
if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 3) ||
diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
new file mode 100644
index 0000000000000..79510be522b6a
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint -D__ARM_FEATURE_PCDPHINT -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_acle.h>
+
+void test_u8(unsigned char *p, unsigned char v) {
+ __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+}
+// CHECK-LABEL: @test_u8
+// CHECK: call void @llvm.aarch64.stshh(i64 0)
+// CHECK-NEXT: store atomic i8 %{{.*}}, ptr %{{.*}} monotonic
+
+void test_u16(unsigned short *p, unsigned short v) {
+ __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELEASE, 1);
+}
+// CHECK-LABEL: @test_u16
+// CHECK: call void @llvm.aarch64.stshh(i64 1)
+// CHECK-NEXT: store atomic i16 %{{.*}}, ptr %{{.*}} release
+
+void test_u32(unsigned int *p, unsigned int v) {
+ __arm_atomic_store_with_stshh(p, v, __ATOMIC_SEQ_CST, 0);
+}
+// CHECK-LABEL: @test_u32
+// CHECK: call void @llvm.aarch64.stshh(i64 0)
+// CHECK-NEXT: store atomic i32 %{{.*}}, ptr %{{.*}} seq_cst
+
+void test_u64(unsigned long long *p, unsigned long long v) {
+ __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 1);
+}
+// CHECK-LABEL: @test_u64
+// CHECK: call void @llvm.aarch64.stshh(i64 1)
+// CHECK-NEXT: store atomic i64 %{{.*}}, ptr %{{.*}} monotonic
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 7f4b7383415c1..19ba3a5a740c5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -62,6 +62,8 @@ def int_aarch64_frint64x
// HINT
def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
+def int_aarch64_stshh
+ : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrHasSideEffects]>;
def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 7d4e034ca16c8..69fb01ada0b40 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1840,16 +1840,24 @@ def PHintInstOperand : AsmOperandClass {
let ParserMethod = "tryParsePHintInstOperand";
}
-def phint_op : Operand<i32> {
+def phint_op : Operand<i64> {
let ParserMatchClass = PHintInstOperand;
let PrintMethod = "printPHintOp";
let OperandType = "OPERAND_IMMEDIATE";
+ let MIOperandInfo = (ops i64imm);
+ let DecoderMethod = "DecodeUImm<3>";
}
class STSHHI
- : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy", []>,
+ : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy",
+ [(int_aarch64_stshh (i64 imm0_7:$policy))]>,
Sched<[WriteHint]> {
bits<3> policy;
+ // NOTE: ideally, this would have mayLoad = 0, mayStore = 0, but we cannot
+ // model patterns with sufficiently fine granularity.
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 1;
let Inst{20-12} = 0b000011001;
let Inst{11-8} = 0b0110;
let Inst{7-5} = policy;
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 4eb762a00d477..8fa1913ce24e5 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -38,6 +38,9 @@ using DecodeStatus = MCDisassembler::DecodeStatus;
template <int Bits>
static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
const MCDisassembler *Decoder);
+template <int Bits>
+static DecodeStatus DecodeUImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+ const MCDisassembler *Decoder);
#define Success MCDisassembler::Success
#define Fail MCDisassembler::Fail
@@ -1442,6 +1445,16 @@ static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
return Success;
}
+template <int Bits>
+static DecodeStatus DecodeUImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (Imm & ~((1ULL << Bits) - 1))
+ return Fail;
+
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return Success;
+}
+
// Decode 8-bit signed/unsigned immediate for a given element width.
template <int ElementWidth>
static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm, uint64_t Addr,
``````````
</details>
https://github.com/llvm/llvm-project/pull/181386
More information about the llvm-commits
mailing list