[clang] [llvm] [AArch64][clang][llvm] Add ACLE `stshh` atomic store builtin (PR #181386)

Jonathan Thackray via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 13 09:02:40 PST 2026


https://github.com/jthackray created https://github.com/llvm/llvm-project/pull/181386

Add `__arm_atomic_store_with_stshh` implementation as defined
in the ACLE. Validate that the arguments passed are correct, and
lower it to the stshh intrinsic plus an atomic store with the
allowed orderings.

Gate this on FEAT_PCDPHINT so that availability matches
hardware support for the `STSHH` instruction. Use an i64
immediate and side-effect modeling to satisfy tablegen and decoding.

>From a2f7504eb0618af821eda1ba47b6d13590200e59 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Fri, 13 Feb 2026 14:42:05 +0000
Subject: [PATCH] [AArch64][clang][llvm] Add ACLE `stshh` atomic store builtin

Add `__arm_atomic_store_with_stshh` implementation as defined
in the ACLE. Validate that the arguments passed are correct, and
lower it to the stshh intrinsic plus an atomic store with the
allowed orderings.

Gate this on FEAT_PCDPHINT so that availability matches
hardware support for the `STSHH` instruction. Use an i64
immediate and side-effect modeling to satisfy tablegen and decoding.
---
 clang/include/clang/Basic/BuiltinsAArch64.def |   3 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   9 ++
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |   7 +
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |  48 +++++++
 clang/lib/Headers/arm_acle.h                  |   6 +
 clang/lib/Sema/SemaARM.cpp                    | 136 ++++++++++++++++++
 .../CodeGen/AArch64/pcdphint-atomic-store.c   |  31 ++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |   2 +
 .../lib/Target/AArch64/AArch64InstrFormats.td |  12 +-
 .../Disassembler/AArch64Disassembler.cpp      |  13 ++
 10 files changed, 265 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/pcdphint-atomic-store.c

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 5d7e956b73b87..5d747f4d9c4b2 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -135,6 +135,9 @@ TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
 TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
 
+// Atomic store with PCDPHINT
+TARGET_BUILTIN(__builtin_arm_atomic_store_with_stshh, "v.", "t", "pcdphint")
+
 // Armv9.3-A Guarded Control Stack
 TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
 TARGET_BUILTIN(__builtin_arm_gcsss, "v*v*", "n", "gcs")
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 85a023435ba23..b17544a4a3daf 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9475,6 +9475,15 @@ def err_atomic_builtin_must_be_pointer_intfltptr : Error<
 def err_atomic_builtin_pointer_size : Error<
   "address argument to atomic builtin must be a pointer to 1,2,4,8 or 16 byte "
   "type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_type : Error<
+  "address argument to '__arm_atomic_store_with_stshh' must be a pointer to an "
+  "8,16,32, or 64-bit integer type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_value_type : Error<
+  "value argument to '__arm_atomic_store_with_stshh' must be an integer of the "
+  "same size as the pointed-to type (%0 invalid)">;
+def err_arm_atomic_store_with_stshh_bad_order : Error<
+  "memory order argument to '__arm_atomic_store_with_stshh' must be one of "
+  "__ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST">;
 def err_atomic_exclusive_builtin_pointer_size : Error<
   "address argument to load or store exclusive builtin must be a pointer to "
   // Because the range of legal sizes for load/store exclusive varies with the
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 71cf896aede10..b2311e54415b8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1044,6 +1044,13 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
     return mlir::Value{};
   }
 
+  // CIR lowering for __builtin_arm_atomic_store_with_stshh is not implemented
+  // yet; report NYI so the ClangIR pipeline fails loudly instead of silently
+  // miscompiling the builtin.
+  if (builtinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
+  }
+
   if (builtinID == clang::AArch64::BI__builtin_arm_rndr ||
       builtinID == clang::AArch64::BI__builtin_arm_rndrrs) {
     cgm.errorNYI(expr->getSourceRange(),
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index cb6bbfe07538e..29cf378dbaeca 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5290,6 +5290,54 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, Args);
   }
 
+  // Lower __arm_atomic_store_with_stshh(ptr, value, order, policy): emit the
+  // llvm.aarch64.stshh hint intrinsic carrying the retention policy, then an
+  // atomic store of the value at the requested ordering. SemaARM has already
+  // validated the argument count, pointer/value types, the ordering constant
+  // (RELAXED/RELEASE/SEQ_CST only), and the policy range [0,1].
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
+    const Expr *Arg0 = E->getArg(0); // pointer to the stored-to object
+    const Expr *Arg1 = E->getArg(1); // value to store (cast to pointee type)
+    const Expr *Arg2 = E->getArg(2); // memory order constant
+    const Expr *Arg3 = E->getArg(3); // retention policy: 0 (KEEP) or 1 (STRM)
+
+    Value *StoreAddr  = EmitScalarExpr(Arg0);
+    Value *StoreValue = EmitScalarExpr(Arg1);
+
+    // Both constants are guaranteed foldable here by the Sema checks.
+    llvm::APSInt OrderVal = Arg2->EvaluateKnownConstInt(getContext());
+    llvm::APSInt RetVal   = Arg3->EvaluateKnownConstInt(getContext());
+
+    // Map the C11 __ATOMIC_* values accepted by Sema onto LLVM orderings.
+    llvm::AtomicOrdering Ordering;
+    switch (OrderVal.getZExtValue()) {
+    case 0: // __ATOMIC_RELAXED
+      Ordering = llvm::AtomicOrdering::Monotonic;
+      break;
+    case 3: // __ATOMIC_RELEASE
+      Ordering = llvm::AtomicOrdering::Release;
+      break;
+    case 5: // __ATOMIC_SEQ_CST
+      Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
+      break;
+    default:
+      // Unreachable: Sema rejects every other ordering before codegen runs.
+      llvm_unreachable(
+          "unexpected memory order for __arm_atomic_store_with_stshh");
+    }
+
+    // Emit the hint first so the STSHH precedes the store it annotates.
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh);
+    llvm::Value *Arg = llvm::ConstantInt::get(Int64Ty, RetVal.getZExtValue());
+    CallInst *HintCall = Builder.CreateCall(F, Arg);
+
+    // NOTE(review): IgnoreParenImpCasts() may strip an array-to-pointer decay
+    // inserted by Sema, leaving an array-typed expression for which
+    // castAs<PointerType>() would assert — confirm, or use Arg0->getType().
+    QualType ValQT = Arg0->IgnoreParenImpCasts()
+                         ->getType()
+                         ->castAs<PointerType>()
+                         ->getPointeeType();
+    llvm::Type *ValTy = ConvertType(ValQT);
+
+    // Build an aligned lvalue for the destination and store atomically.
+    CharUnits ValAlign = getContext().getTypeAlignInChars(ValQT);
+    Address Addr = Address(StoreAddr, ValTy, ValAlign);
+    LValue LVal = MakeAddrLValue(Addr, ValQT);
+
+    EmitAtomicStore(RValue::get(StoreValue), LVal, Ordering,
+                    /* isVolatile= */ false,
+                    /* isInit= */ false);
+    return HintCall;
+  }
+
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
       BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
 
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 9a6b6a837fa5a..ec06072bcc4bf 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -840,6 +840,12 @@ __rndrrs(uint64_t *__p) {
 }
 #endif
 
+/* Atomic store with PCDPHINT */
+#if defined(__ARM_FEATURE_PCDPHINT)
+#define __arm_atomic_store_with_stshh(ptr, data, memory_order, ret)            \
+  __builtin_arm_atomic_store_with_stshh((ptr), (data), (memory_order), (ret))
+#endif
+
 /* 11.2 Guarded Control Stack intrinsics */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 static __inline__ void * __attribute__((__always_inline__, __nodebug__))
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 53e8c002a1962..e2b9000a6acea 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1105,6 +1105,139 @@ bool SemaARM::CheckARMBuiltinFunctionCall(const TargetInfo &TI,
   }
 }
 
+// Semantic checking for __builtin_arm_atomic_store_with_stshh(ptr, value,
+// memory_order, retention_policy).
+//
+// Validates: exactly 4 arguments; arg 0 is a pointer to a non-const
+// 8/16/32/64-bit integer; arg 1 is an integer of the same width (implicitly
+// cast to the pointee type in place); arg 2 is a constant ordering of
+// __ATOMIC_RELAXED, __ATOMIC_RELEASE, or __ATOMIC_SEQ_CST (deferred when
+// value-dependent); arg 3 is a constant in [0,1] (KEEP/STRM).
+//
+// Returns true on error (the call is ill-formed), false otherwise. On
+// success the converted pointer and cast value have been written back onto
+// TheCall for codegen to consume.
+static bool CheckAArch64AtomicStoreWithStshhCall(SemaARM &S,
+                                                 CallExpr *TheCall) {
+  Sema &SemaRef = S.SemaRef;
+  ASTContext &Context = S.getASTContext();
+  DeclRefExpr *DRE =
+      cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
+  SourceLocation Loc = DRE->getBeginLoc();
+
+  // Ensure we have the proper number of arguments.
+  if (SemaRef.checkArgCount(TheCall, 4))
+    return true;
+
+  ExprResult PtrRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+
+  // Bail if conversion failed.
+  if (PtrRes.isInvalid())
+    return true;
+
+  // Write the converted pointer back so codegen sees the decayed form.
+  TheCall->setArg(0, PtrRes.get());
+  Expr *PointerArg = PtrRes.get();
+
+  // Check arg 0 is a pointer type, err out if not
+  const PointerType *PointerTy = PointerArg->getType()->getAs<PointerType>();
+  if (!PointerTy) {
+    SemaRef.Diag(Loc, diag::err_atomic_builtin_must_be_pointer)
+        << PointerArg->getType() << 0 << PointerArg->getSourceRange();
+    return true;
+  }
+
+  // Reject const-qualified pointee types, with an error
+  QualType ValType = PointerTy->getPointeeType();
+  if (ValType.isConstQualified()) {
+    SemaRef.Diag(Loc, diag::err_atomic_builtin_cannot_be_const)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return true;
+  }
+
+  // Only integer element types are supported.
+  ValType = ValType.getUnqualifiedType();
+  if (!ValType->isIntegerType()) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_type)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return true;
+  }
+
+  // Only 8/16/32/64-bit integers are supported.
+  unsigned Bits = Context.getTypeSize(ValType);
+  switch (Bits) {
+  case 8:
+  case 16:
+  case 32:
+  case 64:
+    break;
+  default:
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_type)
+        << PointerArg->getType() << PointerArg->getSourceRange();
+    return true;
+  }
+
+  ExprResult ValRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+
+  // Bail if conversion failed.
+  if (ValRes.isInvalid())
+    return true;
+
+  // Check if value is an integer type.
+  Expr *ValArg = ValRes.get();
+  if (!ValArg->getType()->isIntegerType()) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
+        << ValArg->getType() << ValArg->getSourceRange();
+    return true;
+  }
+
+  // Value width must match the pointee width.
+  if (Context.getTypeSize(ValArg->getType()) != Bits) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_value_type)
+        << ValArg->getType() << ValArg->getSourceRange();
+    return true;
+  }
+
+  // Prepare a cast if the value type differs (e.g. signedness mismatch of
+  // equal width); CK_NoOp when the canonical types already agree.
+  ExprResult ValArgRes;
+  CastKind CK =
+      ValArg->getType().getCanonicalType() == ValType.getCanonicalType()
+          ? CK_NoOp
+          : CK_IntegralCast;
+
+  // Apply cast to the pointee type.
+  ValArgRes = SemaRef.ImpCastExprToType(ValArg, ValType, CK);
+
+  // Bail if cast failed.
+  if (ValArgRes.isInvalid())
+    return true;
+
+  TheCall->setArg(1, ValArgRes.get());
+  Expr *OrderArg = TheCall->getArg(2);
+
+  // Defer validation for dependent memory_order arguments; Sema re-runs this
+  // check at template instantiation time.
+  if (OrderArg->isValueDependent())
+    return false;
+
+  // Require an order value.
+  std::optional<llvm::APSInt> OrderValOpt =
+      OrderArg->getIntegerConstantExpr(Context);
+  if (!OrderValOpt) {
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_order)
+        << OrderArg->getSourceRange();
+    return true;
+  }
+
+  // Validate order; not used here; used later in codegen.
+  llvm::APSInt OrderVal = *OrderValOpt;
+  int64_t Order = OrderVal.getSExtValue();
+  switch (Order) {
+  case 0: // __ATOMIC_RELAXED
+  case 3: // __ATOMIC_RELEASE
+  case 5: // __ATOMIC_SEQ_CST
+    break;
+  default:
+    SemaRef.Diag(Loc, diag::err_arm_atomic_store_with_stshh_bad_order)
+        << OrderArg->getSourceRange();
+    return true;
+  }
+
+  // Arg 3 (retention policy) must be between KEEP(0) and STRM(1).
+  if (SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 1))
+    return true;
+
+  return false;
+}
+
 bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
                                               unsigned BuiltinID,
                                               CallExpr *TheCall) {
@@ -1115,6 +1248,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
     return CheckARMBuiltinExclusiveCall(TI, BuiltinID, TheCall);
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_stshh)
+    return CheckAArch64AtomicStoreWithStshhCall(*this, TheCall);
+
   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
     return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) ||
            SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 3) ||
diff --git a/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
new file mode 100644
index 0000000000000..79510be522b6a
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/pcdphint-atomic-store.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +pcdphint -D__ARM_FEATURE_PCDPHINT -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_acle.h>
+
+void test_u8(unsigned char *p, unsigned char v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 0);
+}
+// CHECK-LABEL: @test_u8
+// CHECK: call void @llvm.aarch64.stshh(i64 0)
+// CHECK-NEXT: store atomic i8 %{{.*}}, ptr %{{.*}} monotonic
+
+void test_u16(unsigned short *p, unsigned short v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELEASE, 1);
+}
+// CHECK-LABEL: @test_u16
+// CHECK: call void @llvm.aarch64.stshh(i64 1)
+// CHECK-NEXT: store atomic i16 %{{.*}}, ptr %{{.*}} release
+
+void test_u32(unsigned int *p, unsigned int v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_SEQ_CST, 0);
+}
+// CHECK-LABEL: @test_u32
+// CHECK: call void @llvm.aarch64.stshh(i64 0)
+// CHECK-NEXT: store atomic i32 %{{.*}}, ptr %{{.*}} seq_cst
+
+void test_u64(unsigned long long *p, unsigned long long v) {
+  __arm_atomic_store_with_stshh(p, v, __ATOMIC_RELAXED, 1);
+}
+// CHECK-LABEL: @test_u64
+// CHECK: call void @llvm.aarch64.stshh(i64 1)
+// CHECK-NEXT: store atomic i64 %{{.*}}, ptr %{{.*}} monotonic
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 7f4b7383415c1..19ba3a5a740c5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -62,6 +62,8 @@ def int_aarch64_frint64x
 // HINT
 
 def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
+def int_aarch64_stshh
+    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrHasSideEffects]>;
 
 def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
     [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 7d4e034ca16c8..69fb01ada0b40 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1840,16 +1840,24 @@ def PHintInstOperand : AsmOperandClass {
     let ParserMethod = "tryParsePHintInstOperand";
 }
 
-def phint_op : Operand<i32> {
+def phint_op : Operand<i64> {
     let ParserMatchClass = PHintInstOperand;
    let PrintMethod = "printPHintOp";
    let OperandType = "OPERAND_IMMEDIATE";
+   let MIOperandInfo = (ops i64imm);
+  let DecoderMethod = "DecodeUImm<3>";
 }
 
 class STSHHI
-    : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy", []>,
+    : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy",
+                    [(int_aarch64_stshh (i64 imm0_7:$policy))]>,
       Sched<[WriteHint]> {
   bits<3> policy;
+  // NOTE: ideally, this would have mayLoad = 0, mayStore = 0, but we cannot
+  // model patterns with sufficiently fine granularity.
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
   let Inst{20-12} = 0b000011001;
   let Inst{11-8} = 0b0110;
   let Inst{7-5} = policy;
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 4eb762a00d477..8fa1913ce24e5 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -38,6 +38,9 @@ using DecodeStatus = MCDisassembler::DecodeStatus;
 template <int Bits>
 static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
                                const MCDisassembler *Decoder);
+template <int Bits>
+static DecodeStatus DecodeUImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+                               const MCDisassembler *Decoder);
 
 #define Success MCDisassembler::Success
 #define Fail MCDisassembler::Fail
@@ -1442,6 +1445,16 @@ static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
   return Success;
 }
 
+// Decode an unsigned immediate of width `Bits`, rejecting any encoding with
+// bits set outside the low `Bits` positions (e.g. the 3-bit STSHH policy
+// field decoded via DecodeUImm<3>).
+template <int Bits>
+static DecodeStatus DecodeUImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+                               const MCDisassembler *Decoder) {
+  // Out-of-range immediate: some bit above the low Bits bits is set.
+  if (Imm & ~((1ULL << Bits) - 1))
+    return Fail;
+
+  Inst.addOperand(MCOperand::createImm(Imm));
+  return Success;
+}
+
 // Decode 8-bit signed/unsigned immediate for a given element width.
 template <int ElementWidth>
 static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm, uint64_t Addr,



More information about the cfe-commits mailing list