[clang] [llvm] [aarch64] Add more MSVC intrinsics (atomic part) (PR #194635)
Adhemerval Zanella via cfe-commits
cfe-commits at lists.llvm.org
Sat Jun 6 04:52:43 PDT 2026
https://github.com/zatrazz updated https://github.com/llvm/llvm-project/pull/194635
>From 564eefcbaafbea6d8edbfb8f6793230f52a53df7 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Mon, 13 Apr 2026 16:04:27 -0300
Subject: [PATCH 01/26] [aarch64] Add support for the __ldar{8|16|32|64} MS
intrinsics (#121689)
Adds support for the following MSVC intrinsics:
* `__ldar8` - maps to LDARB
* `__ldar16` - maps to LDARH
* `__ldar32` - maps to LDAR
* `__ldar64` - maps to LDAR
The lowering uses SeqCst atomic loads because with RCPC enabled
the AArch64 backend lowers acquire loads to LDAPR* instead of LDAR*.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 7 ++++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 17 ++++++++++
clang/lib/Headers/arm64intr.h | 13 ++++++++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 32 +++++++++++++++++++
4 files changed, 69 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 15257f3db5b41..45ba96d014328 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -409,3 +409,10 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", Header = "intrin.h" in {
def __hlt : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int, ...)">;
}
+
+let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES", Header = "intrin.h" in {
+ def __ldar8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char const volatile *)">;
+ def __ldar16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short const volatile *)">;
+ def __ldar32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int const volatile *)">;
+ def __ldar64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int const volatile*)">;
+}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index ece8ff21561cf..c40ec9a866691 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5327,6 +5327,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return ConstantInt::get(Builder.getInt32Ty(), 0);
}
+ if (BuiltinID == AArch64::BI__ldar8 || BuiltinID == AArch64::BI__ldar16 ||
+ BuiltinID == AArch64::BI__ldar32 || BuiltinID == AArch64::BI__ldar64) {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ QualType ElTy = E->getArg(0)->getType()->getPointeeType();
+ CharUnits LoadSize = CGM.getContext().getTypeSizeInChars(ElTy);
+ llvm::Type *ITy =
+ llvm::IntegerType::get(getLLVMContext(), LoadSize.getQuantity() * 8);
+ llvm::LoadInst *Load = Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
+ // We need SeqCst instead of Acquire because with RCPC enabled the AArch64
+ // backend lowers Acquire loads to LDAPR* instead of LDAR*. SeqCst has no
+ // RCPC override and always maps to LDAR*. This is the same approach used by
+ // __iso_volatile_load (which uses Monotonic plus volatile for plain ldr).
+ Load->setAtomic(llvm::AtomicOrdering::SequentiallyConsistent);
+ Load->setVolatile(true);
+ return Load;
+ }
+
if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
return Builder.CreateFPTrunc(
Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 53a3d57a6e9d1..8eb36b5624ddb 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -41,5 +41,18 @@ typedef enum
_ARM64_BARRIER_OSHLD = 0x1
} _ARM64INTR_BARRIER_TYPE;
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+unsigned __int8 __ldar8(const volatile unsigned __int8 *);
+unsigned __int16 __ldar16(const volatile unsigned __int16 *);
+unsigned __int32 __ldar32(const volatile unsigned __int32 *);
+unsigned __int64 __ldar64(const volatile unsigned __int64 *);
+
+#ifdef __cplusplus
+}
+#endif
+
#endif /* __ARM64INTR_H */
#endif /* _MSC_VER */
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index e6a415a0d8805..c1873906808b8 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -668,6 +668,38 @@ void check__prefetch2(void *arg1) {
// CHECK-LINUX: error: call to undeclared function '__prefetch2'
+unsigned char check__ldar8(unsigned char volatile *p) {
+ return __ldar8(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__ldar8(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = load atomic volatile i8, ptr %{{.*}} seq_cst, align 1
+// CHECK-MSCOMPAT: ret i8 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__ldar8'
+
+unsigned short check__ldar16(unsigned short volatile *p) {
+ return __ldar16(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @check__ldar16(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = load atomic volatile i16, ptr %{{.*}} seq_cst, align 2
+// CHECK-MSCOMPAT: ret i16 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__ldar16'
+
+unsigned int check__ldar32(unsigned int volatile *p) {
+ return __ldar32(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @check__ldar32(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = load atomic volatile i32, ptr %{{.*}} seq_cst, align 4
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__ldar32'
+
+unsigned long long int check__ldar64(unsigned long long int volatile *p) {
+ return __ldar64(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @check__ldar64(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = load atomic volatile i64, ptr %{{.*}} seq_cst, align 8
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__ldar64'
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From 308e953e9c48c464b65d975a3b9648b4dc98357c Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Wed, 15 Apr 2026 09:43:49 -0300
Subject: [PATCH 02/26] [aarch64] Add support for the __stlr{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__stlr8` - maps to STLRB.
* `__stlr16` - maps to STLRH.
* `__stlr32` - maps to STLR.
* `__stlr64` - maps to STLR.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 4 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 12 +++++
clang/lib/Headers/arm64intr.h | 5 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 44 +++++++++++++++++++
4 files changed, 65 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 45ba96d014328..dd6d4e02c80e2 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -415,4 +415,8 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __ldar16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short const volatile *)">;
def __ldar32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int const volatile *)">;
def __ldar64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int const volatile*)">;
+ def __stlr8 : AArch64NoPrefixTargetLibBuiltin<"void (unsigned char volatile *, unsigned char)">;
+ def __stlr16 : AArch64NoPrefixTargetLibBuiltin<"void (unsigned short volatile *, unsigned short)">;
+ def __stlr32 : AArch64NoPrefixTargetLibBuiltin<"void (unsigned int volatile *, unsigned int)">;
+ def __stlr64 : AArch64NoPrefixTargetLibBuiltin<"void (unsigned long long int volatile *, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index c40ec9a866691..457a1d38ff1a2 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5344,6 +5344,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Load;
}
+ if (BuiltinID == AArch64::BI__stlr8 || BuiltinID == AArch64::BI__stlr16 ||
+ BuiltinID == AArch64::BI__stlr32 || BuiltinID == AArch64::BI__stlr64) {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ QualType ElTy = E->getArg(0)->getType()->getPointeeType();
+ CharUnits StoreSize = CGM.getContext().getTypeSizeInChars(ElTy);
+ llvm::StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, StoreSize);
+ Store->setAtomic(llvm::AtomicOrdering::Release);
+ Store->setVolatile(true);
+ return Store;
+ }
+
if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
return Builder.CreateFPTrunc(
Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 8eb36b5624ddb..950ecd6aa38cc 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -50,6 +50,11 @@ unsigned __int16 __ldar16(const volatile unsigned __int16 *);
unsigned __int32 __ldar32(const volatile unsigned __int32 *);
unsigned __int64 __ldar64(const volatile unsigned __int64 *);
+void __stlr8(unsigned __int8 volatile *, unsigned __int8);
+void __stlr16(unsigned __int16 volatile *, unsigned __int16);
+void __stlr32(unsigned __int32 volatile *, unsigned __int32);
+void __stlr64(unsigned __int64 volatile *, unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index c1873906808b8..2698a35f0c5aa 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -700,6 +700,50 @@ unsigned long long int check__ldar64(unsigned long long int volatile *p) {
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__ldar64'
+void test__stlr8(unsigned __int8 volatile *p, unsigned __int8 v)
+{
+ __stlr8 (p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}void @test__stlr8(ptr{{.*}}%p, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[DEST:[0-9]+]] = load ptr, ptr %p.addr, align 8
+// CHECK-MSCOMPAT: %[[VALUE:[0-9]+]] = load i8, ptr %v.addr, align 1
+// CHECK-MSCOMPAT: store atomic volatile i8 %[[VALUE]], ptr %[[DEST]] release, align 1
+// CHECK-MSCOMPAT: ret void
+// CHECK-LINUX: error: call to undeclared function '__stlr8'
+
+void test__stlr16(unsigned __int16 volatile *p, unsigned __int16 v)
+{
+ __stlr16 (p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}void @test__stlr16(ptr{{.*}}%p, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[DEST:[0-9]+]] = load ptr, ptr %p.addr, align 8
+// CHECK-MSCOMPAT: %[[VALUE:[0-9]+]] = load i16, ptr %v.addr, align 2
+// CHECK-MSCOMPAT: store atomic volatile i16 %[[VALUE]], ptr %[[DEST]] release, align 2
+// CHECK-MSCOMPAT: ret void
+// CHECK-LINUX: error: call to undeclared function '__stlr16'
+
+void test__stlr32(unsigned __int32 volatile *p, unsigned __int32 v)
+{
+ __stlr32(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}void @test__stlr32(ptr{{.*}}%p, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[DEST:[0-9]+]] = load ptr, ptr %p.addr, align 8
+// CHECK-MSCOMPAT: %[[VALUE:[0-9]+]] = load i32, ptr %v.addr, align 4
+// CHECK-MSCOMPAT: store atomic volatile i32 %[[VALUE]], ptr %[[DEST]] release, align 4
+// CHECK-MSCOMPAT: ret void
+// CHECK-LINUX: error: call to undeclared function '__stlr32'
+
+void test__stlr64(unsigned __int64 volatile *p, unsigned __int64 v)
+{
+ __stlr64(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}void @test__stlr64(ptr{{.*}}%p, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[DEST:[0-9]+]] = load ptr, ptr %p.addr, align 8
+// CHECK-MSCOMPAT: %[[VALUE:[0-9]+]] = load i64, ptr %v.addr, align 8
+// CHECK-MSCOMPAT: store atomic volatile i64 %[[VALUE]], ptr %[[DEST]] release, align 8
+// CHECK-MSCOMPAT: ret void
+// CHECK-LINUX: error: call to undeclared function '__stlr64'
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From 6451ad4276fba0bd8e55b2d5c132e5a178dd22a2 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 5 Jun 2026 07:53:47 -0300
Subject: [PATCH 03/26] [aarch64] Add support for the __ldxr{8|16|32|64} MS
intrinsics
These MSVC load-exclusive intrinsics lower to the existing llvm.aarch64.ldxr
intrinsic used by the ACLE __builtin_arm_ldrex builtin.
---
clang/include/clang/Basic/BuiltinsAArch64.td | 5 +++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 16 ++++++++++
clang/lib/Headers/arm64intr.h | 5 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 31 +++++++++++++++++++
4 files changed, 57 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index dd6d4e02c80e2..b3232319f0122 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -419,4 +419,9 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __stlr16 : AArch64NoPrefixTargetLibBuiltin<"void (unsigned short volatile *, unsigned short)">;
def __stlr32 : AArch64NoPrefixTargetLibBuiltin<"void (unsigned int volatile *, unsigned int)">;
def __stlr64 : AArch64NoPrefixTargetLibBuiltin<"void (unsigned long long int volatile *, unsigned long long int)">;
+
+ def __ldxr8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char const volatile *)">;
+ def __ldxr16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short const volatile *)">;
+ def __ldxr32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int const volatile *)">;
+ def __ldxr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int const volatile *)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 457a1d38ff1a2..602173dd4ff71 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5344,6 +5344,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Load;
}
+ if (BuiltinID == AArch64::BI__ldxr8 || BuiltinID == AArch64::BI__ldxr16 ||
+ BuiltinID == AArch64::BI__ldxr32 || BuiltinID == AArch64::BI__ldxr64) {
+ // Load-exclusive (LDXR*). Reuse the llvm.aarch64.ldxr lowering of the ACLE
+ // __builtin_arm_ldrex builtin.
+ Value *LoadAddr = EmitScalarExpr(E->getArg(0));
+ QualType Ty = E->getType();
+ llvm::Type *RealResTy = ConvertType(Ty);
+ llvm::Type *IntTy =
+ llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ldxr, DefaultPtrTy);
+ CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
+ Val->addParamAttr(
+ 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
+ return Builder.CreateTruncOrBitCast(Val, RealResTy);
+ }
+
if (BuiltinID == AArch64::BI__stlr8 || BuiltinID == AArch64::BI__stlr16 ||
BuiltinID == AArch64::BI__stlr32 || BuiltinID == AArch64::BI__stlr64) {
Value *Ptr = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 950ecd6aa38cc..ed411c213406b 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -55,6 +55,11 @@ void __stlr16(unsigned __int16 volatile *, unsigned __int16);
void __stlr32(unsigned __int32 volatile *, unsigned __int32);
void __stlr64(unsigned __int64 volatile *, unsigned __int64);
+unsigned __int8 __ldxr8(const volatile unsigned __int8 *);
+unsigned __int16 __ldxr16(const volatile unsigned __int16 *);
+unsigned __int32 __ldxr32(const volatile unsigned __int32 *);
+unsigned __int64 __ldxr64(const volatile unsigned __int64 *);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index 2698a35f0c5aa..a02a61b329a9f 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -700,6 +700,37 @@ unsigned long long int check__ldar64(unsigned long long int volatile *p) {
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__ldar64'
+unsigned char check__ldxr8(unsigned char volatile *p) {
+ return __ldxr8(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__ldxr8(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i8) %{{.*}})
+// CHECK-MSCOMPAT: trunc i64 %[[RET]] to i8
+// CHECK-LINUX: error: call to undeclared function '__ldxr8'
+
+unsigned short check__ldxr16(unsigned short volatile *p) {
+ return __ldxr16(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @check__ldxr16(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i16) %{{.*}})
+// CHECK-MSCOMPAT: trunc i64 %[[RET]] to i16
+// CHECK-LINUX: error: call to undeclared function '__ldxr16'
+
+unsigned int check__ldxr32(unsigned int volatile *p) {
+ return __ldxr32(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @check__ldxr32(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i32) %{{.*}})
+// CHECK-MSCOMPAT: trunc i64 %[[RET]] to i32
+// CHECK-LINUX: error: call to undeclared function '__ldxr32'
+
+unsigned long long int check__ldxr64(unsigned long long int volatile *p) {
+ return __ldxr64(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @check__ldxr64(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i64) %{{.*}})
+// CHECK-LINUX: error: call to undeclared function '__ldxr64'
+
void test__stlr8(unsigned __int8 volatile *p, unsigned __int8 v)
{
__stlr8 (p, v);
>From 56bf2fa3cdd5ec5528e398d8d808d35ebefa1658 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 5 Jun 2026 08:11:24 -0300
Subject: [PATCH 04/26] [aarch64] Add support for the __ldaxr{8|16|32|64} MS
intrinsics
These MSVC load-acquire-exclusive intrinsics lower to the existing
llvm.aarch64.ldaxr intrinsic used by the ACLE __builtin_arm_ldaex builtin.
---
clang/include/clang/Basic/BuiltinsAArch64.td | 5 +++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 16 ++++++++++
clang/lib/Headers/arm64intr.h | 5 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 31 +++++++++++++++++++
4 files changed, 57 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index b3232319f0122..8fda6505c6a31 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -424,4 +424,9 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __ldxr16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short const volatile *)">;
def __ldxr32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int const volatile *)">;
def __ldxr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int const volatile *)">;
+
+ def __ldaxr8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char const volatile *)">;
+ def __ldaxr16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short const volatile *)">;
+ def __ldaxr32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int const volatile *)">;
+ def __ldaxr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int const volatile *)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 602173dd4ff71..d742d5d5102f2 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5360,6 +5360,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateTruncOrBitCast(Val, RealResTy);
}
+ if (BuiltinID == AArch64::BI__ldaxr8 || BuiltinID == AArch64::BI__ldaxr16 ||
+ BuiltinID == AArch64::BI__ldaxr32 || BuiltinID == AArch64::BI__ldaxr64) {
+ // Load-acquire-exclusive (LDAXR*). Reuse the llvm.aarch64.ldaxr lowering of
+ // the ACLE __builtin_arm_ldaex builtin.
+ Value *LoadAddr = EmitScalarExpr(E->getArg(0));
+ QualType Ty = E->getType();
+ llvm::Type *RealResTy = ConvertType(Ty);
+ llvm::Type *IntTy =
+ llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ldaxr, DefaultPtrTy);
+ CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldaxr");
+ Val->addParamAttr(
+ 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
+ return Builder.CreateTruncOrBitCast(Val, RealResTy);
+ }
+
if (BuiltinID == AArch64::BI__stlr8 || BuiltinID == AArch64::BI__stlr16 ||
BuiltinID == AArch64::BI__stlr32 || BuiltinID == AArch64::BI__stlr64) {
Value *Ptr = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index ed411c213406b..378f4b4c3d5e3 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -60,6 +60,11 @@ unsigned __int16 __ldxr16(const volatile unsigned __int16 *);
unsigned __int32 __ldxr32(const volatile unsigned __int32 *);
unsigned __int64 __ldxr64(const volatile unsigned __int64 *);
+unsigned __int8 __ldaxr8(const volatile unsigned __int8 *);
+unsigned __int16 __ldaxr16(const volatile unsigned __int16 *);
+unsigned __int32 __ldaxr32(const volatile unsigned __int32 *);
+unsigned __int64 __ldaxr64(const volatile unsigned __int64 *);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index a02a61b329a9f..e33c6a266b2db 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -731,6 +731,37 @@ unsigned long long int check__ldxr64(unsigned long long int volatile *p) {
// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i64) %{{.*}})
// CHECK-LINUX: error: call to undeclared function '__ldxr64'
+unsigned char check__ldaxr8(unsigned char volatile *p) {
+ return __ldaxr8(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__ldaxr8(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i8) %{{.*}})
+// CHECK-MSCOMPAT: trunc i64 %[[RET]] to i8
+// CHECK-LINUX: error: call to undeclared function '__ldaxr8'
+
+unsigned short check__ldaxr16(unsigned short volatile *p) {
+ return __ldaxr16(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @check__ldaxr16(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i16) %{{.*}})
+// CHECK-MSCOMPAT: trunc i64 %[[RET]] to i16
+// CHECK-LINUX: error: call to undeclared function '__ldaxr16'
+
+unsigned int check__ldaxr32(unsigned int volatile *p) {
+ return __ldaxr32(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @check__ldaxr32(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i32) %{{.*}})
+// CHECK-MSCOMPAT: trunc i64 %[[RET]] to i32
+// CHECK-LINUX: error: call to undeclared function '__ldaxr32'
+
+unsigned long long int check__ldaxr64(unsigned long long int volatile *p) {
+ return __ldaxr64(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @check__ldaxr64(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i64) %{{.*}})
+// CHECK-LINUX: error: call to undeclared function '__ldaxr64'
+
void test__stlr8(unsigned __int8 volatile *p, unsigned __int8 v)
{
__stlr8 (p, v);
>From fa1b847b762d407cee5b8ff5ada4adc53392f516 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 5 Jun 2026 08:13:59 -0300
Subject: [PATCH 05/26] [aarch64] Add support for the __stxr{8|16|32|64} MS
intrinsics
These MSVC store-exclusive intrinsics lower to the existing llvm.aarch64.stxr
intrinsic used by the ACLE __builtin_arm_strex builtin. MSVC takes the
arguments as (pointer, value) (the reverse of the ACLE builtin) and returns
the store status as unsigned char.
---
clang/include/clang/Basic/BuiltinsAArch64.td | 5 +++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 22 +++++++++++++
clang/lib/Headers/arm64intr.h | 5 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 32 +++++++++++++++++++
4 files changed, 64 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 8fda6505c6a31..0fa3d3d9a5266 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -429,4 +429,9 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __ldaxr16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short const volatile *)">;
def __ldaxr32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int const volatile *)">;
def __ldaxr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int const volatile *)">;
+
+ def __stxr8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char)">;
+ def __stxr16 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned short volatile *, unsigned short)">;
+ def __stxr32 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned int volatile *, unsigned int)">;
+ def __stxr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned long long int volatile *, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index d742d5d5102f2..85cf51c5ee615 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5388,6 +5388,28 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Store;
}
+ if (BuiltinID == AArch64::BI__stxr8 || BuiltinID == AArch64::BI__stxr16 ||
+ BuiltinID == AArch64::BI__stxr32 || BuiltinID == AArch64::BI__stxr64) {
+ // Store-exclusive (STXR*). Reuse the llvm.aarch64.stxr lowering of the ACLE
+ // __builtin_arm_strex builtin. MSVC takes (ptr, value) (the reverse of the
+ // ACLE builtin) and returns the store status as unsigned char.
+ Value *StoreAddr = EmitScalarExpr(E->getArg(0));
+ Value *StoreVal = EmitScalarExpr(E->getArg(1));
+
+ QualType Ty = E->getArg(1)->getType();
+ llvm::Type *StoreTy =
+ llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
+ StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
+
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::aarch64_stxr, StoreAddr->getType());
+ CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
+ CI->addParamAttr(
+ 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
+ return Builder.CreateTrunc(CI, ConvertType(E->getType()));
+ }
+
+
if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
return Builder.CreateFPTrunc(
Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 378f4b4c3d5e3..a0cdd669bca55 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -65,6 +65,11 @@ unsigned __int16 __ldaxr16(const volatile unsigned __int16 *);
unsigned __int32 __ldaxr32(const volatile unsigned __int32 *);
unsigned __int64 __ldaxr64(const volatile unsigned __int64 *);
+unsigned __int8 __stxr8(volatile unsigned __int8 *, unsigned __int8);
+unsigned __int8 __stxr16(volatile unsigned __int16 *, unsigned __int16);
+unsigned __int8 __stxr32(volatile unsigned __int32 *, unsigned __int32);
+unsigned __int8 __stxr64(volatile unsigned __int64 *, unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index e33c6a266b2db..fbfbb9ab70851 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -762,6 +762,38 @@ unsigned long long int check__ldaxr64(unsigned long long int volatile *p) {
// CHECK-MSCOMPAT: %[[RET:.*]] = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i64) %{{.*}})
// CHECK-LINUX: error: call to undeclared function '__ldaxr64'
+unsigned char check__stxr8(unsigned char volatile *p, unsigned char v) {
+ return __stxr8(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__stxr8(ptr{{.*}}%p, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[ST:.*]] = call i32 @llvm.aarch64.stxr.p0(i64 %{{.*}}, ptr elementtype(i8) %{{.*}})
+// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
+// CHECK-LINUX: error: call to undeclared function '__stxr8'
+
+unsigned char check__stxr16(unsigned short volatile *p, unsigned short v) {
+ return __stxr16(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__stxr16(ptr{{.*}}%p, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[ST:.*]] = call i32 @llvm.aarch64.stxr.p0(i64 %{{.*}}, ptr elementtype(i16) %{{.*}})
+// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
+// CHECK-LINUX: error: call to undeclared function '__stxr16'
+
+unsigned char check__stxr32(unsigned int volatile *p, unsigned int v) {
+ return __stxr32(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__stxr32(ptr{{.*}}%p, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[ST:.*]] = call i32 @llvm.aarch64.stxr.p0(i64 %{{.*}}, ptr elementtype(i32) %{{.*}})
+// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
+// CHECK-LINUX: error: call to undeclared function '__stxr32'
+
+unsigned char check__stxr64(unsigned long long int volatile *p, unsigned long long int v) {
+ return __stxr64(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__stxr64(ptr{{.*}}%p, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[ST:.*]] = call i32 @llvm.aarch64.stxr.p0(i64 %{{.*}}, ptr elementtype(i64) %{{.*}})
+// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
+// CHECK-LINUX: error: call to undeclared function '__stxr64'
+
void test__stlr8(unsigned __int8 volatile *p, unsigned __int8 v)
{
__stlr8 (p, v);
>From 3ae3aea6d9097ee8d6899ef459c7f790ea170256 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 5 Jun 2026 08:17:34 -0300
Subject: [PATCH 06/26] [aarch64] Add support for the __stlxr{8|16|32|64} MS
intrinsics
These MSVC store-release-exclusive intrinsics lower to the existing
llvm.aarch64.stlxr intrinsic used by the ACLE __builtin_arm_stlex builtin.
As with __stxr, MSVC takes the arguments as (pointer, value) and returns the
store status as unsigned char.
---
clang/include/clang/Basic/BuiltinsAArch64.td | 5 +++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 19 +++++++----
clang/lib/Headers/arm64intr.h | 5 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 32 +++++++++++++++++++
4 files changed, 55 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 0fa3d3d9a5266..28736e128b7e3 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -434,4 +434,9 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __stxr16 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned short volatile *, unsigned short)">;
def __stxr32 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned int volatile *, unsigned int)">;
def __stxr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned long long int volatile *, unsigned long long int)">;
+
+ def __stlxr8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char)">;
+ def __stlxr16 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned short volatile *, unsigned short)">;
+ def __stlxr32 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned int volatile *, unsigned int)">;
+ def __stlxr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned long long int volatile *, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 85cf51c5ee615..416dfaacfb03b 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5389,10 +5389,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
if (BuiltinID == AArch64::BI__stxr8 || BuiltinID == AArch64::BI__stxr16 ||
- BuiltinID == AArch64::BI__stxr32 || BuiltinID == AArch64::BI__stxr64) {
- // Store-exclusive (STXR*). Reuse the llvm.aarch64.stxr lowering of the ACLE
- // __builtin_arm_strex builtin. MSVC takes (ptr, value) (the reverse of the
- // ACLE builtin) and returns the store status as unsigned char.
+ BuiltinID == AArch64::BI__stxr32 || BuiltinID == AArch64::BI__stxr64 ||
+ BuiltinID == AArch64::BI__stlxr8 || BuiltinID == AArch64::BI__stlxr16 ||
+ BuiltinID == AArch64::BI__stlxr32 || BuiltinID == AArch64::BI__stlxr64) {
+ // Store-(release-)exclusive (STXR*/STLXR*). Reuse the llvm.aarch64.stxr /
+ // stlxr lowering of the ACLE __builtin_arm_strex / stlex builtins. MSVC
+ // takes (ptr, value) (the reverse of the ACLE builtins) and returns the
+ // store status as unsigned char.
+ bool IsRelease =
+ BuiltinID == AArch64::BI__stlxr8 || BuiltinID == AArch64::BI__stlxr16 ||
+ BuiltinID == AArch64::BI__stlxr32 || BuiltinID == AArch64::BI__stlxr64;
Value *StoreAddr = EmitScalarExpr(E->getArg(0));
Value *StoreVal = EmitScalarExpr(E->getArg(1));
@@ -5401,8 +5407,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
- Function *F =
- CGM.getIntrinsic(Intrinsic::aarch64_stxr, StoreAddr->getType());
+ Function *F = CGM.getIntrinsic(
+ IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr,
+ StoreAddr->getType());
CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
CI->addParamAttr(
1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index a0cdd669bca55..3bf6c2cb57091 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -70,6 +70,11 @@ unsigned __int8 __stxr16(volatile unsigned __int16 *, unsigned __int16);
unsigned __int8 __stxr32(volatile unsigned __int32 *, unsigned __int32);
unsigned __int8 __stxr64(volatile unsigned __int64 *, unsigned __int64);
+unsigned __int8 __stlxr8(volatile unsigned __int8 *, unsigned __int8);
+unsigned __int8 __stlxr16(volatile unsigned __int16 *, unsigned __int16);
+unsigned __int8 __stlxr32(volatile unsigned __int32 *, unsigned __int32);
+unsigned __int8 __stlxr64(volatile unsigned __int64 *, unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index fbfbb9ab70851..420bf80aa0af2 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -794,6 +794,38 @@ unsigned char check__stxr64(unsigned long long int volatile *p, unsigned long lo
// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
// CHECK-LINUX: error: call to undeclared function '__stxr64'
+unsigned char check__stlxr8(unsigned char volatile *p, unsigned char v) {
+ return __stlxr8(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__stlxr8(ptr{{.*}}%p, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[ST:.*]] = call i32 @llvm.aarch64.stlxr.p0(i64 %{{.*}}, ptr elementtype(i8) %{{.*}})
+// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
+// CHECK-LINUX: error: call to undeclared function '__stlxr8'
+
+unsigned char check__stlxr16(unsigned short volatile *p, unsigned short v) {
+ return __stlxr16(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__stlxr16(ptr{{.*}}%p, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[ST:.*]] = call i32 @llvm.aarch64.stlxr.p0(i64 %{{.*}}, ptr elementtype(i16) %{{.*}})
+// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
+// CHECK-LINUX: error: call to undeclared function '__stlxr16'
+
+unsigned char check__stlxr32(unsigned int volatile *p, unsigned int v) {
+ return __stlxr32(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__stlxr32(ptr{{.*}}%p, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[ST:.*]] = call i32 @llvm.aarch64.stlxr.p0(i64 %{{.*}}, ptr elementtype(i32) %{{.*}})
+// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
+// CHECK-LINUX: error: call to undeclared function '__stlxr32'
+
+unsigned char check__stlxr64(unsigned long long int volatile *p, unsigned long long int v) {
+ return __stlxr64(p, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__stlxr64(ptr{{.*}}%p, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[ST:.*]] = call i32 @llvm.aarch64.stlxr.p0(i64 %{{.*}}, ptr elementtype(i64) %{{.*}})
+// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
+// CHECK-LINUX: error: call to undeclared function '__stlxr64'
+
void test__stlr8(unsigned __int8 volatile *p, unsigned __int8 v)
{
__stlr8 (p, v);
>From 85b1774678beb2a43ee1cc646029ba85ceb39017 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 5 Jun 2026 09:54:55 -0300
Subject: [PATCH 07/26] [AArch64] Add a CRm operand to the llvm.aarch64.clrex
intrinsic
Parameterize llvm.aarch64.clrex with the CLREX CRm immediate (0-15) so it can
emit "clrex #CRm" instead of always "clrex" (CRm=15), mirroring the
llvm.aarch64.dmb family. The CLREX CRm operand is changed from imm0_15 to the
i32 imm32_0_15 to match the intrinsic argument; assembly is unaffected (same
parser match class). The ACLE __builtin_arm_clrex keeps CRm=15.
This is a prerequisite for implementing the MSVC __clrex(crm) intrinsic.
---
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 3 ++-
clang/test/CodeGen/builtins-arm-exclusive.c | 2 +-
llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +-
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3 ++-
llvm/lib/Target/AArch64/AArch64InstrAtomics.td | 5 ++---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 3 ++-
llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll | 4 ++--
7 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 416dfaacfb03b..13c9a1e1f7cf5 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -4880,7 +4880,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
- return Builder.CreateCall(F);
+ // The ACLE __builtin_arm_clrex clears the exclusive monitor with CRm == 15.
+ return Builder.CreateCall(F, {Builder.getInt32(15)});
}
if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
diff --git a/clang/test/CodeGen/builtins-arm-exclusive.c b/clang/test/CodeGen/builtins-arm-exclusive.c
index f27dcfc81f34b..6ca99f235201d 100644
--- a/clang/test/CodeGen/builtins-arm-exclusive.c
+++ b/clang/test/CodeGen/builtins-arm-exclusive.c
@@ -255,7 +255,7 @@ void test_clrex(void) {
__builtin_arm_clrex();
// CHECK: call void @llvm.arm.clrex()
-// CHECK-ARM64: call void @llvm.aarch64.clrex()
+// CHECK-ARM64: call void @llvm.aarch64.clrex(i32 15)
}
#ifdef __aarch64__
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ba0d7c02bf427..0b91837f3a80a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -32,7 +32,7 @@ def int_aarch64_stlxp : Intrinsic<[llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
[IntrNoFree, IntrWillReturn]>;
-def int_aarch64_clrex : Intrinsic<[]>;
+def int_aarch64_clrex : Intrinsic<[], [llvm_i32_ty]>;
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index efebe835fa530..52af76da537b6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -31359,7 +31359,8 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilderBase &Builder) const {
- Builder.CreateIntrinsic(Intrinsic::aarch64_clrex, {});
+ // Clear the exclusive monitor with CRm == 15.
+ Builder.CreateIntrinsic(Intrinsic::aarch64_clrex, {}, {Builder.getInt32(15)});
}
Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 2187f21abb70f..c324b38e56bc0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -477,9 +477,8 @@ def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
(STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
-// And clear exclusive.
-
-def : Pat<(int_aarch64_clrex), (CLREX 0xf)>;
+// And clear exclusive. The (int_aarch64_clrex imm) pattern is defined on the
+// CLREX instruction in AArch64InstrInfo.td.
//===----------------------------------
// Atomic cmpxchg for -O0
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 151cbd9bc5a7c..7bb8037945ee9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1583,7 +1583,8 @@ def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
-def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
+def CLREX : CRmSystemI<imm32_0_15, 0b010, "clrex",
+ [(int_aarch64_clrex (i32 imm32_0_15:$CRm))]>;
// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
index ccd191f163b01..11199a2423f8e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -170,11 +170,11 @@ define dso_local void @test_clear() {
; CHECK: // %bb.0:
; CHECK-NEXT: clrex
; CHECK-NEXT: ret
- call void @llvm.aarch64.clrex()
+ call void @llvm.aarch64.clrex(i32 15)
ret void
}
-declare void @llvm.aarch64.clrex() nounwind
+declare void @llvm.aarch64.clrex(i32) nounwind
define dso_local i128 @test_load_acquire_i128(ptr %p) nounwind readonly {
; CHECK-LABEL: test_load_acquire_i128:
>From b7cd7ceb0c3e647bb26498cbe0dc3297bbe2b213 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 5 Jun 2026 10:04:22 -0300
Subject: [PATCH 08/26] [aarch64] Add support for the __clrex MS intrinsic
The MSVC __clrex(crm) intrinsic clears the local exclusive monitor and emits
"clrex #crm". It lowers to the llvm.aarch64.clrex intrinsic, which now carries
the CRm immediate. Sema restricts crm to a constant in [0, 15].
---
clang/include/clang/Basic/BuiltinsAArch64.td | 2 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 8 ++++++++
clang/lib/Headers/arm64intr.h | 2 ++
clang/lib/Sema/SemaARM.cpp | 3 +++
clang/test/CodeGen/arm64-microsoft-intrinsics.c | 7 +++++++
clang/test/Sema/builtins-microsoft-arm64.c | 5 +++++
6 files changed, 27 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 28736e128b7e3..30515e27679eb 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -439,4 +439,6 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __stlxr16 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned short volatile *, unsigned short)">;
def __stlxr32 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned int volatile *, unsigned int)">;
def __stlxr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned long long int volatile *, unsigned long long int)">;
+
+ def __clrex : AArch64NoPrefixTargetLibBuiltin<"void (unsigned char)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 13c9a1e1f7cf5..887a4895f400a 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -4884,6 +4884,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {Builder.getInt32(15)});
}
+ if (BuiltinID == clang::AArch64::BI__clrex) {
+ // MSVC __clrex(crm) clears the exclusive monitor with the given CRm (a
+ // constant in [0, 15], enforced by Sema). Emit "clrex #crm".
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
+ Value *CRm = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int32Ty);
+ return Builder.CreateCall(F, {CRm});
+ }
+
if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
llvm::SyncScope::SingleThread);
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 3bf6c2cb57091..2945eee4cb65a 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -75,6 +75,8 @@ unsigned __int8 __stlxr16(volatile unsigned __int16 *, unsigned __int16);
unsigned __int8 __stlxr32(volatile unsigned __int32 *, unsigned __int32);
unsigned __int8 __stlxr64(volatile unsigned __int64 *, unsigned __int64);
+void __clrex(unsigned __int8);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 5e7504fab416d..92cf626545807 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1189,6 +1189,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
if (BuiltinID == AArch64::BI__hlt)
return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 0xffff);
+ if (BuiltinID == AArch64::BI__clrex)
+ return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 15);
+
if (CheckNeonBuiltinFunctionCall(TI, BuiltinID, TheCall))
return true;
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index 420bf80aa0af2..b99e6948c6fd9 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -826,6 +826,13 @@ unsigned char check__stlxr64(unsigned long long int volatile *p, unsigned long l
// CHECK-MSCOMPAT: trunc i32 %[[ST]] to i8
// CHECK-LINUX: error: call to undeclared function '__stlxr64'
+void check__clrex(void) {
+ __clrex(15);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}void @check__clrex(){{.*}}{
+// CHECK-MSCOMPAT: call void @llvm.aarch64.clrex(i32 15)
+// CHECK-LINUX: error: call to undeclared function '__clrex'
+
void test__stlr8(unsigned __int8 volatile *p, unsigned __int8 v)
{
__stlr8 (p, v);
diff --git a/clang/test/Sema/builtins-microsoft-arm64.c b/clang/test/Sema/builtins-microsoft-arm64.c
index 22163ab3fa851..0ec122a8d540f 100644
--- a/clang/test/Sema/builtins-microsoft-arm64.c
+++ b/clang/test/Sema/builtins-microsoft-arm64.c
@@ -14,6 +14,11 @@ void check__hlt() {
__hlt(65536); // expected-error-re {{argument value {{.*}} is outside the valid range}}
}
+void check__clrex(unsigned char x) {
+ __clrex(16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __clrex(x); // expected-error {{argument to '__clrex' must be a constant integer}}
+}
+
void check__getReg(void) {
__getReg(-1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
__getReg(32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
>From 9079b4a48856806626f9db19fabddc1f8826eed5 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 23 Apr 2026 10:52:29 -0300
Subject: [PATCH 09/26] [aarch64] Add support for cas builtin
The new intrinsics issue the CAS* instructions directly, regardless of the
LSE target feature. They will be used to implement the MSVC __casX
builtins.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 15 ++++++
.../lib/Target/AArch64/AArch64InstrAtomics.td | 19 +++++++
.../lib/Target/AArch64/AArch64InstrFormats.td | 23 ++++++---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 +++
.../test/CodeGen/AArch64/ms-intrinsics-cas.ll | 50 +++++++++++++++++++
5 files changed, 106 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 0b91837f3a80a..8dce37165f035 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -34,6 +34,21 @@ def int_aarch64_stlxp : Intrinsic<[llvm_i32_ty],
def int_aarch64_clrex : Intrinsic<[], [llvm_i32_ty]>;
+// CAS intrinsics — emit CAS* (no acquire/release) directly, regardless of the
+// LSE target feature. CAS{B,H} comparand/value use i32; CAS{X} uses i64.
+def int_aarch64_cas8 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_cas16 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_cas32 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_cas64 : Intrinsic<[llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+ [IntrNoFree, IntrWillReturn]>;
+
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index c324b38e56bc0..9ec3c50ee8e32 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -567,6 +567,25 @@ let Predicates = [HasLSFE] in {
defm : LDBFPOPregister_patterns<"LDBFMIN", "atomic_load_fminimum">;
}
+// Unconditional patterns for the __cas* MSVC builtins to emit CAS* regardless
+// of the LSE subtarget feature. cas{b,h} use only the low bits of the
+// comparison register, so the zero extension masking emitted by CodeGen
+// (zext i8/i16 -> i32 -> 'and w,w,#mask') is redundant.
+def : Pat<(i32 (int_aarch64_cas8 GPR64sp:$Rn,
+ (and GPR32:$Rs, (i32 255)), (and GPR32:$Rt, (i32 255)))),
+ (CASB_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_cas8 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASB_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_cas16 GPR64sp:$Rn,
+ (and GPR32:$Rs, (i32 65535)), (and GPR32:$Rt, (i32 65535)))),
+ (CASH_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_cas16 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASH_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_cas32 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASW_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i64 (int_aarch64_cas64 GPR64sp:$Rn, GPR64:$Rs, GPR64:$Rt)),
+ (CASX_cg GPR64:$Rs, GPR64:$Rt, GPR64sp:$Rn)>;
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index c252f4933dc18..2aabdb77b8582 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12403,7 +12403,7 @@ class CryptoRRRi2Tied<bits<1>op0, bits<2>op1, string asm>
// ST<OP>{<order>}[<size>] <Ws>, [<Xn|SP>]
// ST<OP>{<order>} <Xs>, [<Xn|SP>]
-let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
class BaseCASEncoding<dag oops, dag iops, string asm, string operands,
string cstr, list<dag> pattern>
: I<oops, iops, asm, operands, cstr, pattern> {
@@ -12424,7 +12424,6 @@ class BaseCASEncoding<dag oops, dag iops, string asm, string operands,
let Inst{14-10} = 0b11111;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
- let Predicates = [HasLSE];
}
class BaseCAS<string order, string size, RegisterClass RC>
@@ -12435,13 +12434,23 @@ class BaseCAS<string order, string size, RegisterClass RC>
let NP = 1;
}
-multiclass CompareAndSwap<bits<1> Acq, bits<1> Rel, string order> {
- let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseCAS<order, "b", GPR32>;
- let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseCAS<order, "h", GPR32>;
- let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseCAS<order, "", GPR32>;
- let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseCAS<order, "", GPR64>;
+multiclass CompareAndSwap_impl<bits<1> Acq, bits<1> Rel, string order, list<Predicate> preds, string suffix> {
+ let Predicates = preds, Acq = Acq, Rel = Rel in {
+ let Sz = 0b00 in def "B" # suffix : BaseCAS<order, "b", GPR32>;
+ let Sz = 0b01 in def "H" # suffix : BaseCAS<order, "h", GPR32>;
+ let Sz = 0b10 in def "W" # suffix : BaseCAS<order, "", GPR32>;
+ let Sz = 0b11 in def "X" # suffix : BaseCAS<order, "", GPR64>;
+ }
}
+multiclass CompareAndSwap<bits<1> Acq, bits<1> Rel, string order>
+ : CompareAndSwap_impl<Acq, Rel, order, [HasLSE], "">;
+
+// For isCodeGenOnly use by MS intrinsics that must emit CAS* regardless
+// of the -march setting.
+multiclass CompareAndSwap_cg<bits<1> Acq, bits<1> Rel, string order>
+ : CompareAndSwap_impl<Acq, Rel, order, [], "_cg">;
+
class BaseCASP<string order, string size, RegisterOperand RC>
: BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn),
"casp" # order # size, "\t$Rs, $Rt, [$Rn]",
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7bb8037945ee9..922c53606d507 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3026,6 +3026,12 @@ defm CASA : CompareAndSwap<1, 0, "a">;
defm CASL : CompareAndSwap<0, 1, "l">;
defm CASAL : CompareAndSwap<1, 1, "al">;
+// isCodeGenOnly=1: excluded from asm parser/disassembler, no HasLSE predicate.
+// Used by the __cas* MS intrinsics to force CAS* emission regardless of -march.
+let isCodeGenOnly = 1 in {
+ defm CAS : CompareAndSwap_cg<0, 0, "">;
+}
+
// v8.1 atomic CASP
defm CASP : CompareAndSwapPair<0, 0, "">;
defm CASPA : CompareAndSwapPair<1, 0, "a">;
diff --git a/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll b/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
new file mode 100644
index 0000000000000..b3ee250bd96c3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel=0 -global-isel=false \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+lse -fast-isel=0 \
+; RUN: -global-isel=false -verify-machineinstrs < %s | FileCheck %s
+
+; Tests for the __cas* MS builtins on AArch64. These lower to
+; llvm.aarch64.cas* intrinsics and must select the corresponding CAS instruction
+; regardless of whether +lse is present in the target features (the _cg ISel
+; variants have no feature predicate).
+
+define i32 @test_cas8(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_cas8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casb w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.cas8(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i32 @test_cas16(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_cas16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cash w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.cas16(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i32 @test_cas32(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_cas32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cas w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.cas32(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i64 @test_cas64(ptr %p, i64 %rs, i64 %rt) {
+; CHECK-LABEL: test_cas64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cas x1, x2, [x0]
+; CHECK-NEXT: mov x0, x1
+; CHECK-NEXT: ret
+ %r = call i64 @llvm.aarch64.cas64(ptr %p, i64 %rs, i64 %rt)
+ ret i64 %r
+}
\ No newline at end of file
>From bd9d251ff03878689bb724999ad37dd19e1db55f Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Wed, 15 Apr 2026 16:53:14 -0300
Subject: [PATCH 10/26] [aarch64] Add support for the __cas{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__cas8` - maps to CASB.
* `__cas16` - maps to CASH.
* `__cas32` - maps to CAS.
* `__cas64` - maps to CAS.
The lowering uses new intrinsics that issue the CAS* instructions
directly, regardless of the LSE target feature. This matches the MSVC
compiler and the intent of the builtins, which is to mimic inline asm.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 5 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 40 +++++++++++++
clang/lib/Headers/arm64intr.h | 9 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 56 +++++++++++++++++++
4 files changed, 110 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 30515e27679eb..340655a5349f3 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -441,4 +441,9 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __stlxr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned long long int volatile *, unsigned long long int)">;
def __clrex : AArch64NoPrefixTargetLibBuiltin<"void (unsigned char)">;
+
+ def __cas8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char, unsigned char)">;
+ def __cas16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short, unsigned short)">;
+ def __cas32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int, unsigned int)">;
+ def __cas64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 887a4895f400a..29c009bef5e6f 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5425,6 +5425,46 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateTrunc(CI, ConvertType(E->getType()));
}
+ if (BuiltinID == AArch64::BI__cas8 || BuiltinID == AArch64::BI__cas16 ||
+ BuiltinID == AArch64::BI__cas32 || BuiltinID == AArch64::BI__cas64) {
+ unsigned IntrID;
+ llvm::Type *IntrArgTy;
+ switch (BuiltinID) {
+ case AArch64::BI__cas8:
+ IntrID = Intrinsic::aarch64_cas8;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__cas16:
+ IntrID = Intrinsic::aarch64_cas16;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__cas32:
+ IntrID = Intrinsic::aarch64_cas32;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__cas64:
+ IntrID = Intrinsic::aarch64_cas64;
+ IntrArgTy = Builder.getInt64Ty();
+ break;
+ default:
+ llvm_unreachable("missing builtin ID in switch!");
+ }
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Comp = EmitScalarExpr(E->getArg(1));
+ Value *Val = EmitScalarExpr(E->getArg(2));
+ // For 8/16-bit we need to zext to GPR size
+ if (Comp->getType() != IntrArgTy)
+ Comp = Builder.CreateZExt(Comp, IntrArgTy);
+ if (Val->getType() != IntrArgTy)
+ Val = Builder.CreateZExt(Val, IntrArgTy);
+ Value *Result =
+ Builder.CreateCall(CGM.getIntrinsic(IntrID), {Ptr, Comp, Val});
+ // CAS{B/H} return i32 (zero-extended); truncate to declared type.
+ llvm::Type *RetTy = ConvertType(E->getType());
+ if (Result->getType() != RetTy)
+ Result = Builder.CreateTrunc(Result, RetTy);
+ return Result;
+ }
if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
return Builder.CreateFPTrunc(
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 2945eee4cb65a..2540ac8bc5e4c 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -77,6 +77,15 @@ unsigned __int8 __stlxr64(volatile unsigned __int64 *, unsigned __int64);
void __clrex(unsigned __int8);
+unsigned __int8 __cas8(unsigned __int8 volatile *, unsigned __int8,
+ unsigned __int8);
+unsigned __int16 __cas16(unsigned __int16 volatile *, unsigned __int16,
+ unsigned __int16);
+unsigned __int32 __cas32(unsigned __int32 volatile *, unsigned __int32,
+ unsigned __int32);
+unsigned __int64 __cas64(unsigned __int64 volatile *, unsigned __int64,
+ unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index b99e6948c6fd9..7fff821012269 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -877,6 +877,62 @@ void test__stlr64(unsigned __int64 volatile *p, unsigned __int64 v)
// CHECK-MSCOMPAT: ret void
// CHECK-LINUX: error: call to undeclared function '__stlr64'
+unsigned char test__cas8(unsigned char volatile* t, unsigned char c, unsigned char v)
+{
+ return __cas8 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @test__cas8(ptr{{.*}}%t, i8{{.*}}%c, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i8, ptr %c.addr, align 1
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i8, ptr %v.addr, align 1
+// CHECK-MSCOMPAT: %[[ZEXTC:[0-9]+]] = zext i8 %[[TMPC]] to i32
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i8 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.cas8(ptr %[[TMPT]], i32 %[[ZEXTC]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[RETT:[0-9]+]] = trunc i32 %[[RET]] to i8
+// CHECK-MSCOMPAT: ret i8 %[[RETT]]
+// CHECK-LINUX: error: call to undeclared function '__cas8'
+
+unsigned short test__cas16(unsigned short volatile* t, unsigned short c, unsigned short v)
+{
+ return __cas16 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @test__cas16(ptr{{.*}}%t, i16{{.*}}%c, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i16, ptr %c.addr, align 2
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i16, ptr %v.addr, align 2
+// CHECK-MSCOMPAT: %[[ZEXTC:[0-9]+]] = zext i16 %[[TMPC]] to i32
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i16 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.cas16(ptr %[[TMPT]], i32 %[[ZEXTC]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[RETT:[0-9]+]] = trunc i32 %[[RET]] to i16
+// CHECK-MSCOMPAT: ret i16 %[[RETT]]
+// CHECK-LINUX: error: call to undeclared function '__cas16'
+
+unsigned int test__cas32(unsigned int volatile* t, unsigned int c, unsigned int v)
+{
+ return __cas32 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @test__cas32(ptr{{.*}}%t, i32{{.*}}%c, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i32, ptr %c.addr, align 4
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i32, ptr %v.addr, align 4
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.cas32(ptr %[[TMPT]], i32 %[[TMPC]], i32 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__cas32'
+
+unsigned long long int test__cas64(unsigned long long int volatile* t,
+ unsigned long long int c,
+ unsigned long long int v)
+{
+ return __cas64 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @test__cas64(ptr{{.*}}%t, i64{{.*}}%c, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i64, ptr %c.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i64, ptr %v.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i64 @llvm.aarch64.cas64(ptr %[[TMPT]], i64 %[[TMPC]], i64 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__cas64'
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From dd9545dcb2fb24ee957d17d0adc61bd17082cbad Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 23 Apr 2026 12:16:24 -0300
Subject: [PATCH 11/26] [aarch64] Add support for casa builtin
The new intrinsic issues CASA* instruction directly, regardless of LSE
target features. This will be used to implement MSVC __casaX
builtins.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 15 +++++++
.../lib/Target/AArch64/AArch64InstrAtomics.td | 16 ++++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 1 +
.../test/CodeGen/AArch64/ms-intrinsics-cas.ll | 40 +++++++++++++++++++
4 files changed, 72 insertions(+)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 8dce37165f035..bd8747efb7c02 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -49,6 +49,21 @@ def int_aarch64_cas64 : Intrinsic<[llvm_i64_ty],
[llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
[IntrNoFree, IntrWillReturn]>;
+// CASA intrinsics — emit CASA* (acquire, no release) directly, regardless of
+// the LSE target feature. Same type conventions as cas*.
+def int_aarch64_casa8 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_casa16 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_casa32 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_casa64 : Intrinsic<[llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+ [IntrNoFree, IntrWillReturn]>;
+
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 9ec3c50ee8e32..57b19d97b41a5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -586,6 +586,22 @@ def : Pat<(i32 (int_aarch64_cas32 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
def : Pat<(i64 (int_aarch64_cas64 GPR64sp:$Rn, GPR64:$Rs, GPR64:$Rt)),
(CASX_cg GPR64:$Rs, GPR64:$Rt, GPR64sp:$Rn)>;
+// Unconditional patterns for the __casa* MSVC builtins (acquire ordering).
+def : Pat<(i32 (int_aarch64_casa8 GPR64sp:$Rn,
+ (and GPR32:$Rs, (i32 255)), (and GPR32:$Rt, (i32 255)))),
+ (CASAB_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casa8 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASAB_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casa16 GPR64sp:$Rn,
+ (and GPR32:$Rs, (i32 65535)), (and GPR32:$Rt, (i32 65535)))),
+ (CASAH_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casa16 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASAH_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casa32 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASAW_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i64 (int_aarch64_casa64 GPR64sp:$Rn, GPR64:$Rs, GPR64:$Rt)),
+ (CASAX_cg GPR64:$Rs, GPR64:$Rt, GPR64sp:$Rn)>;
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 922c53606d507..29a480078a51b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3030,6 +3030,7 @@ defm CASAL : CompareAndSwap<1, 1, "al">;
// Used by the __cas* MS intrinsics to force CAS* emission regardless of -march.
let isCodeGenOnly = 1 in {
defm CAS : CompareAndSwap_cg<0, 0, "">;
+ defm CASA : CompareAndSwap_cg<1, 0, "a">;
}
// v8.1 atomic CASP
diff --git a/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll b/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
index b3ee250bd96c3..99f1e5b50132f 100644
--- a/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
+++ b/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
@@ -47,4 +47,44 @@ define i64 @test_cas64(ptr %p, i64 %rs, i64 %rt) {
; CHECK-NEXT: ret
%r = call i64 @llvm.aarch64.cas64(ptr %p, i64 %rs, i64 %rt)
ret i64 %r
+}
+
+define i32 @test_casa8(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_casa8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casab w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.casa8(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i32 @test_casa16(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_casa16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casah w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.casa16(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i32 @test_casa32(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_casa32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casa w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.casa32(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i64 @test_casa64(ptr %p, i64 %rs, i64 %rt) {
+; CHECK-LABEL: test_casa64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casa x1, x2, [x0]
+; CHECK-NEXT: mov x0, x1
+; CHECK-NEXT: ret
+ %r = call i64 @llvm.aarch64.casa64(ptr %p, i64 %rs, i64 %rt)
+ ret i64 %r
}
\ No newline at end of file
>From 7be34dabcd9b94f6d6b9d4fcfdb81352ea775d04 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Wed, 15 Apr 2026 17:39:55 -0300
Subject: [PATCH 12/26] [aarch64] Add support for the __casa{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__casa8` - maps to CASAB.
* `__casa16` - maps to CASAH.
* `__casa32` - maps to CASA.
* `__casa64` - maps to CASA.
The emit is done using new intrinsics to issue the CASA* instructions
directly, regardless of the LSE target feature. This mimics the MSVC
compiler and the idea that the builtin behaves like inline asm.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 4 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 20 ++++++-
clang/lib/Headers/arm64intr.h | 9 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 56 +++++++++++++++++++
4 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 340655a5349f3..b88f38654a665 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -446,4 +446,8 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __cas16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short, unsigned short)">;
def __cas32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int, unsigned int)">;
def __cas64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int, unsigned long long int)">;
+ def __casa8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char, unsigned char)">;
+ def __casa16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short, unsigned short)">;
+ def __casa32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int, unsigned int)">;
+ def __casa64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 29c009bef5e6f..bca38762082ab 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5426,7 +5426,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
if (BuiltinID == AArch64::BI__cas8 || BuiltinID == AArch64::BI__cas16 ||
- BuiltinID == AArch64::BI__cas32 || BuiltinID == AArch64::BI__cas64) {
+ BuiltinID == AArch64::BI__cas32 || BuiltinID == AArch64::BI__cas64 ||
+ BuiltinID == AArch64::BI__casa8 || BuiltinID == AArch64::BI__casa16 ||
+ BuiltinID == AArch64::BI__casa32 || BuiltinID == AArch64::BI__casa64) {
unsigned IntrID;
llvm::Type *IntrArgTy;
switch (BuiltinID) {
@@ -5446,6 +5448,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
IntrID = Intrinsic::aarch64_cas64;
IntrArgTy = Builder.getInt64Ty();
break;
+ case AArch64::BI__casa8:
+ IntrID = Intrinsic::aarch64_casa8;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__casa16:
+ IntrID = Intrinsic::aarch64_casa16;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__casa32:
+ IntrID = Intrinsic::aarch64_casa32;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__casa64:
+ IntrID = Intrinsic::aarch64_casa64;
+ IntrArgTy = Builder.getInt64Ty();
+ break;
default:
llvm_unreachable("missing builtin ID in switch!");
}
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 2540ac8bc5e4c..18e6cef556900 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -86,6 +86,15 @@ unsigned __int32 __cas32(unsigned __int32 volatile *, unsigned __int32,
unsigned __int64 __cas64(unsigned __int64 volatile *, unsigned __int64,
unsigned __int64);
+unsigned __int8 __casa8(unsigned __int8 volatile *, unsigned __int8,
+ unsigned __int8);
+unsigned __int16 __casa16(unsigned __int16 volatile *, unsigned __int16,
+ unsigned __int16);
+unsigned __int32 __casa32(unsigned __int32 volatile *, unsigned __int32,
+ unsigned __int32);
+unsigned __int64 __casa64(unsigned __int64 volatile *, unsigned __int64,
+ unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index 7fff821012269..87df4c5901ac8 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -933,6 +933,62 @@ unsigned long long int test__cas64(unsigned long long int volatile* t,
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__cas64'
+unsigned char test__casa8(unsigned char volatile* t, unsigned char c, unsigned char v)
+{
+ return __casa8 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @test__casa8(ptr{{.*}}%t, i8{{.*}}%c, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i8, ptr %c.addr, align 1
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i8, ptr %v.addr, align 1
+// CHECK-MSCOMPAT: %[[ZEXTC:[0-9]+]] = zext i8 %[[TMPC]] to i32
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i8 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.casa8(ptr %[[TMPT]], i32 %[[ZEXTC]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[RETT:[0-9]+]] = trunc i32 %[[RET]] to i8
+// CHECK-MSCOMPAT: ret i8 %[[RETT]]
+// CHECK-LINUX: error: call to undeclared function '__casa8'
+
+unsigned short test__casa16(unsigned short volatile* t, unsigned short c, unsigned short v)
+{
+ return __casa16 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @test__casa16(ptr{{.*}}%t, i16{{.*}}%c, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i16, ptr %c.addr, align 2
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i16, ptr %v.addr, align 2
+// CHECK-MSCOMPAT: %[[ZEXTC:[0-9]+]] = zext i16 %[[TMPC]] to i32
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i16 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.casa16(ptr %[[TMPT]], i32 %[[ZEXTC]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[RETT:[0-9]+]] = trunc i32 %[[RET]] to i16
+// CHECK-MSCOMPAT: ret i16 %[[RETT]]
+// CHECK-LINUX: error: call to undeclared function '__casa16'
+
+unsigned int test__casa32(unsigned int volatile* t, unsigned int c, unsigned int v)
+{
+ return __casa32 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @test__casa32(ptr{{.*}}%t, i32{{.*}}%c, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i32, ptr %c.addr, align 4
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i32, ptr %v.addr, align 4
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.casa32(ptr %[[TMPT]], i32 %[[TMPC]], i32 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__casa32'
+
+unsigned long long int test__casa64(unsigned long long int volatile* t,
+ unsigned long long int c,
+ unsigned long long int v)
+{
+ return __casa64 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @test__casa64(ptr{{.*}}%t, i64{{.*}}%c, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i64, ptr %c.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i64, ptr %v.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i64 @llvm.aarch64.casa64(ptr %[[TMPT]], i64 %[[TMPC]], i64 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__casa64'
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From 9ddcb1bc3801ff7c6bd1db2224647a3ebeedab3f Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 23 Apr 2026 13:06:36 -0300
Subject: [PATCH 13/26] [aarch64] Add support for casl builtin
The new intrinsic issues CASL* instruction directly, regardless of LSE
target features. This will be used to implement MSVC __caslX
builtins.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 15 +++++++
.../lib/Target/AArch64/AArch64InstrAtomics.td | 16 +++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 1 +
.../test/CodeGen/AArch64/ms-intrinsics-cas.ll | 42 ++++++++++++++++++-
4 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index bd8747efb7c02..61f8a26d625b4 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -64,6 +64,21 @@ def int_aarch64_casa64 : Intrinsic<[llvm_i64_ty],
[llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
[IntrNoFree, IntrWillReturn]>;
+// CASL intrinsics — emit CASL* (release) directly, regardless of the LSE
+// target feature. Same type conventions as cas*.
+def int_aarch64_casl8 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_casl16 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_casl32 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_casl64 : Intrinsic<[llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+ [IntrNoFree, IntrWillReturn]>;
+
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 57b19d97b41a5..12f108a77b518 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -602,6 +602,22 @@ def : Pat<(i32 (int_aarch64_casa32 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
def : Pat<(i64 (int_aarch64_casa64 GPR64sp:$Rn, GPR64:$Rs, GPR64:$Rt)),
(CASAX_cg GPR64:$Rs, GPR64:$Rt, GPR64sp:$Rn)>;
+// Unconditional patterns for the __casl* MSVC builtins (release ordering).
+def : Pat<(i32 (int_aarch64_casl8 GPR64sp:$Rn,
+ (and GPR32:$Rs, (i32 255)), (and GPR32:$Rt, (i32 255)))),
+ (CASLB_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casl8 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASLB_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casl16 GPR64sp:$Rn,
+ (and GPR32:$Rs, (i32 65535)), (and GPR32:$Rt, (i32 65535)))),
+ (CASLH_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casl16 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASLH_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casl32 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASLW_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i64 (int_aarch64_casl64 GPR64sp:$Rn, GPR64:$Rs, GPR64:$Rt)),
+ (CASLX_cg GPR64:$Rs, GPR64:$Rt, GPR64sp:$Rn)>;
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 29a480078a51b..34bc7569bb5ab 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3031,6 +3031,7 @@ defm CASAL : CompareAndSwap<1, 1, "al">;
let isCodeGenOnly = 1 in {
defm CAS : CompareAndSwap_cg<0, 0, "">;
defm CASA : CompareAndSwap_cg<1, 0, "a">;
+ defm CASL : CompareAndSwap_cg<0, 1, "l">;
}
// v8.1 atomic CASP
diff --git a/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll b/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
index 99f1e5b50132f..717f23b8910f5 100644
--- a/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
+++ b/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
@@ -87,4 +87,44 @@ define i64 @test_casa64(ptr %p, i64 %rs, i64 %rt) {
; CHECK-NEXT: ret
%r = call i64 @llvm.aarch64.casa64(ptr %p, i64 %rs, i64 %rt)
ret i64 %r
-}
\ No newline at end of file
+}
+
+define i32 @test_casl8(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_casl8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: caslb w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.casl8(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i32 @test_casl16(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_casl16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: caslh w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.casl16(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i32 @test_casl32(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_casl32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casl w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.casl32(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i64 @test_casl64(ptr %p, i64 %rs, i64 %rt) {
+; CHECK-LABEL: test_casl64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casl x1, x2, [x0]
+; CHECK-NEXT: mov x0, x1
+; CHECK-NEXT: ret
+ %r = call i64 @llvm.aarch64.casl64(ptr %p, i64 %rs, i64 %rt)
+ ret i64 %r
+}
>From 70373ccf862e00e8f95aa80a6c6dd955fbcbcc3e Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 16 Apr 2026 09:15:28 -0300
Subject: [PATCH 14/26] [aarch64] Add support for the __casl{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__casl8` - maps to CASLB.
* `__casl16` - maps to CASLH.
* `__casl32` - maps to CASL.
* `__casl64` - maps to CASL.
The emit is done using new intrinsics to issue the CASL* instructions
directly, regardless of the LSE target feature. This mimics the MSVC
compiler and the idea that the builtin behaves like inline asm.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 4 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 20 ++++++-
clang/lib/Headers/arm64intr.h | 9 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 56 +++++++++++++++++++
4 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index b88f38654a665..9dcacc6f2e991 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -450,4 +450,8 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __casa16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short, unsigned short)">;
def __casa32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int, unsigned int)">;
def __casa64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int, unsigned long long int)">;
+ def __casl8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char, unsigned char)">;
+ def __casl16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short, unsigned short)">;
+ def __casl32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int, unsigned int)">;
+ def __casl64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index bca38762082ab..52f87b11e7510 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5428,7 +5428,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
if (BuiltinID == AArch64::BI__cas8 || BuiltinID == AArch64::BI__cas16 ||
BuiltinID == AArch64::BI__cas32 || BuiltinID == AArch64::BI__cas64 ||
BuiltinID == AArch64::BI__casa8 || BuiltinID == AArch64::BI__casa16 ||
- BuiltinID == AArch64::BI__casa32 || BuiltinID == AArch64::BI__casa64) {
+ BuiltinID == AArch64::BI__casa32 || BuiltinID == AArch64::BI__casa64 ||
+ BuiltinID == AArch64::BI__casl8 || BuiltinID == AArch64::BI__casl16 ||
+ BuiltinID == AArch64::BI__casl32 || BuiltinID == AArch64::BI__casl64) {
unsigned IntrID;
llvm::Type *IntrArgTy;
switch (BuiltinID) {
@@ -5464,6 +5466,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
IntrID = Intrinsic::aarch64_casa64;
IntrArgTy = Builder.getInt64Ty();
break;
+ case AArch64::BI__casl8:
+ IntrID = Intrinsic::aarch64_casl8;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__casl16:
+ IntrID = Intrinsic::aarch64_casl16;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__casl32:
+ IntrID = Intrinsic::aarch64_casl32;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__casl64:
+ IntrID = Intrinsic::aarch64_casl64;
+ IntrArgTy = Builder.getInt64Ty();
+ break;
default:
llvm_unreachable("missing builtin ID in switch!");
}
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 18e6cef556900..75c56c0ad8303 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -95,6 +95,15 @@ unsigned __int32 __casa32(unsigned __int32 volatile *, unsigned __int32,
unsigned __int64 __casa64(unsigned __int64 volatile *, unsigned __int64,
unsigned __int64);
+unsigned __int8 __casl8(unsigned __int8 volatile *, unsigned __int8,
+ unsigned __int8);
+unsigned __int16 __casl16(unsigned __int16 volatile *, unsigned __int16,
+ unsigned __int16);
+unsigned __int32 __casl32(unsigned __int32 volatile *, unsigned __int32,
+ unsigned __int32);
+unsigned __int64 __casl64(unsigned __int64 volatile *, unsigned __int64,
+ unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index 87df4c5901ac8..e2c9cc9599045 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -989,6 +989,62 @@ unsigned long long int test__casa64(unsigned long long int volatile* t,
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__casa64'
+unsigned char test__casl8(unsigned char volatile* t, unsigned char c, unsigned char v)
+{
+ return __casl8 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @test__casl8(ptr{{.*}}%t, i8{{.*}}%c, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i8, ptr %c.addr, align 1
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i8, ptr %v.addr, align 1
+// CHECK-MSCOMPAT: %[[ZEXTC:[0-9]+]] = zext i8 %[[TMPC]] to i32
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i8 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.casl8(ptr %[[TMPT]], i32 %[[ZEXTC]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[RETT:[0-9]+]] = trunc i32 %[[RET]] to i8
+// CHECK-MSCOMPAT: ret i8 %[[RETT]]
+// CHECK-LINUX: error: call to undeclared function '__casl8'
+
+unsigned short test__casl16(unsigned short volatile* t, unsigned short c, unsigned short v)
+{
+ return __casl16 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @test__casl16(ptr{{.*}}%t, i16{{.*}}%c, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i16, ptr %c.addr, align 2
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i16, ptr %v.addr, align 2
+// CHECK-MSCOMPAT: %[[ZEXTC:[0-9]+]] = zext i16 %[[TMPC]] to i32
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i16 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.casl16(ptr %[[TMPT]], i32 %[[ZEXTC]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[RETT:[0-9]+]] = trunc i32 %[[RET]] to i16
+// CHECK-MSCOMPAT: ret i16 %[[RETT]]
+// CHECK-LINUX: error: call to undeclared function '__casl16'
+
+unsigned int test__casl32(unsigned int volatile* t, unsigned int c, unsigned int v)
+{
+ return __casl32 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @test__casl32(ptr{{.*}}%t, i32{{.*}}%c, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i32, ptr %c.addr, align 4
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i32, ptr %v.addr, align 4
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.casl32(ptr %[[TMPT]], i32 %[[TMPC]], i32 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__casl32'
+
+unsigned long long int test__casl64(unsigned long long int volatile* t,
+ unsigned long long int c,
+ unsigned long long int v)
+{
+ return __casl64 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @test__casl64(ptr{{.*}}%t, i64{{.*}}%c, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i64, ptr %c.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i64, ptr %v.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i64 @llvm.aarch64.casl64(ptr %[[TMPT]], i64 %[[TMPC]], i64 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__casl64'
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From 62c79906113b22447629d4a729c6064360fc31e1 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 23 Apr 2026 13:28:11 -0300
Subject: [PATCH 15/26] [aarch64] Add support for casal builtin
The new intrinsic issues CASAL* instruction directly, regardless of LSE
target features. This will be used to implement MSVC __casalX
builtins.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 15 +++++++
.../lib/Target/AArch64/AArch64InstrAtomics.td | 16 ++++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 1 +
.../test/CodeGen/AArch64/ms-intrinsics-cas.ll | 40 +++++++++++++++++++
4 files changed, 72 insertions(+)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 61f8a26d625b4..29b30b7d51d60 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -79,6 +79,21 @@ def int_aarch64_casl64 : Intrinsic<[llvm_i64_ty],
[llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
[IntrNoFree, IntrWillReturn]>;
+// CASAL intrinsics — emit CASAL* (acquire/release) directly, regardless of the LSE
+// target feature. Same type conventions as cas*.
+def int_aarch64_casal8 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_casal16 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_casal32 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_casal64 : Intrinsic<[llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+ [IntrNoFree, IntrWillReturn]>;
+
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 12f108a77b518..f6fb0433cf7f5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -618,6 +618,22 @@ def : Pat<(i32 (int_aarch64_casl32 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
def : Pat<(i64 (int_aarch64_casl64 GPR64sp:$Rn, GPR64:$Rs, GPR64:$Rt)),
(CASLX_cg GPR64:$Rs, GPR64:$Rt, GPR64sp:$Rn)>;
+// Unconditional patterns for the __casal* MSVC builtins (acquire/release ordering).
+def : Pat<(i32 (int_aarch64_casal8 GPR64sp:$Rn,
+ (and GPR32:$Rs, (i32 255)), (and GPR32:$Rt, (i32 255)))),
+ (CASALB_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casal8 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASALB_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casal16 GPR64sp:$Rn,
+ (and GPR32:$Rs, (i32 65535)), (and GPR32:$Rt, (i32 65535)))),
+ (CASALH_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casal16 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASALH_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_casal32 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
+ (CASALW_cg GPR32:$Rs, GPR32:$Rt, GPR64sp:$Rn)>;
+def : Pat<(i64 (int_aarch64_casal64 GPR64sp:$Rn, GPR64:$Rs, GPR64:$Rt)),
+ (CASALX_cg GPR64:$Rs, GPR64:$Rt, GPR64sp:$Rn)>;
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 34bc7569bb5ab..87f4fbd4f21b7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3032,6 +3032,7 @@ let isCodeGenOnly = 1 in {
defm CAS : CompareAndSwap_cg<0, 0, "">;
defm CASA : CompareAndSwap_cg<1, 0, "a">;
defm CASL : CompareAndSwap_cg<0, 1, "l">;
+ defm CASAL : CompareAndSwap_cg<1, 1, "al">;
}
// v8.1 atomic CASP
diff --git a/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll b/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
index 717f23b8910f5..bb2d7ae508ed5 100644
--- a/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
+++ b/llvm/test/CodeGen/AArch64/ms-intrinsics-cas.ll
@@ -128,3 +128,43 @@ define i64 @test_casl64(ptr %p, i64 %rs, i64 %rt) {
%r = call i64 @llvm.aarch64.casl64(ptr %p, i64 %rs, i64 %rt)
ret i64 %r
}
+
+define i32 @test_casal8(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_casal8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casalb w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.casal8(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i32 @test_casal16(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_casal16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casalh w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.casal16(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i32 @test_casal32(ptr %p, i32 %rs, i32 %rt) {
+; CHECK-LABEL: test_casal32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casal w1, w2, [x0]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.casal32(ptr %p, i32 %rs, i32 %rt)
+ ret i32 %r
+}
+
+define i64 @test_casal64(ptr %p, i64 %rs, i64 %rt) {
+; CHECK-LABEL: test_casal64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: casal x1, x2, [x0]
+; CHECK-NEXT: mov x0, x1
+; CHECK-NEXT: ret
+ %r = call i64 @llvm.aarch64.casal64(ptr %p, i64 %rs, i64 %rt)
+ ret i64 %r
+}
>From 1be53f0e73dd0e46cc5207ce4215a4687850277a Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 16 Apr 2026 10:47:20 -0300
Subject: [PATCH 16/26] [aarch64] Add support for the __casal{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__casal8` - maps to CASALB.
* `__casal16` - maps to CASALH.
* `__casal32` - maps to CASAL.
* `__casal64` - maps to CASAL.
The emit is done using new intrinsics to issue the CASAL* instructions
directly, regardless of the LSE target feature. This mimics the MSVC
compiler and the idea that the builtin behaves like inline asm.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 4 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 20 ++++++-
clang/lib/Headers/arm64intr.h | 9 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 56 +++++++++++++++++++
4 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 9dcacc6f2e991..b166bf1bb7f88 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -454,4 +454,8 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __casl16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short, unsigned short)">;
def __casl32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int, unsigned int)">;
def __casl64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int, unsigned long long int)">;
+ def __casal8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char, unsigned char)">;
+ def __casal16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short, unsigned short)">;
+ def __casal32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int, unsigned int)">;
+ def __casal64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 52f87b11e7510..dae1038da3e98 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5430,7 +5430,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
BuiltinID == AArch64::BI__casa8 || BuiltinID == AArch64::BI__casa16 ||
BuiltinID == AArch64::BI__casa32 || BuiltinID == AArch64::BI__casa64 ||
BuiltinID == AArch64::BI__casl8 || BuiltinID == AArch64::BI__casl16 ||
- BuiltinID == AArch64::BI__casl32 || BuiltinID == AArch64::BI__casl64) {
+ BuiltinID == AArch64::BI__casl32 || BuiltinID == AArch64::BI__casl64 ||
+ BuiltinID == AArch64::BI__casal8 || BuiltinID == AArch64::BI__casal16 ||
+ BuiltinID == AArch64::BI__casal32 || BuiltinID == AArch64::BI__casal64) {
unsigned IntrID;
llvm::Type *IntrArgTy;
switch (BuiltinID) {
@@ -5482,6 +5484,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
IntrID = Intrinsic::aarch64_casl64;
IntrArgTy = Builder.getInt64Ty();
break;
+ case AArch64::BI__casal8:
+ IntrID = Intrinsic::aarch64_casal8;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__casal16:
+ IntrID = Intrinsic::aarch64_casal16;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__casal32:
+ IntrID = Intrinsic::aarch64_casal32;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__casal64:
+ IntrID = Intrinsic::aarch64_casal64;
+ IntrArgTy = Builder.getInt64Ty();
+ break;
default:
llvm_unreachable("missing builtin ID in switch!");
}
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 75c56c0ad8303..ca0fc81742384 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -104,6 +104,15 @@ unsigned __int32 __casl32(unsigned __int32 volatile *, unsigned __int32,
unsigned __int64 __casl64(unsigned __int64 volatile *, unsigned __int64,
unsigned __int64);
+unsigned __int8 __casal8(unsigned __int8 volatile *, unsigned __int8,
+ unsigned __int8);
+unsigned __int16 __casal16(unsigned __int16 volatile *, unsigned __int16,
+ unsigned __int16);
+unsigned __int32 __casal32(unsigned __int32 volatile *, unsigned __int32,
+ unsigned __int32);
+unsigned __int64 __casal64(unsigned __int64 volatile *, unsigned __int64,
+ unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index e2c9cc9599045..65ee33185c364 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -1045,6 +1045,62 @@ unsigned long long int test__casl64(unsigned long long int volatile* t,
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__casl64'
+unsigned char test__casal8(unsigned char volatile* t, unsigned char c, unsigned char v)
+{
+ return __casal8 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @test__casal8(ptr{{.*}}%t, i8{{.*}}%c, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i8, ptr %c.addr, align 1
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i8, ptr %v.addr, align 1
+// CHECK-MSCOMPAT: %[[ZEXTC:[0-9]+]] = zext i8 %[[TMPC]] to i32
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i8 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.casal8(ptr %[[TMPT]], i32 %[[ZEXTC]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[RETT:[0-9]+]] = trunc i32 %[[RET]] to i8
+// CHECK-MSCOMPAT: ret i8 %[[RETT]]
+// CHECK-LINUX: error: call to undeclared function '__casal8'
+
+unsigned short test__casal16(unsigned short volatile* t, unsigned short c, unsigned short v)
+{
+ return __casal16 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @test__casal16(ptr{{.*}}%t, i16{{.*}}%c, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i16, ptr %c.addr, align 2
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i16, ptr %v.addr, align 2
+// CHECK-MSCOMPAT: %[[ZEXTC:[0-9]+]] = zext i16 %[[TMPC]] to i32
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i16 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.casal16(ptr %[[TMPT]], i32 %[[ZEXTC]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[RETT:[0-9]+]] = trunc i32 %[[RET]] to i16
+// CHECK-MSCOMPAT: ret i16 %[[RETT]]
+// CHECK-LINUX: error: call to undeclared function '__casal16'
+
+unsigned int test__casal32(unsigned int volatile* t, unsigned int c, unsigned int v)
+{
+ return __casal32 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @test__casal32(ptr{{.*}}%t, i32{{.*}}%c, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i32, ptr %c.addr, align 4
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i32, ptr %v.addr, align 4
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.casal32(ptr %[[TMPT]], i32 %[[TMPC]], i32 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__casal32'
+
+unsigned long long int test__casal64(unsigned long long int volatile* t,
+ unsigned long long int c,
+ unsigned long long int v)
+{
+ return __casal64 (t, c, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @test__casal64(ptr{{.*}}%t, i64{{.*}}%c, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPC:[0-9]+]] = load i64, ptr %c.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i64, ptr %v.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i64 @llvm.aarch64.casal64(ptr %[[TMPT]], i64 %[[TMPC]], i64 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__casal64'
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From 9b79b8fa5fb5ae868a94bada80e2f279c4dfe137 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 23 Apr 2026 15:33:25 -0300
Subject: [PATCH 17/26] [aarch64] Add support for swp builtin
The new intrinsics issue the SWP* instructions directly, regardless of
the LSE target feature. They will be used to implement the MSVC
__swp{8|16|32|64} builtins.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 15 ++++++
.../lib/Target/AArch64/AArch64InstrAtomics.td | 16 +++++++
.../lib/Target/AArch64/AArch64InstrFormats.td | 21 ++++++---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 +++
.../test/CodeGen/AArch64/ms-intrinsics-swp.ll | 46 +++++++++++++++++++
5 files changed, 98 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 29b30b7d51d60..d17edb7935ce4 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -94,6 +94,21 @@ def int_aarch64_casal64 : Intrinsic<[llvm_i64_ty],
[llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
[IntrNoFree, IntrWillReturn]>;
+// SWP intrinsics — emit SWP* (no acquire/release) directly, regardless of the
+// LSE target feature. swp8/swp16/swp32 take and return i32; swp64 uses i64.
+def int_aarch64_swp8 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swp16 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swp32 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swp64 : Intrinsic<[llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i64_ty],
+ [IntrNoFree, IntrWillReturn]>;
+
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index f6fb0433cf7f5..08d187363828c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -634,6 +634,22 @@ def : Pat<(i32 (int_aarch64_casal32 GPR64sp:$Rn, GPR32:$Rs, GPR32:$Rt)),
def : Pat<(i64 (int_aarch64_casal64 GPR64sp:$Rn, GPR64:$Rs, GPR64:$Rt)),
(CASALX_cg GPR64:$Rs, GPR64:$Rt, GPR64sp:$Rn)>;
+// Unconditional patterns for the __swp* MSVC builtins (no ordering).
+// For swp{b,h}, the zero-extension mask on the value may either survive or be
+// eliminated by the DAG combiner, so both masked and plain forms are needed.
+def : Pat<(i32 (int_aarch64_swp8 GPR64sp:$Rn, (and GPR32:$Rs, (i32 255)))),
+ (SWPB_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swp8 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPB_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swp16 GPR64sp:$Rn, (and GPR32:$Rs, (i32 65535)))),
+ (SWPH_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swp16 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPH_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swp32 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPW_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i64 (int_aarch64_swp64 GPR64sp:$Rn, GPR64:$Rs)),
+ (SWPX_cg GPR64:$Rs, GPR64sp:$Rn)>;
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 2aabdb77b8582..d0f1a60e4f2cd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12531,16 +12531,25 @@ class BaseSWP<string order, string size, RegisterClass RC>
let Inst{11-10} = 0b00;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
- let Predicates = [HasLSE];
}
-multiclass Swap<bits<1> Acq, bits<1> Rel, string order> {
- let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseSWP<order, "b", GPR32>;
- let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseSWP<order, "h", GPR32>;
- let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseSWP<order, "", GPR32>;
- let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseSWP<order, "", GPR64>;
+multiclass Swap_impl<bits<1> Acq, bits<1> Rel, string order, list<Predicate> preds, string suffix> {
+ let Predicates = preds, Acq = Acq, Rel = Rel in {
+ let Sz = 0b00 in def "B" # suffix : BaseSWP<order, "b", GPR32>;
+ let Sz = 0b01 in def "H" # suffix : BaseSWP<order, "h", GPR32>;
+ let Sz = 0b10 in def "W" # suffix : BaseSWP<order, "", GPR32>;
+ let Sz = 0b11 in def "X" # suffix : BaseSWP<order, "", GPR64>;
+ }
}
+multiclass Swap<bits<1> Acq, bits<1> Rel, string order>
+ : Swap_impl<Acq, Rel, order, [HasLSE], "">;
+
+// Like Swap but without the HasLSE predicate — for isCodeGenOnly use by
+// MS intrinsics that must emit SWP* regardless of the -march setting.
+multiclass Swap_cg<bits<1> Acq, bits<1> Rel, string order>
+ : Swap_impl<Acq, Rel, order, [], "_cg">;
+
// v9.6a swap operations
class BaseSWPLSUI<string order, RegisterClass RC>
: I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swpt" # order,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 87f4fbd4f21b7..753389a68399a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3061,6 +3061,12 @@ defm SWPA : Swap<1, 0, "a">;
defm SWPL : Swap<0, 1, "l">;
defm SWPAL : Swap<1, 1, "al">;
+// isCodeGenOnly=1: excluded from asm parser/disassembler, no HasLSE predicate.
+// Used by the __swp* MS intrinsics to force SWP* emission regardless of -march.
+let isCodeGenOnly = 1 in {
+ defm SWP : Swap_cg<0, 0, "">;
+}
+
// v9.6a atomic swap (FEAT_LSUI)
let Predicates = [HasLSUI] in {
defm SWPT : SwapLSUI<0, 0, "">;
diff --git a/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll b/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
new file mode 100644
index 0000000000000..f670a9157ba6d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-windows-msvc -fast-isel=0 -global-isel=false \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-windows-msvc -mattr=+lse -fast-isel=0 \
+; RUN: -global-isel=false -verify-machineinstrs < %s | FileCheck %s
+
+; Tests for the __swp* MS builtins on AArch64. These lower to
+; llvm.aarch64.swp* intrinsics and must select SWP* regardless of whether
+; +lse is present in the target features (the _cg ISel variants have no
+; feature predicate).
+
+define i32 @test_swp8(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swp8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpb w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swp8(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i32 @test_swp16(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swp16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swph w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swp16(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i32 @test_swp32(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swp32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swp w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swp32(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i64 @test_swp64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_swp64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swp x1, x0, [x0]
+; CHECK-NEXT: ret
+ %r = call i64 @llvm.aarch64.swp64(ptr %p, i64 %v)
+ ret i64 %r
+}
\ No newline at end of file
>From b22dc8916fce27b5ff52df72e6cf7c6317622719 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 17 Apr 2026 10:05:23 -0300
Subject: [PATCH 18/26] [aarch64] Add support for the __swp{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__swp8` - maps to SWPB.
* `__swp16` - maps to SWPH.
* `__swp32` - maps to SWP.
* `__swp64` - maps to SWP.
The emission uses new intrinsics that issue the SWP* instructions
directly, regardless of the LSE target feature. This mimics the MSVC
compiler and the idea that the builtin behaves like inline asm.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 5 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 36 ++++++++++++++
clang/lib/Headers/arm64intr.h | 5 ++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 49 +++++++++++++++++++
4 files changed, 95 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index b166bf1bb7f88..8c9ca4214a9af 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -458,4 +458,9 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __casal16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short, unsigned short)">;
def __casal32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int, unsigned int)">;
def __casal64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int, unsigned long long int)">;
+
+ def __swp8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char)">;
+ def __swp16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short)">;
+ def __swp32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int)">;
+ def __swp64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index dae1038da3e98..8829698a0f91b 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5520,6 +5520,42 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Result;
}
+ if (BuiltinID == AArch64::BI__swp8 || BuiltinID == AArch64::BI__swp16 ||
+ BuiltinID == AArch64::BI__swp32 || BuiltinID == AArch64::BI__swp64) {
+ unsigned IntrID;
+ llvm::Type *IntrArgTy;
+ switch (BuiltinID) {
+ case AArch64::BI__swp8:
+ IntrID = Intrinsic::aarch64_swp8;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swp16:
+ IntrID = Intrinsic::aarch64_swp16;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swp32:
+ IntrID = Intrinsic::aarch64_swp32;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swp64:
+ IntrID = Intrinsic::aarch64_swp64;
+ IntrArgTy = Builder.getInt64Ty();
+ break;
+ default:
+ llvm_unreachable("missing builtin ID in switch!");
+ }
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ if (Val->getType() != IntrArgTy)
+ Val = Builder.CreateZExt(Val, IntrArgTy);
+ Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntrID), {Ptr, Val});
+ // SWP{B/H} return i32 (zero-extended); truncate to declared type.
+ llvm::Type *RetTy = ConvertType(E->getType());
+ if (Result->getType() != RetTy)
+ Result = Builder.CreateTrunc(Result, RetTy);
+ return Result;
+ }
+
if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
return Builder.CreateFPTrunc(
Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index ca0fc81742384..49b6ca312b8e5 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -113,6 +113,11 @@ unsigned __int32 __casal32(unsigned __int32 volatile *, unsigned __int32,
unsigned __int64 __casal64(unsigned __int64 volatile *, unsigned __int64,
unsigned __int64);
+unsigned __int8 __swp8(unsigned __int8 volatile *, unsigned __int8);
+unsigned __int16 __swp16(unsigned __int16 volatile *, unsigned __int16);
+unsigned __int32 __swp32(unsigned __int32 volatile *, unsigned __int32);
+unsigned __int64 __swp64(unsigned __int64 volatile *, unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index 65ee33185c364..cf02c83d5f041 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -1101,6 +1101,55 @@ unsigned long long int test__casal64(unsigned long long int volatile* t,
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__casal64'
+unsigned char test__swp8(unsigned char volatile* t, unsigned char v)
+{
+ return __swp8(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @test__swp8(ptr{{.*}}%t, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i8, ptr %v.addr, align 1
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i8 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swp8(ptr %[[TMPT]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[TRUNC:[0-9]+]] = trunc i32 %[[RET]] to i8
+// CHECK-MSCOMPAT: ret i8 %[[TRUNC]]
+// CHECK-LINUX: error: call to undeclared function '__swp8'
+
+unsigned short test__swp16(unsigned short volatile* t, unsigned short v)
+{
+ return __swp16(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @test__swp16(ptr{{.*}}%t, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i16, ptr %v.addr, align 2
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i16 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swp16(ptr %[[TMPT]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[TRUNC:[0-9]+]] = trunc i32 %[[RET]] to i16
+// CHECK-MSCOMPAT: ret i16 %[[TRUNC]]
+// CHECK-LINUX: error: call to undeclared function '__swp16'
+
+unsigned int test__swp32(unsigned int volatile* t, unsigned int v)
+{
+ return __swp32(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @test__swp32(ptr{{.*}}%t, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i32, ptr %v.addr, align 4
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swp32(ptr %[[TMPT]], i32 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__swp32'
+
+unsigned long long int test__swp64(unsigned long long int volatile* t,
+ unsigned long long int v)
+{
+ return __swp64(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @test__swp64(ptr{{.*}}%t, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i64, ptr %v.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i64 @llvm.aarch64.swp64(ptr %[[TMPT]], i64 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__swp64'
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From 47fbc4735e8c14a5b1e155d2b0d5cbf237ea78cc Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 23 Apr 2026 16:27:26 -0300
Subject: [PATCH 19/26] [aarch64] Add support for swpa builtin
The new intrinsics issue the SWPA* instructions directly, regardless of
the LSE target feature. They will be used to implement the MSVC
__swpa{8|16|32|64} builtins.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 14 +++++++
.../lib/Target/AArch64/AArch64InstrAtomics.td | 13 +++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 1 +
.../test/CodeGen/AArch64/ms-intrinsics-swp.ll | 37 +++++++++++++++++++
4 files changed, 65 insertions(+)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index d17edb7935ce4..d529a9e91d36e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -109,6 +109,20 @@ def int_aarch64_swp64 : Intrinsic<[llvm_i64_ty],
[llvm_ptr_ty, llvm_i64_ty],
[IntrNoFree, IntrWillReturn]>;
+// SWPA intrinsics — emit SWPA* (acquire) directly, regardless of LSE support.
+def int_aarch64_swpa8 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swpa16 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swpa32 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swpa64 : Intrinsic<[llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i64_ty],
+ [IntrNoFree, IntrWillReturn]>;
+
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 08d187363828c..ce12ff7ad8bee 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -650,6 +650,19 @@ def : Pat<(i32 (int_aarch64_swp32 GPR64sp:$Rn, GPR32:$Rs)),
def : Pat<(i64 (int_aarch64_swp64 GPR64sp:$Rn, GPR64:$Rs)),
(SWPX_cg GPR64:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpa8 GPR64sp:$Rn, (and GPR32:$Rs, (i32 255)))),
+ (SWPAB_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpa8 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPAB_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpa16 GPR64sp:$Rn, (and GPR32:$Rs, (i32 65535)))),
+ (SWPAH_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpa16 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPAH_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpa32 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPAW_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i64 (int_aarch64_swpa64 GPR64sp:$Rn, GPR64:$Rs)),
+ (SWPAX_cg GPR64:$Rs, GPR64sp:$Rn)>;
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 753389a68399a..5de42cbaf6d87 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3065,6 +3065,7 @@ defm SWPAL : Swap<1, 1, "al">;
// Used by the __swp* MS intrinsics to force SWP* emission regardless of -march.
let isCodeGenOnly = 1 in {
defm SWP : Swap_cg<0, 0, "">;
+ defm SWPA : Swap_cg<1, 0, "a">;
}
// v9.6a atomic swap (FEAT_LSUI)
diff --git a/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll b/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
index f670a9157ba6d..d170a6d33218d 100644
--- a/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
+++ b/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
@@ -43,4 +43,41 @@ define i64 @test_swp64(ptr %p, i64 %v) {
; CHECK-NEXT: ret
%r = call i64 @llvm.aarch64.swp64(ptr %p, i64 %v)
ret i64 %r
+}
+
+
+define i32 @test_swpa8(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swpa8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpab w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swpa8(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i32 @test_swpa16(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swpa16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpah w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swpa16(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i32 @test_swpa32(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swpa32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpa w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swpa32(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i64 @test_swpa64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_swpa64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpa x1, x0, [x0]
+; CHECK-NEXT: ret
+ %r = call i64 @llvm.aarch64.swpa64(ptr %p, i64 %v)
+ ret i64 %r
}
\ No newline at end of file
>From 91a9836f5b69568a6fc7e726b607ad940b431553 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 17 Apr 2026 12:41:03 -0300
Subject: [PATCH 20/26] [aarch64] Add support for the __swpa{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__swpa8` - maps to SWPAB.
* `__swpa16` - maps to SWPAH.
* `__swpa32` - maps to SWPA.
* `__swpa64` - maps to SWPA.
The emission uses new intrinsics that issue the SWPA* instructions
directly, regardless of the LSE target feature. This mimics the MSVC
compiler and the idea that the builtin behaves like inline asm.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 4 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 20 +++++++-
clang/lib/Headers/arm64intr.h | 5 ++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 49 +++++++++++++++++++
4 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 8c9ca4214a9af..1df9ade3a9a81 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -463,4 +463,8 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __swp16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short)">;
def __swp32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int)">;
def __swp64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int)">;
+ def __swpa8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char)">;
+ def __swpa16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short)">;
+ def __swpa32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int)">;
+ def __swpa64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 8829698a0f91b..f509d6a6092ad 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5521,7 +5521,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
if (BuiltinID == AArch64::BI__swp8 || BuiltinID == AArch64::BI__swp16 ||
- BuiltinID == AArch64::BI__swp32 || BuiltinID == AArch64::BI__swp64) {
+ BuiltinID == AArch64::BI__swp32 || BuiltinID == AArch64::BI__swp64 ||
+ BuiltinID == AArch64::BI__swpa8 || BuiltinID == AArch64::BI__swpa16 ||
+ BuiltinID == AArch64::BI__swpa32 || BuiltinID == AArch64::BI__swpa64) {
unsigned IntrID;
llvm::Type *IntrArgTy;
switch (BuiltinID) {
@@ -5541,6 +5543,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
IntrID = Intrinsic::aarch64_swp64;
IntrArgTy = Builder.getInt64Ty();
break;
+ case AArch64::BI__swpa8:
+ IntrID = Intrinsic::aarch64_swpa8;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swpa16:
+ IntrID = Intrinsic::aarch64_swpa16;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swpa32:
+ IntrID = Intrinsic::aarch64_swpa32;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swpa64:
+ IntrID = Intrinsic::aarch64_swpa64;
+ IntrArgTy = Builder.getInt64Ty();
+ break;
default:
llvm_unreachable("missing builtin ID in switch!");
}
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 49b6ca312b8e5..21d7029d82533 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -118,6 +118,11 @@ unsigned __int16 __swp16(unsigned __int16 volatile *, unsigned __int16);
unsigned __int32 __swp32(unsigned __int32 volatile *, unsigned __int32);
unsigned __int64 __swp64(unsigned __int64 volatile *, unsigned __int64);
+unsigned __int8 __swpa8(unsigned __int8 volatile *, unsigned __int8);
+unsigned __int16 __swpa16(unsigned __int16 volatile *, unsigned __int16);
+unsigned __int32 __swpa32(unsigned __int32 volatile *, unsigned __int32);
+unsigned __int64 __swpa64(unsigned __int64 volatile *, unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index cf02c83d5f041..356dbd5c3d450 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -1150,6 +1150,55 @@ unsigned long long int test__swp64(unsigned long long int volatile* t,
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__swp64'
+unsigned char test__swpa8(unsigned char volatile* t, unsigned char v)
+{
+ return __swpa8(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @test__swpa8(ptr{{.*}}%t, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i8, ptr %v.addr, align 1
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i8 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swpa8(ptr %[[TMPT]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[TRUNC:[0-9]+]] = trunc i32 %[[RET]] to i8
+// CHECK-MSCOMPAT: ret i8 %[[TRUNC]]
+// CHECK-LINUX: error: call to undeclared function '__swpa8'
+
+unsigned short test__swpa16(unsigned short volatile* t, unsigned short v)
+{
+ return __swpa16(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @test__swpa16(ptr{{.*}}%t, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i16, ptr %v.addr, align 2
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i16 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swpa16(ptr %[[TMPT]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[TRUNC:[0-9]+]] = trunc i32 %[[RET]] to i16
+// CHECK-MSCOMPAT: ret i16 %[[TRUNC]]
+// CHECK-LINUX: error: call to undeclared function '__swpa16'
+
+unsigned int test__swpa32(unsigned int volatile* t, unsigned int v)
+{
+ return __swpa32(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @test__swpa32(ptr{{.*}}%t, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i32, ptr %v.addr, align 4
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swpa32(ptr %[[TMPT]], i32 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__swpa32'
+
+unsigned long long int test__swpa64(unsigned long long int volatile* t,
+ unsigned long long int v)
+{
+ return __swpa64(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @test__swpa64(ptr{{.*}}%t, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i64, ptr %v.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i64 @llvm.aarch64.swpa64(ptr %[[TMPT]], i64 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__swpa64'
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From 51f264e884b1838e20d06849f0a07f9e07f18ae7 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 23 Apr 2026 18:06:06 -0300
Subject: [PATCH 21/26] [aarch64] Add support for swpl builtin
The new intrinsic issues SWPL* instruction directly, regardless of LSE
target features. This will be used to implement MSVC __swplX
builtins.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 14 +++++++
.../lib/Target/AArch64/AArch64InstrAtomics.td | 13 +++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 1 +
.../test/CodeGen/AArch64/ms-intrinsics-swp.ll | 38 ++++++++++++++++++-
4 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index d529a9e91d36e..9c87644ba476d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -123,6 +123,20 @@ def int_aarch64_swpa64 : Intrinsic<[llvm_i64_ty],
[llvm_ptr_ty, llvm_i64_ty],
[IntrNoFree, IntrWillReturn]>;
+// SWPL intrinsics — emit SWPL* (release) directly, regardless of the LSE target feature.
+def int_aarch64_swpl8 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swpl16 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swpl32 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swpl64 : Intrinsic<[llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i64_ty],
+ [IntrNoFree, IntrWillReturn]>;
+
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index ce12ff7ad8bee..2496a8990da61 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -663,6 +663,19 @@ def : Pat<(i32 (int_aarch64_swpa32 GPR64sp:$Rn, GPR32:$Rs)),
def : Pat<(i64 (int_aarch64_swpa64 GPR64sp:$Rn, GPR64:$Rs)),
(SWPAX_cg GPR64:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpl8 GPR64sp:$Rn, (and GPR32:$Rs, (i32 255)))),
+ (SWPLB_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpl8 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPLB_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpl16 GPR64sp:$Rn, (and GPR32:$Rs, (i32 65535)))),
+ (SWPLH_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpl16 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPLH_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpl32 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPLW_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i64 (int_aarch64_swpl64 GPR64sp:$Rn, GPR64:$Rs)),
+ (SWPLX_cg GPR64:$Rs, GPR64sp:$Rn)>;
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 5de42cbaf6d87..a559ff4863cc5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3066,6 +3066,7 @@ defm SWPAL : Swap<1, 1, "al">;
let isCodeGenOnly = 1 in {
defm SWP : Swap_cg<0, 0, "">;
defm SWPA : Swap_cg<1, 0, "a">;
+ defm SWPL : Swap_cg<0, 1, "l">;
}
// v9.6a atomic swap (FEAT_LSUI)
diff --git a/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll b/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
index d170a6d33218d..4275b1ced09b6 100644
--- a/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
+++ b/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
@@ -80,4 +80,40 @@ define i64 @test_swpa64(ptr %p, i64 %v) {
; CHECK-NEXT: ret
%r = call i64 @llvm.aarch64.swpa64(ptr %p, i64 %v)
ret i64 %r
-}
\ No newline at end of file
+}
+
+define i32 @test_swpl8(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swpl8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swplb w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swpl8(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i32 @test_swpl16(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swpl16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swplh w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swpl16(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i32 @test_swpl32(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swpl32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpl w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swpl32(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i64 @test_swpl64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_swpl64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpl x1, x0, [x0]
+; CHECK-NEXT: ret
+ %r = call i64 @llvm.aarch64.swpl64(ptr %p, i64 %v)
+ ret i64 %r
+}
>From ce898533061244f010bbedbefb9b13021e1d69cd Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 17 Apr 2026 13:02:47 -0300
Subject: [PATCH 22/26] [aarch64] Add support for the __swpl{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__swpl8` - maps to SWPLB.
* `__swpl16` - maps to SWPLH.
* `__swpl32` - maps to SWPL.
* `__swpl64` - maps to SWPL.
The emit is done using new intrinsics that issue the SWPL* instructions
directly, regardless of the LSE target feature. This mimics the MSVC compiler
and follows the idea that the builtin behaves like inline asm.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 4 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 20 +++++++-
clang/lib/Headers/arm64intr.h | 5 ++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 49 +++++++++++++++++++
4 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 1df9ade3a9a81..13627a0abbc7a 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -467,4 +467,8 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __swpa16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short)">;
def __swpa32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int)">;
def __swpa64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int)">;
+ def __swpl8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char)">;
+ def __swpl16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short)">;
+ def __swpl32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int)">;
+ def __swpl64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index f509d6a6092ad..2d418ef7bf3cb 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5523,7 +5523,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
if (BuiltinID == AArch64::BI__swp8 || BuiltinID == AArch64::BI__swp16 ||
BuiltinID == AArch64::BI__swp32 || BuiltinID == AArch64::BI__swp64 ||
BuiltinID == AArch64::BI__swpa8 || BuiltinID == AArch64::BI__swpa16 ||
- BuiltinID == AArch64::BI__swpa32 || BuiltinID == AArch64::BI__swpa64) {
+ BuiltinID == AArch64::BI__swpa32 || BuiltinID == AArch64::BI__swpa64 ||
+ BuiltinID == AArch64::BI__swpl8 || BuiltinID == AArch64::BI__swpl16 ||
+ BuiltinID == AArch64::BI__swpl32 || BuiltinID == AArch64::BI__swpl64) {
unsigned IntrID;
llvm::Type *IntrArgTy;
switch (BuiltinID) {
@@ -5559,6 +5561,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
IntrID = Intrinsic::aarch64_swpa64;
IntrArgTy = Builder.getInt64Ty();
break;
+ case AArch64::BI__swpl8:
+ IntrID = Intrinsic::aarch64_swpl8;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swpl16:
+ IntrID = Intrinsic::aarch64_swpl16;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swpl32:
+ IntrID = Intrinsic::aarch64_swpl32;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swpl64:
+ IntrID = Intrinsic::aarch64_swpl64;
+ IntrArgTy = Builder.getInt64Ty();
+ break;
default:
llvm_unreachable("missing builtin ID in switch!");
}
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 21d7029d82533..707f43c51a422 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -123,6 +123,11 @@ unsigned __int16 __swpa16(unsigned __int16 volatile *, unsigned __int16);
unsigned __int32 __swpa32(unsigned __int32 volatile *, unsigned __int32);
unsigned __int64 __swpa64(unsigned __int64 volatile *, unsigned __int64);
+unsigned __int8 __swpl8(unsigned __int8 volatile *, unsigned __int8);
+unsigned __int16 __swpl16(unsigned __int16 volatile *, unsigned __int16);
+unsigned __int32 __swpl32(unsigned __int32 volatile *, unsigned __int32);
+unsigned __int64 __swpl64(unsigned __int64 volatile *, unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index 356dbd5c3d450..e379bb615ecf6 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -1199,6 +1199,55 @@ unsigned long long int test__swpa64(unsigned long long int volatile* t,
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__swpa64'
+unsigned char test__swpl8(unsigned char volatile* t, unsigned char v)
+{
+ return __swpl8(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @test__swpl8(ptr{{.*}}%t, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i8, ptr %v.addr, align 1
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i8 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swpl8(ptr %[[TMPT]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[TRUNC:[0-9]+]] = trunc i32 %[[RET]] to i8
+// CHECK-MSCOMPAT: ret i8 %[[TRUNC]]
+// CHECK-LINUX: error: call to undeclared function '__swpl8'
+
+unsigned short test__swpl16(unsigned short volatile* t, unsigned short v)
+{
+ return __swpl16(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @test__swpl16(ptr{{.*}}%t, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i16, ptr %v.addr, align 2
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i16 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swpl16(ptr %[[TMPT]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[TRUNC:[0-9]+]] = trunc i32 %[[RET]] to i16
+// CHECK-MSCOMPAT: ret i16 %[[TRUNC]]
+// CHECK-LINUX: error: call to undeclared function '__swpl16'
+
+unsigned int test__swpl32(unsigned int volatile* t, unsigned int v)
+{
+ return __swpl32(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @test__swpl32(ptr{{.*}}%t, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i32, ptr %v.addr, align 4
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swpl32(ptr %[[TMPT]], i32 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__swpl32'
+
+unsigned long long int test__swpl64(unsigned long long int volatile* t,
+ unsigned long long int v)
+{
+ return __swpl64(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @test__swpl64(ptr{{.*}}%t, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i64, ptr %v.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i64 @llvm.aarch64.swpl64(ptr %[[TMPT]], i64 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__swpl64'
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From 7bc1b42f2a8fe21e0d8c66259422ddf626ed0c25 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Thu, 23 Apr 2026 18:23:55 -0300
Subject: [PATCH 23/26] [aarch64] Add support for swpal builtin
The new intrinsic issues SWPAL* instruction directly, regardless of LSE
target features. This will be used to implement MSVC __swpalX
builtins.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 15 ++++++++
.../lib/Target/AArch64/AArch64InstrAtomics.td | 13 +++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 1 +
.../test/CodeGen/AArch64/ms-intrinsics-swp.ll | 36 +++++++++++++++++++
4 files changed, 65 insertions(+)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 9c87644ba476d..3e5cd1b945ec5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -137,6 +137,21 @@ def int_aarch64_swpl64 : Intrinsic<[llvm_i64_ty],
[llvm_ptr_ty, llvm_i64_ty],
[IntrNoFree, IntrWillReturn]>;
+// SWPAL intrinsics — emit SWPAL* (acquire and release) directly, regardless of
+// the LSE target feature.
+def int_aarch64_swpal8 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swpal16 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swpal32 : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoFree, IntrWillReturn]>;
+def int_aarch64_swpal64 : Intrinsic<[llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i64_ty],
+ [IntrNoFree, IntrWillReturn]>;
+
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 2496a8990da61..74d40ac64de7a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -676,6 +676,19 @@ def : Pat<(i32 (int_aarch64_swpl32 GPR64sp:$Rn, GPR32:$Rs)),
def : Pat<(i64 (int_aarch64_swpl64 GPR64sp:$Rn, GPR64:$Rs)),
(SWPLX_cg GPR64:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpal8 GPR64sp:$Rn, (and GPR32:$Rs, (i32 255)))),
+ (SWPALB_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpal8 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPALB_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpal16 GPR64sp:$Rn, (and GPR32:$Rs, (i32 65535)))),
+ (SWPALH_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpal16 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPALH_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_swpal32 GPR64sp:$Rn, GPR32:$Rs)),
+ (SWPALW_cg GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(i64 (int_aarch64_swpal64 GPR64sp:$Rn, GPR64:$Rs)),
+ (SWPALX_cg GPR64:$Rs, GPR64sp:$Rn)>;
+
// v8.9a/v9.4a FEAT_LRCPC patterns
let Predicates = [HasRCPC3, HasNEON] in {
// LDAP1 loads
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a559ff4863cc5..b3cc2db450484 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3067,6 +3067,7 @@ let isCodeGenOnly = 1 in {
defm SWP : Swap_cg<0, 0, "">;
defm SWPA : Swap_cg<1, 0, "a">;
defm SWPL : Swap_cg<0, 1, "l">;
+ defm SWPAL : Swap_cg<1, 1, "al">;
}
// v9.6a atomic swap (FEAT_LSUI)
diff --git a/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll b/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
index 4275b1ced09b6..c01fee0c9e1ff 100644
--- a/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
+++ b/llvm/test/CodeGen/AArch64/ms-intrinsics-swp.ll
@@ -117,3 +117,39 @@ define i64 @test_swpl64(ptr %p, i64 %v) {
%r = call i64 @llvm.aarch64.swpl64(ptr %p, i64 %v)
ret i64 %r
}
+
+define i32 @test_swpal8(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swpal8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpalb w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swpal8(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i32 @test_swpal16(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swpal16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpalh w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swpal16(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i32 @test_swpal32(ptr %p, i32 %v) {
+; CHECK-LABEL: test_swpal32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpal w1, w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.swpal32(ptr %p, i32 %v)
+ ret i32 %r
+}
+
+define i64 @test_swpal64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_swpal64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpal x1, x0, [x0]
+; CHECK-NEXT: ret
+ %r = call i64 @llvm.aarch64.swpal64(ptr %p, i64 %v)
+ ret i64 %r
+}
>From 2d44fbf88bd01860655fd3ec6163f051753e01af Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Fri, 17 Apr 2026 13:20:15 -0300
Subject: [PATCH 24/26] [aarch64] Add support for the __swpal{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__swpal8` - maps to SWPALB.
* `__swpal16` - maps to SWPALH.
* `__swpal32` - maps to SWPAL.
* `__swpal64` - maps to SWPAL.
The emit is done using new intrinsics that issue the SWPAL* instructions
directly, regardless of the LSE target feature. This mimics the MSVC compiler
and follows the idea that the builtin behaves like inline asm.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 4 ++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 20 +++++++-
clang/lib/Headers/arm64intr.h | 5 ++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 50 +++++++++++++++++++
4 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index 13627a0abbc7a..e8fc874595eae 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -471,4 +471,8 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __swpl16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short)">;
def __swpl32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int)">;
def __swpl64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int)">;
+ def __swpal8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char volatile *, unsigned char)">;
+ def __swpal16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short)">;
+ def __swpal32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int)">;
+ def __swpal64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 2d418ef7bf3cb..c4540124cc40f 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5525,7 +5525,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
BuiltinID == AArch64::BI__swpa8 || BuiltinID == AArch64::BI__swpa16 ||
BuiltinID == AArch64::BI__swpa32 || BuiltinID == AArch64::BI__swpa64 ||
BuiltinID == AArch64::BI__swpl8 || BuiltinID == AArch64::BI__swpl16 ||
- BuiltinID == AArch64::BI__swpl32 || BuiltinID == AArch64::BI__swpl64) {
+ BuiltinID == AArch64::BI__swpl32 || BuiltinID == AArch64::BI__swpl64 ||
+ BuiltinID == AArch64::BI__swpal8 || BuiltinID == AArch64::BI__swpal16 ||
+ BuiltinID == AArch64::BI__swpal32 || BuiltinID == AArch64::BI__swpal64) {
unsigned IntrID;
llvm::Type *IntrArgTy;
switch (BuiltinID) {
@@ -5577,6 +5579,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
IntrID = Intrinsic::aarch64_swpl64;
IntrArgTy = Builder.getInt64Ty();
break;
+ case AArch64::BI__swpal8:
+ IntrID = Intrinsic::aarch64_swpal8;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swpal16:
+ IntrID = Intrinsic::aarch64_swpal16;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swpal32:
+ IntrID = Intrinsic::aarch64_swpal32;
+ IntrArgTy = Builder.getInt32Ty();
+ break;
+ case AArch64::BI__swpal64:
+ IntrID = Intrinsic::aarch64_swpal64;
+ IntrArgTy = Builder.getInt64Ty();
+ break;
default:
llvm_unreachable("missing builtin ID in switch!");
}
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 707f43c51a422..3d6701cc78902 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -128,6 +128,11 @@ unsigned __int16 __swpl16(unsigned __int16 volatile *, unsigned __int16);
unsigned __int32 __swpl32(unsigned __int32 volatile *, unsigned __int32);
unsigned __int64 __swpl64(unsigned __int64 volatile *, unsigned __int64);
+unsigned __int8 __swpal8(unsigned __int8 volatile *, unsigned __int8);
+unsigned __int16 __swpal16(unsigned __int16 volatile *, unsigned __int16);
+unsigned __int32 __swpal32(unsigned __int32 volatile *, unsigned __int32);
+unsigned __int64 __swpal64(unsigned __int64 volatile *, unsigned __int64);
+
#ifdef __cplusplus
}
#endif
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index e379bb615ecf6..2caf2c40b8a29 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -1248,6 +1248,56 @@ unsigned long long int test__swpl64(unsigned long long int volatile* t,
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__swpl64'
+unsigned char test__swpal8(unsigned char volatile* t, unsigned char v)
+{
+ return __swpal8(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @test__swpal8(ptr{{.*}}%t, i8{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i8, ptr %v.addr, align 1
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i8 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swpal8(ptr %[[TMPT]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[TRUNC:[0-9]+]] = trunc i32 %[[RET]] to i8
+// CHECK-MSCOMPAT: ret i8 %[[TRUNC]]
+// CHECK-LINUX: error: call to undeclared function '__swpal8'
+
+unsigned short test__swpal16(unsigned short volatile* t, unsigned short v)
+{
+ return __swpal16(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @test__swpal16(ptr{{.*}}%t, i16{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i16, ptr %v.addr, align 2
+// CHECK-MSCOMPAT: %[[ZEXTV:[0-9]+]] = zext i16 %[[TMPV]] to i32
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swpal16(ptr %[[TMPT]], i32 %[[ZEXTV]])
+// CHECK-MSCOMPAT: %[[TRUNC:[0-9]+]] = trunc i32 %[[RET]] to i16
+// CHECK-MSCOMPAT: ret i16 %[[TRUNC]]
+// CHECK-LINUX: error: call to undeclared function '__swpal16'
+
+unsigned int test__swpal32(unsigned int volatile* t, unsigned int v)
+{
+ return __swpal32(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @test__swpal32(ptr{{.*}}%t, i32{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i32, ptr %v.addr, align 4
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.swpal32(ptr %[[TMPT]], i32 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__swpal32'
+
+unsigned long long int test__swpal64(unsigned long long int volatile* t,
+ unsigned long long int v)
+{
+ return __swpal64(t, v);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @test__swpal64(ptr{{.*}}%t, i64{{.*}}%v){{.*}}{
+// CHECK-MSCOMPAT: %[[TMPT:[0-9]+]] = load ptr, ptr %t.addr, align 8
+// CHECK-MSCOMPAT: %[[TMPV:[0-9]+]] = load i64, ptr %v.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i64 @llvm.aarch64.swpal64(ptr %[[TMPT]], i64 %[[TMPV]])
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__swpal64'
+
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
// CHECK-MSCOMPAT: ![[MD4]] = !{!"d5"}
>From c9a3bd47725d238fe09525db8508921f959716d5 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Wed, 22 Apr 2026 16:26:44 -0300
Subject: [PATCH 25/26] [aarch64] Add support for ldapr builtin
The new intrinsic issues LDAPR* instruction directly, regardless of RCPC
target features. This will be used to implement MSVC __ldaprX builtins.
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 15 ++++++
.../lib/Target/AArch64/AArch64InstrAtomics.td | 7 +++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 8 ++++
.../CodeGen/AArch64/ms-intrinsics-ldapr.ll | 46 +++++++++++++++++++
4 files changed, 76 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/ms-intrinsics-ldapr.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 3e5cd1b945ec5..c19bd837018f1 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -152,6 +152,21 @@ def int_aarch64_swpal64 : Intrinsic<[llvm_i64_ty],
[llvm_ptr_ty, llvm_i64_ty],
[IntrNoFree, IntrWillReturn]>;
+// LDAPR intrinsics — emit LDAPR* directly, regardless of RCPC target feature.
+// LDAPRB/H/W return i32 (zero-extended into GPR32); LDAPRX returns i64.
+def int_aarch64_ldapr8 : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+ [IntrNoFree, IntrWillReturn,
+ IntrReadMem, IntrArgMemOnly]>;
+def int_aarch64_ldapr16 : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+ [IntrNoFree, IntrWillReturn,
+ IntrReadMem, IntrArgMemOnly]>;
+def int_aarch64_ldapr32 : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+ [IntrNoFree, IntrWillReturn,
+ IntrReadMem, IntrArgMemOnly]>;
+def int_aarch64_ldapr64 : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
+ [IntrNoFree, IntrWillReturn,
+ IntrReadMem, IntrArgMemOnly]>;
+
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 74d40ac64de7a..c5ee58e4c60ed 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -60,6 +60,13 @@ let Predicates = [HasRCPC] in {
def : Pat<(acquiring_load<atomic_load_nonext_64> GPR64sp:$ptr), (LDAPRX GPR64sp:$ptr)>;
}
+// Unconditional patterns for the __ldapr* MSVC builtins. These force LDAPR*
+// emission regardless of the RCPC subtarget feature.
+def : Pat<(i32 (int_aarch64_ldapr8 GPR64sp:$Rn)), (LDAPRB_cg GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_ldapr16 GPR64sp:$Rn)), (LDAPRH_cg GPR64sp:$Rn)>;
+def : Pat<(i32 (int_aarch64_ldapr32 GPR64sp:$Rn)), (LDAPRW_cg GPR64sp:$Rn)>;
+def : Pat<(i64 (int_aarch64_ldapr64 GPR64sp:$Rn)), (LDAPRX_cg GPR64sp:$Rn)>;
+
// 8-bit loads
def : Pat<(seq_cst_load<atomic_load_azext_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_azext_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b3cc2db450484..122fa59754749 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1933,6 +1933,14 @@ let Predicates = [HasRCPC] in {
def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>;
}
+// Code-gen-only LDAPR variants for the __ldapr* MSVC builtins.
+let isCodeGenOnly = 1 in {
+ def LDAPRB_cg : RCPCLoad<0b00, "ldaprb", GPR32>;
+ def LDAPRH_cg : RCPCLoad<0b01, "ldaprh", GPR32>;
+ def LDAPRW_cg : RCPCLoad<0b10, "ldapr", GPR32>;
+ def LDAPRX_cg : RCPCLoad<0b11, "ldapr", GPR64>;
+}
+
// v8.3a complex add and multiply-accumulate. No predicate here, that is done
// inside the multiclass as the FP16 versions need different predicates.
defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
diff --git a/llvm/test/CodeGen/AArch64/ms-intrinsics-ldapr.ll b/llvm/test/CodeGen/AArch64/ms-intrinsics-ldapr.ll
new file mode 100644
index 0000000000000..1c1073a16bdb9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ms-intrinsics-ldapr.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-windows-msvc -fast-isel=0 -global-isel=false \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-windows-msvc -mattr=+rcpc -fast-isel=0 \
+; RUN: -global-isel=false -verify-machineinstrs < %s | FileCheck %s
+
+; Tests for the __ldapr* MS builtins on AArch64. These lower to the
+; llvm.aarch64.ldaprN intrinsics and must select ldaprb/ldaprh/ldapr
+; regardless of whether +rcpc is present in the target features (the _cg
+; ISel variants have no feature predicate).
+
+define i32 @test_ldapr8(ptr %p) {
+; CHECK-LABEL: test_ldapr8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldaprb w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.ldapr8(ptr %p)
+ ret i32 %r
+}
+
+define i32 @test_ldapr16(ptr %p) {
+; CHECK-LABEL: test_ldapr16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldaprh w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.ldapr16(ptr %p)
+ ret i32 %r
+}
+
+define i32 @test_ldapr32(ptr %p) {
+; CHECK-LABEL: test_ldapr32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldapr w0, [x0]
+; CHECK-NEXT: ret
+ %r = call i32 @llvm.aarch64.ldapr32(ptr %p)
+ ret i32 %r
+}
+
+define i64 @test_ldapr64(ptr %p) {
+; CHECK-LABEL: test_ldapr64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldapr x0, [x0]
+; CHECK-NEXT: ret
+ %r = call i64 @llvm.aarch64.ldapr64(ptr %p)
+ ret i64 %r
+}
\ No newline at end of file
>From eb71b1e6c97231fb95c560c3ecfd41742eed5508 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <zatrazz at gmail.com>
Date: Tue, 14 Apr 2026 11:27:39 -0300
Subject: [PATCH 26/26] [aarch64] Add support for the __ldapr{8|16|32|64} MS
intrinsics
Adds support for the following MSVC intrinsics:
* `__ldapr8` - LDAPRB
* `__ldapr16` - LDAPRH
* `__ldapr32` - LDAPR
* `__ldapr64` - LDAPR
The lowering uses new intrinsics to emit the LDAPR* instructions
directly, regardless of the RCPC target feature. This matches the MSVC
compiler and the intent that the builtin behaves like inline asm.
These are documented at:
<https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-180>
---
clang/include/clang/Basic/BuiltinsAArch64.td | 5 +++
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 26 +++++++++++++
clang/lib/Headers/arm64intr.h | 5 +++
.../test/CodeGen/arm64-microsoft-intrinsics.c | 37 +++++++++++++++++++
4 files changed, 73 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index e8fc874595eae..4b9e97b9949b7 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -475,4 +475,9 @@ let Attributes = [NoThrow, RequireDeclaration], Languages = "ALL_MS_LANGUAGES",
def __swpal16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short volatile *, unsigned short)">;
def __swpal32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int volatile *, unsigned int)">;
def __swpal64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int volatile *, unsigned long long int)">;
+
+ def __ldapr8 : AArch64NoPrefixTargetLibBuiltin<"unsigned char (unsigned char const volatile *)">;
+ def __ldapr16 : AArch64NoPrefixTargetLibBuiltin<"unsigned short (unsigned short const volatile *)">;
+ def __ldapr32 : AArch64NoPrefixTargetLibBuiltin<"unsigned int (unsigned int const volatile *)">;
+ def __ldapr64 : AArch64NoPrefixTargetLibBuiltin<"unsigned long long int (unsigned long long int const volatile *)">;
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index c4540124cc40f..51d1b214d4e2c 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5610,6 +5610,32 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Result;
}
+ if (BuiltinID == AArch64::BI__ldapr8 || BuiltinID == AArch64::BI__ldapr16 ||
+ BuiltinID == AArch64::BI__ldapr32 || BuiltinID == AArch64::BI__ldapr64) {
+ unsigned IntrID;
+ switch (BuiltinID) {
+ case AArch64::BI__ldapr8:
+ IntrID = Intrinsic::aarch64_ldapr8;
+ break;
+ case AArch64::BI__ldapr16:
+ IntrID = Intrinsic::aarch64_ldapr16;
+ break;
+ case AArch64::BI__ldapr32:
+ IntrID = Intrinsic::aarch64_ldapr32;
+ break;
+ default:
+ IntrID = Intrinsic::aarch64_ldapr64;
+ break;
+ }
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntrID), Ptr);
+ // LDAPRB/H return i32 (zero-extended); truncate to match the declared type.
+ llvm::Type *RetTy = ConvertType(E->getType());
+ if (Result->getType() != RetTy)
+ Result = Builder.CreateTrunc(Result, RetTy);
+ return Result;
+ }
+
if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
return Builder.CreateFPTrunc(
Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
diff --git a/clang/lib/Headers/arm64intr.h b/clang/lib/Headers/arm64intr.h
index 3d6701cc78902..ea7d2109278bf 100644
--- a/clang/lib/Headers/arm64intr.h
+++ b/clang/lib/Headers/arm64intr.h
@@ -50,6 +50,11 @@ unsigned __int16 __ldar16(const volatile unsigned __int16 *);
unsigned __int32 __ldar32(const volatile unsigned __int32 *);
unsigned __int64 __ldar64(const volatile unsigned __int64 *);
+unsigned __int8 __ldapr8(const volatile unsigned __int8 *);
+unsigned __int16 __ldapr16(const volatile unsigned __int16 *);
+unsigned __int32 __ldapr32(const volatile unsigned __int32 *);
+unsigned __int64 __ldapr64(const volatile unsigned __int64 *);
+
void __stlr8(unsigned __int8 volatile *, unsigned __int8);
void __stlr16(unsigned __int16 volatile *, unsigned __int16);
void __stlr32(unsigned __int32 volatile *, unsigned __int32);
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index 2caf2c40b8a29..2d5c5f040f419 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -1297,6 +1297,43 @@ unsigned long long int test__swpal64(unsigned long long int volatile* t,
// CHECK-MSCOMPAT: ret i64 %[[RET]]
// CHECK-LINUX: error: call to undeclared function '__swpal64'
+unsigned char check__ldapr8(unsigned char volatile *p) {
+ return __ldapr8(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i8 @check__ldapr8(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[LOAD:[0-9]+]] = load ptr, ptr %p.addr, align 8
+// CHECK-MSCOMPAT: %[[TMP:[0-9]+]] = call i32 @llvm.aarch64.ldapr8(ptr %[[LOAD]])
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = trunc i32 %[[TMP]] to i8
+// CHECK-MSCOMPAT: ret i8 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__ldapr8'
+
+unsigned short check__ldapr16(unsigned short volatile *p) {
+ return __ldapr16(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i16 @check__ldapr16(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[LOAD:[0-9]+]] = load ptr, ptr %p.addr, align 8
+// CHECK-MSCOMPAT: %[[TMP:[0-9]+]] = call i32 @llvm.aarch64.ldapr16(ptr %[[LOAD]])
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = trunc i32 %[[TMP]] to i16
+// CHECK-MSCOMPAT: ret i16 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__ldapr16'
+
+unsigned int check__ldapr32(unsigned int volatile *p) {
+ return __ldapr32(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i32 @check__ldapr32(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[LOAD:[0-9]+]] = load ptr, ptr %p.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i32 @llvm.aarch64.ldapr32(ptr %[[LOAD]])
+// CHECK-MSCOMPAT: ret i32 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__ldapr32'
+
+unsigned long long int check__ldapr64(unsigned long long int volatile *p) {
+ return __ldapr64(p);
+}
+// CHECK-MSCOMPAT-LABEL: define{{.*}}i64 @check__ldapr64(ptr{{.*}}%p){{.*}}{
+// CHECK-MSCOMPAT: %[[LOAD:[0-9]+]] = load ptr, ptr %p.addr, align 8
+// CHECK-MSCOMPAT: %[[RET:[0-9]+]] = call i64 @llvm.aarch64.ldapr64(ptr %[[LOAD]])
+// CHECK-MSCOMPAT: ret i64 %[[RET]]
+// CHECK-LINUX: error: call to undeclared function '__ldapr64'
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
More information about the cfe-commits
mailing list