[clang] [llvm] [AArch64] Implement GCS ACLE intrinsics (PR #96903)
John Brawn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 03:50:42 PDT 2024
https://github.com/john-brawn-arm updated https://github.com/llvm/llvm-project/pull/96903
>From 123ebe17131fcfc7662be6759327b311e90fb16d Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Tue, 25 Jun 2024 15:35:18 +0100
Subject: [PATCH 1/3] [AArch64] Implement GCS ACLE intrinsics
This adds the intrinsics defined in ARM-software/acle#260.
Doing this requires some changes to the GCS instruction definitions:
these intrinsics rely on the fact that some instructions leave the
input register unmodified when GCS is disabled, and the instructions
need to be correctly marked with mayLoad/mayStore/hasSideEffects for
instruction selection to work.
---
clang/include/clang/Basic/BuiltinsAArch64.def | 6 ++
clang/lib/Headers/arm_acle.h | 27 +++++++++
clang/test/CodeGen/aarch64-gcs.c | 57 +++++++++++++++++++
llvm/include/llvm/IR/IntrinsicsAArch64.td | 17 ++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 22 +++++--
llvm/test/CodeGen/AArch64/gcs-intrinsics.ll | 49 ++++++++++++++++
6 files changed, 173 insertions(+), 5 deletions(-)
create mode 100644 clang/test/CodeGen/aarch64-gcs.c
create mode 100644 llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 5fb199b1b2b03..8c48437e86315 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -49,6 +49,7 @@ BUILTIN(__builtin_arm_wfe, "v", "")
BUILTIN(__builtin_arm_wfi, "v", "")
BUILTIN(__builtin_arm_sev, "v", "")
BUILTIN(__builtin_arm_sevl, "v", "")
+BUILTIN(__builtin_arm_chkfeat, "WUiWUi", "")
// Like __builtin_trap but provide an 16-bit immediate reason code (which goes into `brk #N`).
BUILTIN(__builtin_arm_trap, "vUIs", "nr")
@@ -136,6 +137,11 @@ TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
+// Armv9.3-A Guarded Control Stack
+TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
+TARGET_BUILTIN(__builtin_arm_gcsss1, "vvC*", "n", "gcs")
+TARGET_BUILTIN(__builtin_arm_gcsss2, "vC*vC*", "n", "gcs")
+
TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 5785954c9171a..4d3e8a30013cd 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -75,6 +75,14 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
#define __dbg(t) __builtin_arm_dbg(t)
#endif
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+#define _CHKFEAT_GCS 1
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__chkfeat(uint64_t __features) {
+ return __builtin_arm_chkfeat(__features) ^ __features;
+}
+#endif
+
/* 7.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
@@ -855,6 +863,25 @@ __rndrrs(uint64_t *__p) {
}
#endif
+/* 11.2 Guarded Control Stack intrinsics */
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ void * __attribute__((__always_inline__, __nodebug__))
+__gcspr() {
+ return (void *)__builtin_arm_rsr64("gcspr_el0");
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("gcs")))
+__gcspopm() {
+ return __builtin_arm_gcspopm(0);
+}
+
+static __inline__ const void * __attribute__((__always_inline__, __nodebug__, target("gcs")))
+__gcsss(const void *__stack) {
+ __builtin_arm_gcsss1(__stack);
+ return __builtin_arm_gcsss2(0);
+}
+#endif
+
#if defined(__cplusplus)
}
#endif
diff --git a/clang/test/CodeGen/aarch64-gcs.c b/clang/test/CodeGen/aarch64-gcs.c
new file mode 100644
index 0000000000000..e19946cf72f7f
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-gcs.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +gcs -emit-llvm %s -o - | FileCheck %s
+
+#include <arm_acle.h>
+
+// CHECK-LABEL: define dso_local i64 @test_chkfeat
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[__FEATURES_ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT: store i64 1, ptr [[__FEATURES_ADDR_I]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[__FEATURES_ADDR_I]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.chkfeat(i64 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[__FEATURES_ADDR_I]], align 8
+// CHECK-NEXT: [[XOR_I:%.*]] = xor i64 [[TMP1]], [[TMP2]]
+// CHECK-NEXT: ret i64 [[XOR_I]]
+//
+uint64_t test_chkfeat() {
+ return __chkfeat(_CHKFEAT_GCS);
+}
+
+// CHECK-LABEL: define dso_local ptr @test_gcspr
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.read_volatile_register.i64(metadata [[META2:![0-9]+]])
+// CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// CHECK-NEXT: ret ptr [[TMP1]]
+//
+void *test_gcspr() {
+ return __gcspr();
+}
+
+// CHECK-LABEL: define dso_local i64 @test_gcspopm
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.aarch64.gcspopm(i64 0)
+// CHECK-NEXT: ret i64 [[TMP0]]
+//
+uint64_t test_gcspopm() {
+ return __gcspopm();
+}
+
+// CHECK-LABEL: define dso_local ptr @test_gcsss
+// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[__STACK_ADDR_I:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[__STACK_ADDR_I]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__STACK_ADDR_I]], align 8
+// CHECK-NEXT: call void @llvm.aarch64.gcsss1(ptr [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.aarch64.gcsss2(ptr null)
+// CHECK-NEXT: ret ptr [[TMP2]]
+//
+const void *test_gcsss(const void *p) {
+ return __gcsss(p);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 38d71b17b476d..5c5c864141c6c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -89,6 +89,23 @@ def int_aarch64_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
// ordering during ISel.
def int_aarch64_space : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>;
+//===----------------------------------------------------------------------===//
+// Guarded Control Stack
+
+def int_aarch64_chkfeat : ClangBuiltin<"__builtin_arm_chkfeat">,
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+
+def int_aarch64_gcspopm : ClangBuiltin<"__builtin_arm_gcspopm">,
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
+ [IntrReadMem, IntrHasSideEffects]>;
+
+def int_aarch64_gcsss1 : ClangBuiltin<"__builtin_arm_gcsss1">,
+ DefaultAttrsIntrinsic<[], [llvm_ptr_ty], []>;
+
+def int_aarch64_gcsss2 : ClangBuiltin<"__builtin_arm_gcsss2">,
+ DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f3aac3b46d173..e0de418625e10 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1267,23 +1267,34 @@ class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
let Inst{15-8} = 0b01110111;
let Inst{7-5} = op2;
let Predicates = [HasGCS];
+ let hasSideEffects = 1;
}
-def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1">;
+let mayStore = 1, mayLoad = 1 in
+def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1", [(int_aarch64_gcsss1 (i64 GPR64:$Rt))]>;
+let mayStore = 1 in
def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
list<dag> pattern = []>
- : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
+ : RtSystemI<1, (outs GPR64:$Rt), (ins GPR64:$src), mnemonic, "\t$Rt", pattern> {
let Inst{20-19} = 0b01;
let Inst{18-16} = op1;
let Inst{15-8} = 0b01110111;
let Inst{7-5} = op2;
let Predicates = [HasGCS];
+ let hasSideEffects = 1;
+ // The input register is unchanged when GCS is disabled, so we need it as
+ // both an input and output operand.
+ let Constraints = "$src = $Rt";
}
-def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">;
-def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
+let mayStore = 1, mayLoad = 1 in
+def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2",
+ [(set GPR64:$Rt, (int_aarch64_gcsss2 GPR64:$src))]>;
+let mayLoad = 1 in
+def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm",
+ [(set GPR64:$Rt, (int_aarch64_gcspopm GPR64:$src))]>;
def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
@@ -1292,7 +1303,8 @@ def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGC
def : TokenAlias<"DSYNC", "dsync">;
let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
- def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">;
+ def CHKFEAT : SystemNoOperands<0b000, "hint\t#40",
+ [(set X16, (int_aarch64_chkfeat X16))]>;
}
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
diff --git a/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
new file mode 100644
index 0000000000000..93c95569723f5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mattr=+gcs -verify-machineinstrs -o - %s | FileCheck %s
+
+define i64 @test_chkfeat(i64 %arg) {
+; CHECK-LABEL: test_chkfeat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x16, x0
+; CHECK-NEXT: chkfeat x16
+; CHECK-NEXT: mov x0, x16
+; CHECK-NEXT: ret
+entry:
+ %0 = call i64 @llvm.aarch64.chkfeat(i64 %arg)
+ ret i64 %0
+}
+
+define i64 @test_gcspopm(i64 %arg) {
+; CHECK-LABEL: test_gcspopm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: gcspopm x0
+; CHECK-NEXT: ret
+entry:
+ %0 = call i64 @llvm.aarch64.gcspopm(i64 %arg)
+ ret i64 %0
+}
+
+define void @test_gcsss1(ptr %p) {
+; CHECK-LABEL: test_gcsss1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: gcsss1 x0
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.aarch64.gcsss1(ptr %p)
+ ret void
+}
+
+define ptr @test_gcsss2(ptr %p) {
+; CHECK-LABEL: test_gcsss2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: gcsss2 x0
+; CHECK-NEXT: ret
+entry:
+ %0 = call ptr @llvm.aarch64.gcsss2(ptr %p)
+ ret ptr %0
+}
+
+declare i64 @llvm.aarch64.chkfeat(i64)
+declare i64 @llvm.aarch64.gcspopm(i64)
+declare void @llvm.aarch64.gcsss1(ptr)
+declare ptr @llvm.aarch64.gcsss2(ptr)
>From 952d68e99ca1d225f7ad6343eb4bd01811eedca7 Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Wed, 3 Jul 2024 17:55:55 +0100
Subject: [PATCH 2/3] Use a single gcsss builtin instead of separate gcsss1 and
gcsss2
---
clang/include/clang/Basic/BuiltinsAArch64.def | 3 +--
clang/lib/Headers/arm_acle.h | 3 +--
clang/test/CodeGen/aarch64-gcs.c | 3 +--
llvm/include/llvm/IR/IntrinsicsAArch64.td | 7 ++----
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 12 ++++++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 ++---
llvm/test/CodeGen/AArch64/gcs-intrinsics.ll | 22 ++++++-------------
7 files changed, 26 insertions(+), 30 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 8c48437e86315..56110df79e807 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -139,8 +139,7 @@ TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
// Armv9.3-A Guarded Control Stack
TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
-TARGET_BUILTIN(__builtin_arm_gcsss1, "vvC*", "n", "gcs")
-TARGET_BUILTIN(__builtin_arm_gcsss2, "vC*vC*", "n", "gcs")
+TARGET_BUILTIN(__builtin_arm_gcsss, "vC*vC*", "n", "gcs")
TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 4d3e8a30013cd..1518b0c4c8428 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -877,8 +877,7 @@ __gcspopm() {
static __inline__ const void * __attribute__((__always_inline__, __nodebug__, target("gcs")))
__gcsss(const void *__stack) {
- __builtin_arm_gcsss1(__stack);
- return __builtin_arm_gcsss2(0);
+ return __builtin_arm_gcsss(__stack);
}
#endif
diff --git a/clang/test/CodeGen/aarch64-gcs.c b/clang/test/CodeGen/aarch64-gcs.c
index e19946cf72f7f..767b1b8bfaf2d 100644
--- a/clang/test/CodeGen/aarch64-gcs.c
+++ b/clang/test/CodeGen/aarch64-gcs.c
@@ -48,8 +48,7 @@ uint64_t test_gcspopm() {
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr [[__STACK_ADDR_I]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__STACK_ADDR_I]], align 8
-// CHECK-NEXT: call void @llvm.aarch64.gcsss1(ptr [[TMP1]])
-// CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.aarch64.gcsss2(ptr null)
+// CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.aarch64.gcsss(ptr [[TMP1]])
// CHECK-NEXT: ret ptr [[TMP2]]
//
const void *test_gcsss(const void *p) {
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 5c5c864141c6c..bcbb58a147a57 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -100,11 +100,8 @@ def int_aarch64_gcspopm : ClangBuiltin<"__builtin_arm_gcspopm">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
[IntrReadMem, IntrHasSideEffects]>;
-def int_aarch64_gcsss1 : ClangBuiltin<"__builtin_arm_gcsss1">,
- DefaultAttrsIntrinsic<[], [llvm_ptr_ty], []>;
-
-def int_aarch64_gcsss2 : ClangBuiltin<"__builtin_arm_gcsss2">,
- DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+def int_aarch64_gcsss : ClangBuiltin<"__builtin_arm_gcsss">,
+ DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 544eec3ab9cec..97e03be0f6ef0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4577,6 +4577,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
switch (IntNo) {
default:
break;
+ case Intrinsic::aarch64_gcsss: {
+ SDLoc DL(Node);
+ SDValue Chain = Node->getOperand(0);
+ SDValue Val = Node->getOperand(2);
+ SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
+ SDNode *SS1 =
+ CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
+ SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
+ MVT::Other, Zero, SDValue(SS1, 0));
+ ReplaceNode(Node, SS2);
+ return;
+ }
case Intrinsic::aarch64_ldaxp:
case Intrinsic::aarch64_ldxp: {
unsigned Op =
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index e0de418625e10..dad92be2905c4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1271,7 +1271,7 @@ class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
}
let mayStore = 1, mayLoad = 1 in
-def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1", [(int_aarch64_gcsss1 (i64 GPR64:$Rt))]>;
+def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1">;
let mayStore = 1 in
def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
@@ -1290,8 +1290,7 @@ class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
}
let mayStore = 1, mayLoad = 1 in
-def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2",
- [(set GPR64:$Rt, (int_aarch64_gcsss2 GPR64:$src))]>;
+def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">;
let mayLoad = 1 in
def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm",
[(set GPR64:$Rt, (int_aarch64_gcspopm GPR64:$src))]>;
@@ -1323,7 +1322,6 @@ class GCSSt<string mnemonic, bits<3> op>
def GCSSTR : GCSSt<"gcsstr", 0b000>;
def GCSSTTR : GCSSt<"gcssttr", 0b001>;
-
// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
diff --git a/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
index 93c95569723f5..a93b829f86840 100644
--- a/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
@@ -23,27 +23,19 @@ entry:
ret i64 %0
}
-define void @test_gcsss1(ptr %p) {
-; CHECK-LABEL: test_gcsss1:
+define ptr @test_gcsss(ptr %p) {
+; CHECK-LABEL: test_gcsss:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: gcsss1 x0
+; CHECK-NEXT: gcsss2 x8
+; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
entry:
- call void @llvm.aarch64.gcsss1(ptr %p)
- ret void
-}
-
-define ptr @test_gcsss2(ptr %p) {
-; CHECK-LABEL: test_gcsss2:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: gcsss2 x0
-; CHECK-NEXT: ret
-entry:
- %0 = call ptr @llvm.aarch64.gcsss2(ptr %p)
+ %0 = call ptr @llvm.aarch64.gcsss(ptr %p)
ret ptr %0
}
declare i64 @llvm.aarch64.chkfeat(i64)
declare i64 @llvm.aarch64.gcspopm(i64)
-declare void @llvm.aarch64.gcsss1(ptr)
-declare ptr @llvm.aarch64.gcsss2(ptr)
+declare ptr @llvm.aarch64.gcsss(ptr)
>From eed6d78980fb78f79a259b3ac2239542da9b9a93 Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Wed, 10 Jul 2024 16:09:44 +0100
Subject: [PATCH 3/3] Adjust attributes on gcspopm intrinsic to work around
tablegen bug
---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 ++++-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 3 ++-
llvm/test/CodeGen/AArch64/gcs-intrinsics.ll | 19 ++++++++++++++++---
3 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index bcbb58a147a57..246f717aaef96 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -96,9 +96,12 @@ def int_aarch64_chkfeat : ClangBuiltin<"__builtin_arm_chkfeat">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
[IntrNoMem]>;
+// FIXME: This should be marked as [IntrReadMem, IntrHasSideEffects], as it has
+// the side-effect of updating gcspr, but this combination doesn't work
+// correctly.
def int_aarch64_gcspopm : ClangBuiltin<"__builtin_arm_gcspopm">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
- [IntrReadMem, IntrHasSideEffects]>;
+ []>;
def int_aarch64_gcsss : ClangBuiltin<"__builtin_arm_gcsss">,
DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index dad92be2905c4..49ea9d952521c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1291,7 +1291,8 @@ class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
let mayStore = 1, mayLoad = 1 in
def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">;
-let mayLoad = 1 in
+// FIXME: mayStore = 1 only needed to match the intrinsic definition
+let mayStore = 1, mayLoad = 1 in
def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm",
[(set GPR64:$Rt, (int_aarch64_gcspopm GPR64:$src))]>;
def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
diff --git a/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
index a93b829f86840..b2f9b4d34ac4c 100644
--- a/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll
@@ -1,6 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+gcs -verify-machineinstrs -o - %s | FileCheck %s
+; We call each intrinsic twice, once with the result being unused and once with
+; it being used, to check that dead code elimination is being done correctly.
+; chkfeat does not have side effects so can be eliminated, but the others do and
+; can't be eliminated.
+
define i64 @test_chkfeat(i64 %arg) {
; CHECK-LABEL: test_chkfeat:
; CHECK: // %bb.0: // %entry
@@ -10,30 +15,38 @@ define i64 @test_chkfeat(i64 %arg) {
; CHECK-NEXT: ret
entry:
%0 = call i64 @llvm.aarch64.chkfeat(i64 %arg)
- ret i64 %0
+ %1 = call i64 @llvm.aarch64.chkfeat(i64 %arg)
+ ret i64 %1
}
define i64 @test_gcspopm(i64 %arg) {
; CHECK-LABEL: test_gcspopm:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: gcspopm x8
; CHECK-NEXT: gcspopm x0
; CHECK-NEXT: ret
entry:
%0 = call i64 @llvm.aarch64.gcspopm(i64 %arg)
- ret i64 %0
+ %1 = call i64 @llvm.aarch64.gcspopm(i64 %arg)
+ ret i64 %1
}
define ptr @test_gcsss(ptr %p) {
; CHECK-LABEL: test_gcsss:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: gcsss1 x0
; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: gcsss2 x9
; CHECK-NEXT: gcsss1 x0
; CHECK-NEXT: gcsss2 x8
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
entry:
%0 = call ptr @llvm.aarch64.gcsss(ptr %p)
- ret ptr %0
+ %1 = call ptr @llvm.aarch64.gcsss(ptr %p)
+ ret ptr %1
}
declare i64 @llvm.aarch64.chkfeat(i64)
More information about the llvm-commits mailing list