[clang] [llvm] [clang][llvm][aarch64] Add aarch64_sme_in_streaming_mode intrinsic (PR #120265)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 17 08:55:46 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Nicholas Guy (NickGuy-Arm)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/120265.diff
6 Files Affected:
- (modified) clang/include/clang/Basic/arm_sme.td (+2)
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c (+16-22)
- (modified) clang/utils/TableGen/SveEmitter.cpp (+2-6)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+1)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+9)
- (added) llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll (+44)
``````````diff
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 6b31dec004a1e2..e66a023f998ed4 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -716,6 +716,8 @@ let SMETargetGuard = "sme2" in {
def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
}
+def IN_STREAMING_MODE : Inst<"in_streaming_mode", "d", "", MergeNone, "aarch64_sme_in_streaming_mode", [IsOverloadNone, IsStreamingCompatible], []>;
+
//
// lookup table expand four contiguous registers
//
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c
index 9ba1527f269663..e880f7d7dbacd8 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c
@@ -8,19 +8,13 @@
// CHECK-LABEL: @test_in_streaming_mode(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
-// CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1
-// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
-// CHECK-NEXT: ret i1 [[TOBOOL_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
+// CHECK-NEXT: ret i1 [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z22test_in_streaming_modev(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
-// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
+// CPP-CHECK-NEXT: ret i1 [[TMP0]]
//
bool test_in_streaming_mode(void) __arm_streaming_compatible {
return __arm_in_streaming_mode();
@@ -28,12 +22,12 @@ bool test_in_streaming_mode(void) __arm_streaming_compatible {
// CHECK-LABEL: @test_za_disable(
// CHECK-NEXT: entry:
-// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR3]]
+// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]]
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_za_disablev(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR3]]
+// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]]
// CPP-CHECK-NEXT: ret void
//
void test_za_disable(void) __arm_streaming_compatible {
@@ -42,14 +36,14 @@ void test_za_disable(void) __arm_streaming_compatible {
// CHECK-LABEL: @test_has_sme(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]]
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
// CPP-CHECK-LABEL: @_Z12test_has_smev(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]]
@@ -72,12 +66,12 @@ void test_svundef_za(void) __arm_streaming_compatible __arm_out("za") {
// CHECK-LABEL: @test_sc_memcpy(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
// CHECK-NEXT: ret ptr [[CALL]]
//
// CPP-CHECK-LABEL: @_Z14test_sc_memcpyPvPKvm(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
// CPP-CHECK-NEXT: ret ptr [[CALL]]
//
void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible {
@@ -86,12 +80,12 @@ void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_comp
// CHECK-LABEL: @test_sc_memmove(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
// CHECK-NEXT: ret ptr [[CALL]]
//
// CPP-CHECK-LABEL: @_Z15test_sc_memmovePvPKvm(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
// CPP-CHECK-NEXT: ret ptr [[CALL]]
//
void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible {
@@ -100,12 +94,12 @@ void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_com
// CHECK-LABEL: @test_sc_memset(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
// CHECK-NEXT: ret ptr [[CALL]]
//
// CPP-CHECK-LABEL: @_Z14test_sc_memsetPvim(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
// CPP-CHECK-NEXT: ret ptr [[CALL]]
//
void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible {
@@ -114,12 +108,12 @@ void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible {
// CHECK-LABEL: @test_sc_memchr(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
// CHECK-NEXT: ret ptr [[CALL]]
//
// CPP-CHECK-LABEL: @_Z14test_sc_memchrPvim(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
// CPP-CHECK-NEXT: ret ptr [[CALL]]
//
void *test_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible {
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 14e5637f62517e..883eb990f7ba49 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1636,12 +1636,8 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) {
OS << " return x0 & (1ULL << 63);\n";
OS << "}\n\n";
- OS << "__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible "
- "{\n";
- OS << " uint64_t x0, x1;\n";
- OS << " __builtin_arm_get_sme_state(&x0, &x1);\n";
- OS << " return x0 & 1;\n";
- OS << "}\n\n";
+ OS << "__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_in_streaming_mode)))";
+ OS << " bool __arm_in_streaming_mode(void) __arm_streaming_compatible;\n\n";
OS << "void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n";
OS << "void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n";
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 53a66099a92bda..cc7a81e15f6609 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2974,6 +2974,7 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
+ def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">;
class SME_OuterProduct_Intrinsic
: DefaultAttrsIntrinsic<[],
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 28f304100326c6..708753f5762b4c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1183,6 +1183,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setMaxDivRemBitWidthSupported(128);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget->hasSME())
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
if (Subtarget->isNeonAvailable()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
@@ -27292,6 +27294,13 @@ void AArch64TargetLowering::ReplaceNodeResults(
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
+ }
+ case Intrinsic::aarch64_sme_in_streaming_mode: {
+ auto DL = SDLoc(N);
+ SDValue Chain = DAG.getEntryNode();
+ auto RuntimePStateSM = getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM));
+ return;
}
case Intrinsic::experimental_vector_match:
case Intrinsic::get_active_lane_mask: {
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
new file mode 100644
index 00000000000000..1e534e746d7e38
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+
+define i1 @streaming_mode_st_compatible() #0 {
+; CHECK-LABEL: streaming_mode_st_compatible:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and w0, w0, #0x1
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
+ ret i1 %mode
+}
+
+define i1 @streaming_mode_st_enabled() #1 {
+; CHECK-LABEL: streaming_mode_st_enabled:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and w0, w0, #0x1
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
+ ret i1 %mode
+}
+
+define i1 @streaming_mode_st_disabled() #2 {
+; CHECK-LABEL: streaming_mode_st_disabled:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and w0, w0, #0x1
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
+ ret i1 %mode
+}
+
+
+attributes #0 = {nounwind memory(none) "aarch64_pstate_sm_compatible"}
+attributes #1 = {nounwind memory(none) "aarch64_pstate_sm_enabled"}
+attributes #2 = {nounwind memory(none)}
``````````
</details>
https://github.com/llvm/llvm-project/pull/120265
More information about the llvm-commits mailing list