[clang] [llvm] [clang][llvm][aarch64] Add aarch64_sme_in_streaming_mode intrinsic (PR #120265)

Nicholas Guy via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 6 02:09:40 PST 2025


https://github.com/NickGuy-Arm updated https://github.com/llvm/llvm-project/pull/120265

>From 898c30b5b97e80b8bdeb024aec30d0e530d39d42 Mon Sep 17 00:00:00 2001
From: Nick Guy <nicholas.guy at arm.com>
Date: Fri, 13 Dec 2024 13:39:24 +0000
Subject: [PATCH 1/5] [clang][llvm][aarch64] Add aarch64_sme_in_streaming_mode
 intrinsic

---
 clang/include/clang/Basic/arm_sme.td          |  2 +
 .../sme-intrinsics/acle_sme_state_funs.c      | 38 +++++++---------
 clang/utils/TableGen/SveEmitter.cpp           |  8 +---
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  1 +
 .../Target/AArch64/AArch64ISelLowering.cpp    |  9 ++++
 .../CodeGen/AArch64/sme-intrinsics-state.ll   | 44 +++++++++++++++++++
 6 files changed, 74 insertions(+), 28 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 6b31dec004a1e2..e66a023f998ed4 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -716,6 +716,8 @@ let SMETargetGuard = "sme2" in {
   def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
 }
 
+def IN_STREAMING_MODE :  Inst<"in_streaming_mode", "d", "", MergeNone, "aarch64_sme_in_streaming_mode", [IsOverloadNone, IsStreamingCompatible], []>;
+
 //
 // lookup table expand four contiguous registers
 //
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c
index 9ba1527f269663..e880f7d7dbacd8 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c
@@ -8,19 +8,13 @@
 
 // CHECK-LABEL: @test_in_streaming_mode(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
-// CHECK-NEXT:    [[AND_I:%.*]] = and i64 [[TMP1]], 1
-// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
-// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
+// CHECK-NEXT:    ret i1 [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z22test_in_streaming_modev(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[AND_I:%.*]] = and i64 [[TMP1]], 1
-// CPP-CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
-// CPP-CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
+// CPP-CHECK-NEXT:    ret i1 [[TMP0]]
 //
 bool test_in_streaming_mode(void) __arm_streaming_compatible {
   return __arm_in_streaming_mode();
@@ -28,12 +22,12 @@ bool test_in_streaming_mode(void) __arm_streaming_compatible {
 
 // CHECK-LABEL: @test_za_disable(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR3]]
+// CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]]
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_za_disablev(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR3]]
+// CPP-CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]]
 // CPP-CHECK-NEXT:    ret void
 //
 void test_za_disable(void) __arm_streaming_compatible {
@@ -42,14 +36,14 @@ void test_za_disable(void) __arm_streaming_compatible {
 
 // CHECK-LABEL: @test_has_sme(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]]
 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
 // CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
 // CHECK-NEXT:    ret i1 [[TOBOOL_I]]
 //
 // CPP-CHECK-LABEL: @_Z12test_has_smev(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]]
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
 // CPP-CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
 // CPP-CHECK-NEXT:    ret i1 [[TOBOOL_I]]
@@ -72,12 +66,12 @@ void test_svundef_za(void) __arm_streaming_compatible __arm_out("za") {
 
 // CHECK-LABEL: @test_sc_memcpy(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
 // CHECK-NEXT:    ret ptr [[CALL]]
 //
 // CPP-CHECK-LABEL: @_Z14test_sc_memcpyPvPKvm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
 // CPP-CHECK-NEXT:    ret ptr [[CALL]]
 //
 void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible {
@@ -86,12 +80,12 @@ void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_comp
 
 // CHECK-LABEL: @test_sc_memmove(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
 // CHECK-NEXT:    ret ptr [[CALL]]
 //
 // CPP-CHECK-LABEL: @_Z15test_sc_memmovePvPKvm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
 // CPP-CHECK-NEXT:    ret ptr [[CALL]]
 //
 void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible {
@@ -100,12 +94,12 @@ void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_com
 
 // CHECK-LABEL: @test_sc_memset(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
 // CHECK-NEXT:    ret ptr [[CALL]]
 //
 // CPP-CHECK-LABEL: @_Z14test_sc_memsetPvim(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
 // CPP-CHECK-NEXT:    ret ptr [[CALL]]
 //
 void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible {
@@ -114,12 +108,12 @@ void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible {
 
 // CHECK-LABEL: @test_sc_memchr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
 // CHECK-NEXT:    ret ptr [[CALL]]
 //
 // CPP-CHECK-LABEL: @_Z14test_sc_memchrPvim(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]]
+// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
 // CPP-CHECK-NEXT:    ret ptr [[CALL]]
 //
 void *test_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible {
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 14e5637f62517e..883eb990f7ba49 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1636,12 +1636,8 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) {
   OS << "  return x0 & (1ULL << 63);\n";
   OS << "}\n\n";
 
-  OS << "__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible "
-        "{\n";
-  OS << "  uint64_t x0, x1;\n";
-  OS << "  __builtin_arm_get_sme_state(&x0, &x1);\n";
-  OS << "  return x0 & 1;\n";
-  OS << "}\n\n";
+  OS << "__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_in_streaming_mode)))";
+  OS << " bool __arm_in_streaming_mode(void) __arm_streaming_compatible;\n\n";
 
   OS << "void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n";
   OS << "void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n";
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 53a66099a92bda..cc7a81e15f6609 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2974,6 +2974,7 @@ let TargetPrefix = "aarch64" in {
 
 
   def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
+  def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">;
 
   class SME_OuterProduct_Intrinsic
       : DefaultAttrsIntrinsic<[],
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 28f304100326c6..708753f5762b4c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1183,6 +1183,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setMaxDivRemBitWidthSupported(128);
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+  if (Subtarget->hasSME())
+    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
 
   if (Subtarget->isNeonAvailable()) {
     // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
@@ -27292,6 +27294,13 @@ void AArch64TargetLowering::ReplaceNodeResults(
                            N->getOperand(1), N->getOperand(2));
       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
       return;
+    }
+      case Intrinsic::aarch64_sme_in_streaming_mode: {
+      auto DL = SDLoc(N);
+      SDValue Chain = DAG.getEntryNode();
+      auto RuntimePStateSM = getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM));
+      return;
     }
     case Intrinsic::experimental_vector_match:
     case Intrinsic::get_active_lane_mask: {
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
new file mode 100644
index 00000000000000..1e534e746d7e38
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+
+define i1 @streaming_mode_st_compatible() #0 {
+; CHECK-LABEL: streaming_mode_st_compatible:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    bl __arm_sme_state
+; CHECK-NEXT:    and w0, w0, #0x1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
+  ret i1 %mode
+}
+
+define i1 @streaming_mode_st_enabled() #1 {
+; CHECK-LABEL: streaming_mode_st_enabled:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    bl __arm_sme_state
+; CHECK-NEXT:    and w0, w0, #0x1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
+  ret i1 %mode
+}
+
+define i1 @streaming_mode_st_disabled() #2 {
+; CHECK-LABEL: streaming_mode_st_disabled:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    bl __arm_sme_state
+; CHECK-NEXT:    and w0, w0, #0x1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
+  ret i1 %mode
+}
+
+
+attributes #0 = {nounwind memory(none) "aarch64_pstate_sm_compatible"}
+attributes #1 = {nounwind memory(none) "aarch64_pstate_sm_enabled"}
+attributes #2 = {nounwind memory(none)}

>From 20701ac53cfb49a27df947c0eecb59acbb25a1dc Mon Sep 17 00:00:00 2001
From: Nick Guy <nicholas.guy at arm.com>
Date: Wed, 18 Dec 2024 11:39:07 +0000
Subject: [PATCH 2/5] Remove redundant __arm_in_streaming_mode declaration

---
 clang/include/clang/Basic/arm_sme.td            | 2 +-
 clang/utils/TableGen/SveEmitter.cpp             | 3 ---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 +++++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index e66a023f998ed4..891ed9874bb3d0 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -716,7 +716,7 @@ let SMETargetGuard = "sme2" in {
   def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
 }
 
-def IN_STREAMING_MODE :  Inst<"in_streaming_mode", "d", "", MergeNone, "aarch64_sme_in_streaming_mode", [IsOverloadNone, IsStreamingCompatible], []>;
+def IN_STREAMING_MODE :  Inst<"__arm_in_streaming_mode", "sv", "Pc", MergeNone, "aarch64_sme_in_streaming_mode", [IsOverloadNone, IsStreamingCompatible], []>;
 
 //
 // lookup table expand four contiguous registers
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 883eb990f7ba49..e0616d679e45b9 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1636,9 +1636,6 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) {
   OS << "  return x0 & (1ULL << 63);\n";
   OS << "}\n\n";
 
-  OS << "__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_in_streaming_mode)))";
-  OS << " bool __arm_in_streaming_mode(void) __arm_streaming_compatible;\n\n";
-
   OS << "void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n";
   OS << "void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n";
   OS << "void *__arm_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible;\n";
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 708753f5762b4c..7af38bc82aadf1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27295,11 +27295,13 @@ void AArch64TargetLowering::ReplaceNodeResults(
       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
       return;
     }
-      case Intrinsic::aarch64_sme_in_streaming_mode: {
+    case Intrinsic::aarch64_sme_in_streaming_mode: {
       auto DL = SDLoc(N);
       SDValue Chain = DAG.getEntryNode();
-      auto RuntimePStateSM = getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
-      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM));
+      auto RuntimePStateSM =
+          getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
+      Results.push_back(
+          DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM));
       return;
     }
     case Intrinsic::experimental_vector_match:

>From 453324a87f9ba669b3c997f47c5d3ce76992a6ea Mon Sep 17 00:00:00 2001
From: Nick Guy <nicholas.guy at arm.com>
Date: Wed, 18 Dec 2024 13:34:44 +0000
Subject: [PATCH 3/5] Replaced auto with actual types

---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7af38bc82aadf1..78d6d71a7a98bb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27296,9 +27296,9 @@ void AArch64TargetLowering::ReplaceNodeResults(
       return;
     }
     case Intrinsic::aarch64_sme_in_streaming_mode: {
-      auto DL = SDLoc(N);
+      SDLoc DL(N);
       SDValue Chain = DAG.getEntryNode();
-      auto RuntimePStateSM =
+      SDValue RuntimePStateSM =
           getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
       Results.push_back(
           DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM));

>From 0eca79f711d4a5535ef04563ab4a79cd7ebbe8df Mon Sep 17 00:00:00 2001
From: Nick Guy <nicholas.guy at arm.com>
Date: Wed, 18 Dec 2024 15:57:09 +0000
Subject: [PATCH 4/5] Address comments

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 15 +++++
 .../sme-intrinsics/acle_sme_state_funs.c      | 55 ++++++++++++++-----
 .../CodeGen/AArch64/sme-intrinsics-state.ll   | 28 +---------
 3 files changed, 56 insertions(+), 42 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4d4b7428abd505..fe9f0ade22f57d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -11285,6 +11285,21 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
   if (Builtin->LLVMIntrinsic == 0)
     return nullptr;
 
+  if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
+    // If we already know the streaming mode, don't bother with the intrinsic
+    // and emit a constant instead
+    auto FD = cast<FunctionDecl>(CurFuncDecl);
+    if (const Type *Ty = FD->getType().getTypePtrOrNull())
+      if (const auto *FPT = Ty->getAs<FunctionProtoType>()) {
+        unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
+        if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
+          bool IsStreamingMode =
+              SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
+          return ConstantInt::getBool(Builder.getContext(), IsStreamingMode);
+        }
+      }
+  }
+
   // Predicates must match the main datatype.
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c
index e880f7d7dbacd8..72f2d17fc6dc11 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c
@@ -6,28 +6,53 @@
 
 #include <arm_sme.h>
 
-// CHECK-LABEL: @test_in_streaming_mode(
+// CHECK-LABEL: @test_in_streaming_mode_streaming_compatible(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
 // CHECK-NEXT:    ret i1 [[TMP0]]
 //
-// CPP-CHECK-LABEL: @_Z22test_in_streaming_modev(
+// CPP-CHECK-LABEL: @_Z43test_in_streaming_mode_streaming_compatiblev(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode()
 // CPP-CHECK-NEXT:    ret i1 [[TMP0]]
 //
-bool test_in_streaming_mode(void) __arm_streaming_compatible {
+bool test_in_streaming_mode_streaming_compatible(void) __arm_streaming_compatible {
+  return __arm_in_streaming_mode();
+}
+
+// CHECK-LABEL: @test_in_streaming_mode_streaming(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i1 true
+//
+// CPP-CHECK-LABEL: @_Z32test_in_streaming_mode_streamingv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    ret i1 true
+//
+bool test_in_streaming_mode_streaming(void) __arm_streaming {
+//
+  return __arm_in_streaming_mode();
+}
+
+// CHECK-LABEL: @test_in_streaming_mode_non_streaming(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i1 false
+//
+// CPP-CHECK-LABEL: @_Z36test_in_streaming_mode_non_streamingv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    ret i1 false
+//
+bool test_in_streaming_mode_non_streaming(void) {
   return __arm_in_streaming_mode();
 }
 
 // CHECK-LABEL: @test_za_disable(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]]
+// CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR7:[0-9]+]]
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_za_disablev(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]]
+// CPP-CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR7:[0-9]+]]
 // CPP-CHECK-NEXT:    ret void
 //
 void test_za_disable(void) __arm_streaming_compatible {
@@ -36,14 +61,14 @@ void test_za_disable(void) __arm_streaming_compatible {
 
 // CHECK-LABEL: @test_has_sme(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR7]]
 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
 // CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
 // CHECK-NEXT:    ret i1 [[TOBOOL_I]]
 //
 // CPP-CHECK-LABEL: @_Z12test_has_smev(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR7]]
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
 // CPP-CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
 // CPP-CHECK-NEXT:    ret i1 [[TOBOOL_I]]
@@ -66,12 +91,12 @@ void test_svundef_za(void) __arm_streaming_compatible __arm_out("za") {
 
 // CHECK-LABEL: @test_sc_memcpy(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
+// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]]
 // CHECK-NEXT:    ret ptr [[CALL]]
 //
 // CPP-CHECK-LABEL: @_Z14test_sc_memcpyPvPKvm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
+// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]]
 // CPP-CHECK-NEXT:    ret ptr [[CALL]]
 //
 void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible {
@@ -80,12 +105,12 @@ void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_comp
 
 // CHECK-LABEL: @test_sc_memmove(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
+// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]]
 // CHECK-NEXT:    ret ptr [[CALL]]
 //
 // CPP-CHECK-LABEL: @_Z15test_sc_memmovePvPKvm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
+// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]]
 // CPP-CHECK-NEXT:    ret ptr [[CALL]]
 //
 void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible {
@@ -94,12 +119,12 @@ void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_com
 
 // CHECK-LABEL: @test_sc_memset(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
+// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]]
 // CHECK-NEXT:    ret ptr [[CALL]]
 //
 // CPP-CHECK-LABEL: @_Z14test_sc_memsetPvim(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
+// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]]
 // CPP-CHECK-NEXT:    ret ptr [[CALL]]
 //
 void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible {
@@ -108,12 +133,12 @@ void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible {
 
 // CHECK-LABEL: @test_sc_memchr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
+// CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]]
 // CHECK-NEXT:    ret ptr [[CALL]]
 //
 // CPP-CHECK-LABEL: @_Z14test_sc_memchrPvim(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]]
+// CPP-CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]]
 // CPP-CHECK-NEXT:    ret ptr [[CALL]]
 //
 void *test_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible {
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
index 1e534e746d7e38..4d78ae6c564839 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
 
 
-define i1 @streaming_mode_st_compatible() #0 {
+define i1 @streaming_mode_streaming_compatible() #0 {
 ; CHECK-LABEL: streaming_mode_st_compatible:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -14,31 +14,5 @@ define i1 @streaming_mode_st_compatible() #0 {
   ret i1 %mode
 }
 
-define i1 @streaming_mode_st_enabled() #1 {
-; CHECK-LABEL: streaming_mode_st_enabled:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    bl __arm_sme_state
-; CHECK-NEXT:    and w0, w0, #0x1
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
-  ret i1 %mode
-}
-
-define i1 @streaming_mode_st_disabled() #2 {
-; CHECK-LABEL: streaming_mode_st_disabled:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    bl __arm_sme_state
-; CHECK-NEXT:    and w0, w0, #0x1
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode()
-  ret i1 %mode
-}
-
 
 attributes #0 = {nounwind memory(none) "aarch64_pstate_sm_compatible"}
-attributes #1 = {nounwind memory(none) "aarch64_pstate_sm_enabled"}
-attributes #2 = {nounwind memory(none)}

>From afaf44786fe59e2ec1494fb13569e55bee4d44c3 Mon Sep 17 00:00:00 2001
From: Nick Guy <nicholas.guy at arm.com>
Date: Mon, 6 Jan 2025 10:08:16 +0000
Subject: [PATCH 5/5] Address nits and update test

---
 clang/lib/CodeGen/CGBuiltin.cpp                 | 17 ++++++++---------
 .../CodeGen/AArch64/sme-intrinsics-state.ll     |  2 +-
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index fe9f0ade22f57d..fdbd5c83c6b710 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -11288,16 +11288,15 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
   if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
     // If we already know the streaming mode, don't bother with the intrinsic
     // and emit a constant instead
-    auto FD = cast<FunctionDecl>(CurFuncDecl);
-    if (const Type *Ty = FD->getType().getTypePtrOrNull())
-      if (const auto *FPT = Ty->getAs<FunctionProtoType>()) {
-        unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
-        if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
-          bool IsStreamingMode =
-              SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
-          return ConstantInt::getBool(Builder.getContext(), IsStreamingMode);
-        }
+    const auto *FD = cast<FunctionDecl>(CurFuncDecl);
+    if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
+      unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
+      if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
+        bool IsStreaming =
+            SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
+        return ConstantInt::getBool(Builder.getContext(), IsStreaming);
       }
+    }
   }
 
   // Predicates must match the main datatype.
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
index 4d78ae6c564839..5037772a51cee3 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll
@@ -3,7 +3,7 @@
 
 
 define i1 @streaming_mode_streaming_compatible() #0 {
-; CHECK-LABEL: streaming_mode_st_compatible:
+; CHECK-LABEL: streaming_mode_streaming_compatible:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    bl __arm_sme_state



More information about the llvm-commits mailing list