[llvm] [AArch64] Don't inline streaming fn into non-streaming caller (PR #150595)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 31 07:06:38 PDT 2025


https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/150595

>From 3251573e1223bc990b025c054ff6e8a425fc76b8 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 24 Jul 2025 14:39:34 +0000
Subject: [PATCH 1/3] Precommit test

---
 .../Inline/AArch64/sme-pstatesm-attrs.ll      | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
index 6cb16928ae6ca..3c0f517501353 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
@@ -676,4 +676,46 @@ define void @streaming_caller_multiple_streaming_compatible_callees_inline() #0
   ret void
 }
 
+define void @simple_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @simple_streaming_function
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    ret void
+;
+  store <vscale x 4 x i32> zeroinitializer, ptr %ptr
+  ret void
+}
+
+; Don't allow inlining a streaming function into a non-streaming function.
+define void @non_streaming_caller_streaming_callee_dont_inline(ptr %ptr) #0 {
+; CHECK-LABEL: define void @non_streaming_caller_streaming_callee_dont_inline
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    ret void
+;
+  call void @simple_streaming_function(ptr %ptr)
+  ret void
+}
+
+define void @simple_locally_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define void @simple_locally_streaming_function
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    ret void
+;
+  store <vscale x 4 x i32> zeroinitializer, ptr %ptr
+  ret void
+}
+
+; Don't allow inlining a locally-streaming function into a non-streaming function.
+define void @non_streaming_caller_locally_streaming_callee_dont_inline(ptr %ptr) #0 {
+; CHECK-LABEL: define void @non_streaming_caller_locally_streaming_callee_dont_inline
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    ret void
+;
+  call void @simple_locally_streaming_function(ptr %ptr)
+  ret void
+}
+
 attributes #0 = { "target-features"="+sve,+sme" }

>From 4cfce256c0ed7902dc837fe71ff93df9bae2b238 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 24 Jul 2025 14:16:49 +0000
Subject: [PATCH 2/3] [AArch64] Don't inline streaming fn into non-streaming
 caller

Without this change, the following test would fail to compile
with `-march=armv8-a+sme`:

  void func1(const svuint32_t *in, svuint32_t *out) {
    [&]() __arm_streaming { *out = *in; }();
  }

More generally, it is probably better never to inline
streaming functions into non-streaming functions, because
the user will have marked them as 'streaming' for a
reason.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  7 ++
 .../sme-pstatesm-attrs-low-threshold.ll       |  3 +-
 .../Inline/AArch64/sme-pstatesm-attrs.ll      | 89 +++++++++----------
 3 files changed, 52 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 40f49dade6131..2bb61ddf6d818 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -270,6 +270,13 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                          const Function *Callee) const {
   SMECallAttrs CallAttrs(*Caller, *Callee);
 
+  // Never inline a function explicitly marked as streaming (whether through
+  // its interface or its body) into a non-streaming function. Assume it was
+  // marked as streaming for a reason.
+  if (CallAttrs.caller().hasNonStreamingInterfaceAndBody() &&
+      CallAttrs.callee().hasStreamingInterfaceOrBody())
+    return false;
+
   // When inlining, we should consider the body of the function, not the
   // interface.
   if (CallAttrs.callee().hasStreamingBody()) {
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
index b57a45fe41834..1878b62e88881 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
@@ -25,8 +25,7 @@ define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" {
 define void @non_streaming_caller_inline() #0 {
 ; CHECK-LABEL: define void @non_streaming_caller_inline
 ; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT:    call void @streaming_compatible_f()
-; CHECK-NEXT:    call void @streaming_compatible_f()
+; CHECK-NEXT:    call void @streaming_callee()
 ; CHECK-NEXT:    ret void
 ;
   call void @streaming_callee()
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
index 3c0f517501353..dc6255170b9c6 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
@@ -86,7 +86,7 @@ entry:
 ; [ ] N  -> SC + B
 define i32 @normal_caller_normal_callee_inline() #0 {
 ; CHECK-LABEL: define i32 @normal_caller_normal_callee_inline
-; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -103,7 +103,7 @@ entry:
 ; [ ] N  -> SC + B
 define i32 @normal_caller_streaming_callee_dont_inline() #0 {
 ; CHECK-LABEL: define i32 @normal_caller_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @streaming_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -120,7 +120,7 @@ entry:
 ; [ ] N  -> SC + B
 define i32 @normal_caller_streaming_compatible_callee_inline() #0  {
 ; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -137,7 +137,7 @@ entry:
 ; [ ] N  -> SC + B
 define i32 @normal_caller_locally_streaming_callee_dont_inline() #0  {
 ; CHECK-LABEL: define i32 @normal_caller_locally_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @locally_streaming_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -154,7 +154,7 @@ entry:
 ; [x] N  -> SC + B
 define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline() #0  {
 ; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -171,7 +171,7 @@ entry:
 ; [ ] S  -> SC + B
 define i32 @streaming_caller_normal_callee_dont_inline() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define i32 @streaming_caller_normal_callee_dont_inline
-; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @normal_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -188,7 +188,7 @@ entry:
 ; [ ] S  -> SC + B
 define i32 @streaming_caller_streaming_callee_inline() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define i32 @streaming_caller_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -205,7 +205,7 @@ entry:
 ; [ ] S  -> SC + B
 define i32 @streaming_caller_streaming_compatible_callee_inline() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -222,7 +222,7 @@ entry:
 ; [ ] S  -> SC + B
 define i32 @streaming_caller_locally_streaming_callee_inline() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define i32 @streaming_caller_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -239,7 +239,7 @@ entry:
 ; [x] S  -> SC + B
 define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -256,7 +256,7 @@ entry:
 ; [ ] N + B -> SC + B
 define i32 @locally_streaming_caller_normal_callee_dont_inline() #0  "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @locally_streaming_caller_normal_callee_dont_inline
-; CHECK-SAME: () #[[ATTR8:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR3]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @normal_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -273,7 +273,7 @@ entry:
 ; [ ] N + B -> SC + B
 define i32 @locally_streaming_caller_streaming_callee_inline() #0  "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR8]] {
+; CHECK-SAME: () #[[ATTR3]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -290,7 +290,7 @@ entry:
 ; [ ] N + B -> SC + B
 define i32 @locally_streaming_caller_streaming_compatible_callee_inline() #0  "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR8]] {
+; CHECK-SAME: () #[[ATTR3]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -307,7 +307,7 @@ entry:
 ; [ ] N + B -> SC + B
 define i32 @locally_streaming_caller_locally_streaming_callee_inline() #0  "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @locally_streaming_caller_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR8]] {
+; CHECK-SAME: () #[[ATTR3]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -324,7 +324,7 @@ entry:
 ; [x] N + B -> SC + B
 define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline() #0  "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR8]] {
+; CHECK-SAME: () #[[ATTR3]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -341,7 +341,7 @@ entry:
 ; [ ] SC -> SC + B
 define i32 @streaming_compatible_caller_normal_callee_dont_inline() #0  "aarch64_pstate_sm_compatible" {
 ; CHECK-LABEL: define i32 @streaming_compatible_caller_normal_callee_dont_inline
-; CHECK-SAME: () #[[ATTR9:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @normal_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -358,7 +358,7 @@ entry:
 ; [ ] SC -> SC + B
 define i32 @streaming_compatible_caller_streaming_callee_dont_inline() #0  "aarch64_pstate_sm_compatible" {
 ; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR9]] {
+; CHECK-SAME: () #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @streaming_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -375,7 +375,7 @@ entry:
 ; [ ] SC -> SC + B
 define i32 @streaming_compatible_caller_streaming_compatible_callee_inline() #0  "aarch64_pstate_sm_compatible" {
 ; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR9]] {
+; CHECK-SAME: () #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -392,7 +392,7 @@ entry:
 ; [ ] SC -> SC + B
 define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline() #0  "aarch64_pstate_sm_compatible" {
 ; CHECK-LABEL: define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR9]] {
+; CHECK-SAME: () #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @locally_streaming_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -409,7 +409,7 @@ entry:
 ; [x] SC -> SC + B
 define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline() #0  "aarch64_pstate_sm_compatible" {
 ; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR9]] {
+; CHECK-SAME: () #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -425,7 +425,7 @@ entry:
 ; [ ] SC + B -> SC + B
 define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline() #0  "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline
-; CHECK-SAME: () #[[ATTR10:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR5]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @normal_callee()
 ; CHECK-NEXT:    ret i32 [[RES]]
@@ -442,7 +442,7 @@ entry:
 ; [ ] SC + B -> SC + B
 define i32 @streaming_compatible_locally_streaming_caller_streaming_callee_inline() #0  "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR10]] {
+; CHECK-SAME: () #[[ATTR5]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -459,7 +459,7 @@ entry:
 ; [ ] SC + B -> SC + B
 define i32 @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline() #0  "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR10]] {
+; CHECK-SAME: () #[[ATTR5]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -476,7 +476,7 @@ entry:
 ; [ ] SC + B -> SC + B
 define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline() #0  "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR10]] {
+; CHECK-SAME: () #[[ATTR5]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -493,7 +493,7 @@ entry:
 ; [x] SC + B -> SC + B
 define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline() #0  "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
 ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline
-; CHECK-SAME: () #[[ATTR10]] {
+; CHECK-SAME: () #[[ATTR5]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    ret i32 [[RES_I]]
@@ -505,7 +505,7 @@ entry:
 
 define void @normal_callee_with_inlineasm() #0  {
 ; CHECK-LABEL: define void @normal_callee_with_inlineasm
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    ret void
@@ -517,7 +517,7 @@ entry:
 
 define void @streaming_caller_normal_callee_with_inlineasm_dont_inline() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define void @streaming_caller_normal_callee_with_inlineasm_dont_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @normal_callee_with_inlineasm()
 ; CHECK-NEXT:    ret void
@@ -529,7 +529,7 @@ entry:
 
 define i64 @normal_callee_with_intrinsic_call() #0  {
 ; CHECK-LABEL: define i64 @normal_callee_with_intrinsic_call
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i64 @llvm.aarch64.sve.cntb(i32 4)
 ; CHECK-NEXT:    ret i64 [[RES]]
@@ -541,7 +541,7 @@ entry:
 
 define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i64 @normal_callee_with_intrinsic_call()
 ; CHECK-NEXT:    ret i64 [[RES]]
@@ -555,7 +555,7 @@ declare i64 @llvm.aarch64.sve.cntb(i32)
 
 define i64 @normal_callee_call_sme_state() #0  {
 ; CHECK-LABEL: define i64 @normal_callee_call_sme_state
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call { i64, i64 } @__arm_sme_state()
 ; CHECK-NEXT:    [[RES_0:%.*]] = extractvalue { i64, i64 } [[RES]], 0
@@ -571,7 +571,7 @@ declare {i64, i64} @__arm_sme_state()
 
 define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RES:%.*]] = call i64 @normal_callee_call_sme_state()
 ; CHECK-NEXT:    ret i64 [[RES]]
@@ -587,7 +587,7 @@ declare void @streaming_body() "aarch64_pstate_sm_enabled"
 
 define void @streaming_caller_single_streaming_callee() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define void @streaming_caller_single_streaming_callee
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:    call void @streaming_body()
 ; CHECK-NEXT:    ret void
 ;
@@ -597,7 +597,7 @@ define void @streaming_caller_single_streaming_callee() #0  "aarch64_pstate_sm_e
 
 define void @streaming_caller_multiple_streaming_callees() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:    call void @streaming_body()
 ; CHECK-NEXT:    call void @streaming_body()
 ; CHECK-NEXT:    ret void
@@ -610,8 +610,8 @@ define void @streaming_caller_multiple_streaming_callees() #0  "aarch64_pstate_s
 ; Allow inlining, as inline it would not increase the number of streaming-mode changes.
 define void @streaming_caller_single_streaming_callee_inline() #0  {
 ; CHECK-LABEL: define void @streaming_caller_single_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR6]] {
-; CHECK-NEXT:    call void @streaming_body()
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @streaming_caller_single_streaming_callee()
 ; CHECK-NEXT:    ret void
 ;
   call void @streaming_caller_single_streaming_callee()
@@ -621,7 +621,7 @@ define void @streaming_caller_single_streaming_callee_inline() #0  {
 ; Prevent inlining, as inline it would lead to multiple streaming-mode changes.
 define void @streaming_caller_multiple_streaming_callees_dont_inline() #0  {
 ; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees_dont_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:    call void @streaming_caller_multiple_streaming_callees()
 ; CHECK-NEXT:    ret void
 ;
@@ -633,7 +633,7 @@ declare void @streaming_compatible_body() "aarch64_pstate_sm_compatible"
 
 define void @streaming_caller_single_streaming_compatible_callee() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:    call void @streaming_compatible_body()
 ; CHECK-NEXT:    ret void
 ;
@@ -643,7 +643,7 @@ define void @streaming_caller_single_streaming_compatible_callee() #0  "aarch64_
 
 define void @streaming_caller_multiple_streaming_compatible_callees() #0  "aarch64_pstate_sm_enabled" {
 ; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:    call void @streaming_compatible_body()
 ; CHECK-NEXT:    call void @streaming_compatible_body()
 ; CHECK-NEXT:    ret void
@@ -656,8 +656,8 @@ define void @streaming_caller_multiple_streaming_compatible_callees() #0  "aarch
 ; Allow inlining, as inline would remove a streaming-mode change.
 define void @streaming_caller_single_streaming_compatible_callee_inline() #0  {
 ; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR6]] {
-; CHECK-NEXT:    call void @streaming_compatible_body()
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @streaming_caller_single_streaming_compatible_callee()
 ; CHECK-NEXT:    ret void
 ;
   call void @streaming_caller_single_streaming_compatible_callee()
@@ -667,9 +667,8 @@ define void @streaming_caller_single_streaming_compatible_callee_inline() #0  {
 ; Allow inlining, as inline would remove several stremaing-mode changes.
 define void @streaming_caller_multiple_streaming_compatible_callees_inline() #0  {
 ; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees_inline
-; CHECK-SAME: () #[[ATTR6]] {
-; CHECK-NEXT:    call void @streaming_compatible_body()
-; CHECK-NEXT:    call void @streaming_compatible_body()
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @streaming_caller_multiple_streaming_compatible_callees()
 ; CHECK-NEXT:    ret void
 ;
   call void @streaming_caller_multiple_streaming_compatible_callees()
@@ -690,7 +689,7 @@ define void @simple_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_enabled"
 define void @non_streaming_caller_streaming_callee_dont_inline(ptr %ptr) #0 {
 ; CHECK-LABEL: define void @non_streaming_caller_streaming_callee_dont_inline
 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    call void @simple_streaming_function(ptr [[PTR]])
 ; CHECK-NEXT:    ret void
 ;
   call void @simple_streaming_function(ptr %ptr)
@@ -711,7 +710,7 @@ define void @simple_locally_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_b
 define void @non_streaming_caller_locally_streaming_callee_dont_inline(ptr %ptr) #0 {
 ; CHECK-LABEL: define void @non_streaming_caller_locally_streaming_callee_dont_inline
 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    call void @simple_locally_streaming_function(ptr [[PTR]])
 ; CHECK-NEXT:    ret void
 ;
   call void @simple_locally_streaming_function(ptr %ptr)

>From 443e282057dfa7db877b93186cb2bd5cf680373a Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 31 Jul 2025 12:49:52 +0000
Subject: [PATCH 3/3] Fix tests

Because the behaviour has now changed, I've had to modify some of the
other tests to test inlining with streaming-mode changes the
other way around (streaming <- non-streaming instead of
non-streaming <- streaming).
---
 .../sme-pstatesm-attrs-low-threshold.ll       | 21 ++---
 .../Inline/AArch64/sme-pstatesm-attrs.ll      | 83 ++++++++++---------
 2 files changed, 53 insertions(+), 51 deletions(-)

diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
index 1878b62e88881..597f0cf479d16 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
@@ -9,8 +9,8 @@ declare void @streaming_compatible_f() #0 "aarch64_pstate_sm_compatible"
 
 ; Function @streaming_callee doesn't contain any operations that may use ZA
 ; state and therefore can be legally inlined into a normal function.
-define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_callee
+define void @non_streaming_callee() #0 {
+; CHECK-LABEL: define void @non_streaming_callee
 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:    call void @streaming_compatible_f()
 ; CHECK-NEXT:    call void @streaming_compatible_f()
@@ -22,24 +22,25 @@ define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" {
 }
 
 ; Inline call to @streaming_callee to remove a streaming mode change.
-define void @non_streaming_caller_inline() #0 {
-; CHECK-LABEL: define void @non_streaming_caller_inline
+define void @streaming_caller_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_inline
 ; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT:    call void @streaming_callee()
+; CHECK-NEXT:    call void @streaming_compatible_f()
+; CHECK-NEXT:    call void @streaming_compatible_f()
 ; CHECK-NEXT:    ret void
 ;
-  call void @streaming_callee()
+  call void @non_streaming_callee()
   ret void
 }
 
 ; Don't inline call to @streaming_callee when the inline-threshold is set to 1, because it does not eliminate a streaming-mode change.
-define void @streaming_caller_dont_inline() #0 "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_dont_inline
+define void @non_streaming_caller_dont_inline() #0 {
+; CHECK-LABEL: define void @non_streaming_caller_dont_inline
 ; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:    call void @streaming_callee()
+; CHECK-NEXT:    call void @non_streaming_callee()
 ; CHECK-NEXT:    ret void
 ;
-  call void @streaming_callee()
+  call void @non_streaming_callee()
   ret void
 }
 
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
index dc6255170b9c6..077a3aa49fb41 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
@@ -583,57 +583,57 @@ entry:
 
 
 
-declare void @streaming_body() "aarch64_pstate_sm_enabled"
+declare void @nonstreaming_body()
 
-define void @streaming_caller_single_streaming_callee() #0  "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_single_streaming_callee
-; CHECK-SAME: () #[[ATTR2]] {
-; CHECK-NEXT:    call void @streaming_body()
+define void @nonstreaming_caller_single_nonstreaming_callee() #0 {
+; CHECK-LABEL: define void @nonstreaming_caller_single_nonstreaming_callee
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @nonstreaming_body()
 ; CHECK-NEXT:    ret void
 ;
-  call void @streaming_body()
+  call void @nonstreaming_body()
   ret void
 }
 
-define void @streaming_caller_multiple_streaming_callees() #0  "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees
-; CHECK-SAME: () #[[ATTR2]] {
-; CHECK-NEXT:    call void @streaming_body()
-; CHECK-NEXT:    call void @streaming_body()
+define void @nonstreaming_caller_multiple_nonstreaming_callees() #0 {
+; CHECK-LABEL: define void @nonstreaming_caller_multiple_nonstreaming_callees
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @nonstreaming_body()
+; CHECK-NEXT:    call void @nonstreaming_body()
 ; CHECK-NEXT:    ret void
 ;
-  call void @streaming_body()
-  call void @streaming_body()
+  call void @nonstreaming_body()
+  call void @nonstreaming_body()
   ret void
 }
 
 ; Allow inlining, as inline it would not increase the number of streaming-mode changes.
-define void @streaming_caller_single_streaming_callee_inline() #0  {
-; CHECK-LABEL: define void @streaming_caller_single_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:    call void @streaming_caller_single_streaming_callee()
+define void @streaming_caller_to_nonstreaming_callee_with_single_nonstreaming_callee_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_single_nonstreaming_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    call void @nonstreaming_body()
 ; CHECK-NEXT:    ret void
 ;
-  call void @streaming_caller_single_streaming_callee()
+  call void @nonstreaming_caller_single_nonstreaming_callee()
   ret void
 }
 
-; Prevent inlining, as inline it would lead to multiple streaming-mode changes.
-define void @streaming_caller_multiple_streaming_callees_dont_inline() #0  {
-; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees_dont_inline
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:    call void @streaming_caller_multiple_streaming_callees()
+; Prevent inlining @nonstreaming_caller_multiple_nonstreaming_callees into a streaming caller, as inlining it would lead to multiple streaming-mode changes.
+define void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    call void @nonstreaming_caller_multiple_nonstreaming_callees()
 ; CHECK-NEXT:    ret void
 ;
-  call void @streaming_caller_multiple_streaming_callees()
+  call void @nonstreaming_caller_multiple_nonstreaming_callees()
   ret void
 }
 
 declare void @streaming_compatible_body() "aarch64_pstate_sm_compatible"
 
-define void @streaming_caller_single_streaming_compatible_callee() #0  "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee
-; CHECK-SAME: () #[[ATTR2]] {
+define void @nonstreaming_caller_single_streaming_compatible_callee() #0 {
+; CHECK-LABEL: define void @nonstreaming_caller_single_streaming_compatible_callee
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:    call void @streaming_compatible_body()
 ; CHECK-NEXT:    ret void
 ;
@@ -641,9 +641,9 @@ define void @streaming_caller_single_streaming_compatible_callee() #0  "aarch64_
   ret void
 }
 
-define void @streaming_caller_multiple_streaming_compatible_callees() #0  "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees
-; CHECK-SAME: () #[[ATTR2]] {
+define void @nonstreaming_caller_multiple_streaming_compatible_callees() #0 {
+; CHECK-LABEL: define void @nonstreaming_caller_multiple_streaming_compatible_callees
+; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:    call void @streaming_compatible_body()
 ; CHECK-NEXT:    call void @streaming_compatible_body()
 ; CHECK-NEXT:    ret void
@@ -654,24 +654,25 @@ define void @streaming_caller_multiple_streaming_compatible_callees() #0  "aarch
 }
 
 ; Allow inlining, as inline would remove a streaming-mode change.
-define void @streaming_caller_single_streaming_compatible_callee_inline() #0  {
-; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:    call void @streaming_caller_single_streaming_compatible_callee()
+define void @streaming_caller_to_nonstreaming_callee_with_single_streamingcompatible_callee_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_single_streamingcompatible_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    call void @streaming_compatible_body()
 ; CHECK-NEXT:    ret void
 ;
-  call void @streaming_caller_single_streaming_compatible_callee()
+  call void @nonstreaming_caller_single_streaming_compatible_callee()
   ret void
 }
 
-; Allow inlining, as inline would remove several stremaing-mode changes.
-define void @streaming_caller_multiple_streaming_compatible_callees_inline() #0  {
-; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees_inline
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:    call void @streaming_caller_multiple_streaming_compatible_callees()
+; Allow inlining, as inline would remove several streaming-mode changes.
+define void @streaming_caller_to_nonstreaming_callee_with_multiple_streamingcompatible_callees_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_multiple_streamingcompatible_callees_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    call void @streaming_compatible_body()
+; CHECK-NEXT:    call void @streaming_compatible_body()
 ; CHECK-NEXT:    ret void
 ;
-  call void @streaming_caller_multiple_streaming_compatible_callees()
+  call void @nonstreaming_caller_multiple_streaming_compatible_callees()
   ret void
 }
 



More information about the llvm-commits mailing list