[llvm] [AArch64] Don't inline streaming Fn if caller has no SVE (PR #150595)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 25 02:18:28 PDT 2025


https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/150595

Without this change, the following test would fail to compile
with `-march=armv8-a+sme`, because the streaming lambda would be inlined
into a non-streaming caller that has no SVE:

```
  void func1(const svuint32_t *in, svuint32_t *out) {
    [&]() __arm_streaming { *out = *in; }();
  }
```

From 482acab9cf61e5bc4346121324a76f324e9ebd06 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 24 Jul 2025 14:39:34 +0000
Subject: [PATCH 1/2] Precommit test

---
 .../Inline/AArch64/sme-pstatesm-attrs.ll      | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
index 6cb16928ae6ca..3305c76796a52 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
@@ -676,4 +676,26 @@ define void @streaming_caller_multiple_streaming_compatible_callees_inline() #0
   ret void
 }
 
+define void @nosve_streaming_function(ptr %ptr) "target-features"="+sme" "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @nosve_streaming_function
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    ret void
+;
+  store <vscale x 4 x i32> zeroinitializer, ptr %ptr
+  ret void
+}
+
+; Don't allow inlining a streaming function into a non-streaming function
+; if the non-streaming function has no SVE.
+define void @nosve_non_streaming_caller_streaming_callee_dont_inline(ptr %ptr) "target-features"="+sme"  {
+; CHECK-LABEL: define void @nosve_non_streaming_caller_streaming_callee_dont_inline
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    ret void
+;
+  call void @nosve_streaming_function(ptr %ptr)
+  ret void
+}
+
 attributes #0 = { "target-features"="+sve,+sme" }

From 8e6e7eeedf32245fe27c995b5f7b88fb38a2bead Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 24 Jul 2025 14:16:49 +0000
Subject: [PATCH 2/2] [AArch64] Don't inline streaming Fn if caller has no SVE

Without this change, the following test would fail to compile
with `-march=armv8-a+sme`:

  void func1(const svuint32_t *in, svuint32_t *out) {
    [&]() __arm_streaming { *out = *in; }();
  }
---
 .../Target/AArch64/AArch64TargetTransformInfo.cpp    | 12 +++++++++---
 .../Transforms/Inline/AArch64/sme-pstatesm-attrs.ll  |  2 +-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 40f49dade6131..38711206a9ea1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -280,6 +280,15 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
   if (CallAttrs.callee().isNewZA() || CallAttrs.callee().isNewZT0())
     return false;
 
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+  const FeatureBitset &CallerBits =
+      TM.getSubtargetImpl(*Caller)->getFeatureBits();
+
+  // Cannot inline a streaming function into a non-streaming function,
+  // if the caller has no SVE.
+  if (CallAttrs.requiresSMChange() && !CallerBits.test(AArch64::FeatureSVE))
+    return false;
+
   if (CallAttrs.requiresLazySave() || CallAttrs.requiresSMChange() ||
       CallAttrs.requiresPreservingZT0() ||
       CallAttrs.requiresPreservingAllZAState()) {
@@ -287,9 +296,6 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
       return false;
   }
 
-  const TargetMachine &TM = getTLI()->getTargetMachine();
-  const FeatureBitset &CallerBits =
-      TM.getSubtargetImpl(*Caller)->getFeatureBits();
   const FeatureBitset &CalleeBits =
       TM.getSubtargetImpl(*Callee)->getFeatureBits();
   // Adjust the feature bitsets by inverting some of the bits. This is needed
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
index 3305c76796a52..ce323701cbe48 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
@@ -691,7 +691,7 @@ define void @nosve_streaming_function(ptr %ptr) "target-features"="+sme" "aarch6
 define void @nosve_non_streaming_caller_streaming_callee_dont_inline(ptr %ptr) "target-features"="+sme"  {
 ; CHECK-LABEL: define void @nosve_non_streaming_caller_streaming_callee_dont_inline
 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    call void @nosve_streaming_function(ptr [[PTR]])
 ; CHECK-NEXT:    ret void
 ;
   call void @nosve_streaming_function(ptr %ptr)



More information about the llvm-commits mailing list