[llvm] [AArch64][SME] Make coalescer barrier available without +sme. (PR #85311)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 14 14:17:04 PDT 2024


https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/85311

For each call that changes the streaming mode, ISel inserts a COALESCER_BARRIER node for the FP and (non-scalable) vector arguments to the callee.

When calling a non-streaming function from a streaming-compatible function, +sme is not required (the SME code path may never actually be executed at runtime). However, the patterns that match COALESCER_BARRIER were still predicated on `HasSME`, which is incorrect. This patch fixes that by moving the COALESCER_BARRIER definitions out of the `let Predicates = [HasSME]` block.

From 51648d1e59b43b4cd207dc8dcaa4b1299f938e58 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 14 Mar 2024 20:49:08 +0000
Subject: [PATCH] [AArch64][SME] Make coalescer barrier available without +sme.

For each call that changes the streaming-mode ISel inserts a
COALESCER_BARRIER node for the FP and (non-scalable) vector arguments to
the callee.

When calling a non-streaming function from a streaming-compatible function,
it's not required to have +sme (in case the SME code-path is not actually
executed at runtime). The patterns to match the COALESCER_BARRIER however
were still predicated with `HasSME`, which is incorrect. This patch tries to
fix that.
---
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |  4 +-
 ...compatible-to-normal-fn-wihout-sme-attr.ll | 39 +++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 2907ba74ff8108..298d93669cfd31 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -192,6 +192,8 @@ def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
 def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
           (MRS 0xde85)>;
 
+} // End let Predicates = [HasSME]
+
 multiclass CoalescerBarrierPseudo<RegisterClass rc, list<ValueType> vts> {
   def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]> {
     let Constraints = "$dst = $src";
@@ -211,8 +213,6 @@ multiclass CoalescerBarriers {
 
 defm COALESCER_BARRIER : CoalescerBarriers;
 
-} // End let Predicates = [HasSME]
-
 // Pseudo to match to smstart/smstop. This expands:
 //
 //  pseudonode (pstate_za|pstate_sm), before_call, expected_value
diff --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
index 3fa1ee5b9b0114..dba3227459b906 100644
--- a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
+++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
@@ -38,4 +38,43 @@ define void @streaming_compatible() #0 {
 
 declare void @non_streaming()
 
+
+; Verify that COALESCER_BARRIER is also supported without +sme.
+
+define void @streaming_compatible_arg(float %f) #0 {
+; CHECK-LABEL: streaming_compatible_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-NEXT:    bl __arm_sme_state
+; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT:    and x19, x0, #0x1
+; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-NEXT:    tbz w19, #0, .LBB1_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT:    bl non_streaming
+; CHECK-NEXT:    tbz w19, #0, .LBB1_4
+; CHECK-NEXT:  // %bb.3:
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:  .LBB1_4:
+; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #96
+; CHECK-NEXT:    ret
+  call void @non_streaming(float %f)
+  ret void
+}
+
+
 attributes #0 = { nounwind "aarch64_pstate_sm_compatible" }



More information about the llvm-commits mailing list