[llvm] 7fad304 - [AArch64][SME] Make coalescer barrier available without +sme. (#85311)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 18 02:43:07 PDT 2024
Author: Sander de Smalen
Date: 2024-03-18T09:43:03Z
New Revision: 7fad304a0310836c88aefd2a01f825e70bb14aed
URL: https://github.com/llvm/llvm-project/commit/7fad304a0310836c88aefd2a01f825e70bb14aed
DIFF: https://github.com/llvm/llvm-project/commit/7fad304a0310836c88aefd2a01f825e70bb14aed.diff
LOG: [AArch64][SME] Make coalescer barrier available without +sme. (#85311)
For each call that changes the streaming mode, ISel inserts a
COALESCER_BARRIER node for the FP and (non-scalable) vector arguments to
the callee.
When calling a non-streaming function from a streaming-compatible
function, +sme is not required (the SME code path may never actually be
executed at runtime). However, the patterns that match
COALESCER_BARRIER were still predicated on `HasSME`, which is
incorrect. This patch moves them out of the `HasSME` predicate block.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 1554f1c92b5bbb..b65c67e70a4e0f 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -164,6 +164,8 @@ def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
(MRS 0xde85)>;
+} // End let Predicates = [HasSME]
+
multiclass CoalescerBarrierPseudo<RegisterClass rc, list<ValueType> vts> {
def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]> {
let Constraints = "$dst = $src";
@@ -183,8 +185,6 @@ multiclass CoalescerBarriers {
defm COALESCER_BARRIER : CoalescerBarriers;
-} // End let Predicates = [HasSME]
-
// Pseudo to match to smstart/smstop. This expands:
//
// pseudonode (pstate_za|pstate_sm), before_call, expected_value
diff --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
index 3fa1ee5b9b0114..dba3227459b906 100644
--- a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
+++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
@@ -38,4 +38,43 @@ define void @streaming_compatible() #0 {
declare void @non_streaming()
+
+; Verify that COALESCER_BARRIER is also supported without +sme.
+
+define void @streaming_compatible_arg(float %f) #0 {
+; CHECK-LABEL: streaming_compatible_arg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT: and x19, x0, #0x1
+; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-NEXT: tbz w19, #0, .LBB1_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT: bl non_streaming
+; CHECK-NEXT: tbz w19, #0, .LBB1_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB1_4:
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+ call void @non_streaming(float %f)
+ ret void
+}
+
+
attributes #0 = { nounwind "aarch64_pstate_sm_compatible" }
More information about the llvm-commits
mailing list