[llvm] 7fad304 - [AArch64][SME] Make coalescer barrier available without +sme. (#85311)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 18 02:43:07 PDT 2024


Author: Sander de Smalen
Date: 2024-03-18T09:43:03Z
New Revision: 7fad304a0310836c88aefd2a01f825e70bb14aed

URL: https://github.com/llvm/llvm-project/commit/7fad304a0310836c88aefd2a01f825e70bb14aed
DIFF: https://github.com/llvm/llvm-project/commit/7fad304a0310836c88aefd2a01f825e70bb14aed.diff

LOG: [AArch64][SME] Make coalescer barrier available without +sme. (#85311)

For each call that changes the streaming-mode ISel inserts a
COALESCER_BARRIER node for the FP and (non-scalable) vector arguments to
the callee.

When calling a non-streaming function from a streaming-compatible
function, it's not required to have +sme (in case the SME code-path is
not actually executed at runtime). The patterns to match the
COALESCER_BARRIER however were still predicated with `HasSME`, which is
incorrect. This patch tries to fix that.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
    llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 1554f1c92b5bbb..b65c67e70a4e0f 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -164,6 +164,8 @@ def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
 def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
           (MRS 0xde85)>;
 
+} // End let Predicates = [HasSME]
+
 multiclass CoalescerBarrierPseudo<RegisterClass rc, list<ValueType> vts> {
   def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]> {
     let Constraints = "$dst = $src";
@@ -183,8 +185,6 @@ multiclass CoalescerBarriers {
 
 defm COALESCER_BARRIER : CoalescerBarriers;
 
-} // End let Predicates = [HasSME]
-
 // Pseudo to match to smstart/smstop. This expands:
 //
 //  pseudonode (pstate_za|pstate_sm), before_call, expected_value

diff  --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
index 3fa1ee5b9b0114..dba3227459b906 100644
--- a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
+++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
@@ -38,4 +38,43 @@ define void @streaming_compatible() #0 {
 
 declare void @non_streaming()
 
+
+; Verify that COALESCER_BARRIER is also supported without +sme.
+
+define void @streaming_compatible_arg(float %f) #0 {
+; CHECK-LABEL: streaming_compatible_arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-NEXT:    bl __arm_sme_state
+; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT:    and x19, x0, #0x1
+; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-NEXT:    tbz w19, #0, .LBB1_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT:    bl non_streaming
+; CHECK-NEXT:    tbz w19, #0, .LBB1_4
+; CHECK-NEXT:  // %bb.3:
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:  .LBB1_4:
+; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #96
+; CHECK-NEXT:    ret
+  call void @non_streaming(float %f)
+  ret void
+}
+
+
 attributes #0 = { nounwind "aarch64_pstate_sm_compatible" }


        


More information about the llvm-commits mailing list