[llvm] 3efe832 - [AArch64] Fix chain for calls from agnostic-ZA functions.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 13 04:07:21 PST 2025
Author: Sander de Smalen
Date: 2025-01-13T12:06:50Z
New Revision: 3efe83291f07dcf2423065e63b826407d1ec2609
URL: https://github.com/llvm/llvm-project/commit/3efe83291f07dcf2423065e63b826407d1ec2609
DIFF: https://github.com/llvm/llvm-project/commit/3efe83291f07dcf2423065e63b826407d1ec2609.diff
LOG: [AArch64] Fix chain for calls from agnostic-ZA functions.
When restoring the SME state after a call, the lowering code passed the wrong
chain value (`Chain` instead of `Result`) to emitSMEStateSaveRestore. As a result,
the 'smstart' that follows a call from a streaming agnostic-ZA function to a
non-streaming private-ZA function was incorrectly removed from the DAG.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d9877fef1437cb..278dd95cd969d8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9664,7 +9664,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.getConstant(0, DL, MVT::i64));
TPIDR2.Uses++;
} else if (RequiresSaveAllZA) {
- Result = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Chain,
+ Result = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Result,
/*IsSave=*/false);
}
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index 97522b9a319c09..1f688154110973 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -82,3 +82,121 @@ define i64 @shared_caller_agnostic_callee(i64 %v) nounwind "aarch64_inout_za" "a
%res = call i64 @agnostic_decl(i64 %v)
ret i64 %res
}
+
+; agnostic-ZA + streaming -> private-ZA + non-streaming
+define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov x9, x0
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: bl __arm_get_current_vg
+; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: mov x0, x9
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: bl __arm_sme_state_size
+; CHECK-NEXT: sub sp, sp, x0
+; CHECK-NEXT: mov x20, sp
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: bl __arm_sme_save
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: bl private_za_decl
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: bl __arm_sme_restore
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: bl __arm_sme_save
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: mov x0, x1
+; CHECK-NEXT: bl private_za_decl
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: bl __arm_sme_restore
+; CHECK-NEXT: mov x0, x1
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %res = call i64 @private_za_decl(i64 %v)
+ %res2 = call i64 @private_za_decl(i64 %res)
+ ret i64 %res2
+}
+
+; agnostic-ZA + streaming-compatible -> private-ZA + non-streaming
+define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov x9, x0
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: bl __arm_get_current_vg
+; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: mov x0, x9
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: bl __arm_sme_state_size
+; CHECK-NEXT: sub sp, sp, x0
+; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: bl __arm_sme_save
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and x20, x0, #0x1
+; CHECK-NEXT: tbz w20, #0, .LBB5_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: bl private_za_decl
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: tbz w20, #0, .LBB5_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB5_4:
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: bl __arm_sme_restore
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: bl __arm_sme_save
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and x20, x0, #0x1
+; CHECK-NEXT: tbz w20, #0, .LBB5_6
+; CHECK-NEXT: // %bb.5:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB5_6:
+; CHECK-NEXT: mov x0, x2
+; CHECK-NEXT: bl private_za_decl
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: tbz w20, #0, .LBB5_8
+; CHECK-NEXT: // %bb.7:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB5_8:
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: bl __arm_sme_restore
+; CHECK-NEXT: mov x0, x1
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %res = call i64 @private_za_decl(i64 %v)
+ %res2 = call i64 @private_za_decl(i64 %res)
+ ret i64 %res2
+}
More information about the llvm-commits mailing list