[llvm] [AArch64] Disable FastISel/GlobalISel for ZT0 state (PR #82768)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 28 00:05:51 PST 2024
https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/82768
>From 8e3cae9f6e935f3e3066702c8d603009e3085b73 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 23 Feb 2024 13:34:04 +0000
Subject: [PATCH 1/2] [AArch64] Disable FastISel/GlobalISel for ZT0 state
For __arm_new("zt0") we need to have special setup code in the prologue.
For calls that don't preserve zt0, we need to emit code preserve ZT0 around
the call.
This is only emitted by SelectionDAG ISel at the moment.
---
llvm/lib/Target/AArch64/AArch64FastISel.cpp | 3 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +-
.../AArch64/GISel/AArch64CallLowering.cpp | 3 +-
.../AArch64/sme-disable-gisel-fisel.ll | 65 ++++++++++++++++++-
4 files changed, 69 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 635beeed0df833..49bcab588e5204 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -5179,7 +5179,8 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
SMEAttrs CallerAttrs(*FuncInfo.Fn);
- if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
+ if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
+ CallerAttrs.hasStreamingInterfaceOrBody() ||
CallerAttrs.hasStreamingCompatibleInterface())
return nullptr;
return new AArch64FastISel(FuncInfo, LibInfo);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3b92e95d7c2876..b86661c8a4e0d4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25892,7 +25892,8 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
auto CallerAttrs = SMEAttrs(*Inst.getFunction());
auto CalleeAttrs = SMEAttrs(*Base);
if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
- CallerAttrs.requiresLazySave(CalleeAttrs))
+ CallerAttrs.requiresLazySave(CalleeAttrs) ||
+ CallerAttrs.requiresPreservingZT0(CalleeAttrs))
return true;
}
return false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 3dc3d31a34e84e..26dbad713594aa 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -535,7 +535,8 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
}
SMEAttrs Attrs(F);
- if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() ||
+ if (Attrs.hasZAState() || Attrs.hasZT0State() ||
+ Attrs.hasStreamingInterfaceOrBody() ||
Attrs.hasStreamingCompatibleInterface())
return true;
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 2a78012045ff42..5c4c5a10447311 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s \
+; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FISEL
-; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s \
+; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL
@@ -447,3 +447,64 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
%res = frem float %a, %b
ret float %res
}
+
+;
+; Check ZT0 State
+;
+
+declare double @zt0_shared_callee(double) "aarch64_inout_zt0"
+
+define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline optnone "aarch64_new_zt0" {
+; CHECK-COMMON-LABEL: zt0_new_caller_to_zt0_shared_callee:
+; CHECK-COMMON: // %bb.0: // %prelude
+; CHECK-COMMON-NEXT: sub sp, sp, #80
+; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-COMMON-NEXT: cbz x8, .LBB13_2
+; CHECK-COMMON-NEXT: b .LBB13_1
+; CHECK-COMMON-NEXT: .LBB13_1: // %save.za
+; CHECK-COMMON-NEXT: mov x8, sp
+; CHECK-COMMON-NEXT: str zt0, [x8]
+; CHECK-COMMON-NEXT: bl __arm_tpidr2_save
+; CHECK-COMMON-NEXT: ldr zt0, [x8]
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-COMMON-NEXT: b .LBB13_2
+; CHECK-COMMON-NEXT: .LBB13_2: // %entry
+; CHECK-COMMON-NEXT: smstart za
+; CHECK-COMMON-NEXT: zero { zt0 }
+; CHECK-COMMON-NEXT: bl zt0_shared_callee
+; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
+; CHECK-COMMON-NEXT: fmov d1, x8
+; CHECK-COMMON-NEXT: fadd d0, d0, d1
+; CHECK-COMMON-NEXT: smstop za
+; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: add sp, sp, #80
+; CHECK-COMMON-NEXT: ret
+entry:
+ %call = call double @zt0_shared_callee(double %x)
+ %add = fadd double %call, 4.200000e+01
+ ret double %add;
+}
+
+define double @zt0_new_caller_to_normal_callee(double %x) nounwind noinline optnone "aarch64_inout_zt0" {
+; CHECK-COMMON-LABEL: zt0_new_caller_to_normal_callee:
+; CHECK-COMMON: // %bb.0: // %entry
+; CHECK-COMMON-NEXT: sub sp, sp, #80
+; CHECK-COMMON-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: mov x19, sp
+; CHECK-COMMON-NEXT: str zt0, [x19]
+; CHECK-COMMON-NEXT: smstop za
+; CHECK-COMMON-NEXT: bl normal_callee
+; CHECK-COMMON-NEXT: smstart za
+; CHECK-COMMON-NEXT: ldr zt0, [x19]
+; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
+; CHECK-COMMON-NEXT: fmov d1, x8
+; CHECK-COMMON-NEXT: fadd d0, d0, d1
+; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: add sp, sp, #80
+; CHECK-COMMON-NEXT: ret
+entry:
+ %call = call double @normal_callee(double %x)
+ %add = fadd double %call, 4.200000e+01
+ ret double %add;
+}
>From 759eea02813ea59ce94853aa499306e41fc74061 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Wed, 28 Feb 2024 07:42:43 +0000
Subject: [PATCH 2/2] NFC: Rename test
---
llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 5c4c5a10447311..cd348be5d771d1 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -486,8 +486,8 @@ entry:
ret double %add;
}
-define double @zt0_new_caller_to_normal_callee(double %x) nounwind noinline optnone "aarch64_inout_zt0" {
-; CHECK-COMMON-LABEL: zt0_new_caller_to_normal_callee:
+define double @zt0_shared_caller_to_normal_callee(double %x) nounwind noinline optnone "aarch64_inout_zt0" {
+; CHECK-COMMON-LABEL: zt0_shared_caller_to_normal_callee:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: sub sp, sp, #80
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
More information about the llvm-commits
mailing list