[llvm] [AArch64][SME2] Extend SMEABIPass to handle functions with new ZT0 state (PR #78848)
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 20 06:58:47 PST 2024
https://github.com/kmclaughlin-arm created https://github.com/llvm/llvm-project/pull/78848
updateNewZAFunctions is extended to generate the following on entry to a
function with either the "aarch64_pstate_za_new" or "arm_new_zt0" attribute:
- Private-ZA interface: commit any active lazy-saves & enable PSTATE.ZA.
- "aarch64_pstate_za_new": zero ZA.
- "arm_new_zt0": zero ZT0.
Additionally, PSTATE.ZA should be disabled before returning if the function
has a private-ZA interface.
>From 9b5f6614d14c1075fa13cf62cfb3ead855994a45 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Wed, 17 Jan 2024 14:54:04 +0000
Subject: [PATCH 1/2] Add tests with ZT0 new state
---
llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 110 +++++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
index 88eaf19ec488f3d..69dc8ee1dd4bda0 100644
--- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
@@ -153,3 +153,113 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
call void @callee() "aarch64_new_zt0";
ret void;
}
+
+;
+; New-ZA Caller
+;
+
+; Expect commit of lazy-save if ZA is dormant
+; Expect smstart ZA & clear ZT0
+; Before return, expect smstop ZA
+define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
+; CHECK-LABEL: zt0_new_caller:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl callee
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ call void @callee() "aarch64_in_zt0";
+ ret void;
+}
+
+; Expect commit of lazy-save if ZA is dormant
+; Expect smstart ZA, clear ZA & clear ZT0
+; Before return, expect smstop ZA
+define void @new_za_zt0_caller() "aarch64_pstate_za_new" "aarch64_new_zt0" nounwind {
+; CHECK-LABEL: new_za_zt0_caller:
+; CHECK: // %bb.0: // %prelude
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: rdsvl x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: msub x8, x8, x8, x9
+; CHECK-NEXT: mov sp, x8
+; CHECK-NEXT: stur wzr, [x29, #-4]
+; CHECK-NEXT: sturh wzr, [x29, #-6]
+; CHECK-NEXT: stur x8, [x29, #-16]
+; CHECK-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-NEXT: cbz x8, .LBB7_2
+; CHECK-NEXT: // %bb.1: // %save.za
+; CHECK-NEXT: sub x8, x29, #80
+; CHECK-NEXT: str zt0, [x8]
+; CHECK-NEXT: bl __arm_tpidr2_save
+; CHECK-NEXT: ldr zt0, [x8]
+; CHECK-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: zero {za}
+; CHECK-NEXT: bl callee
+; CHECK-NEXT: smstop za
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ call void @callee() "aarch64_pstate_za_shared" "aarch64_in_zt0";
+ ret void;
+}
+
+; Expect clear ZA on entry
+define void @new_za_shared_zt0_caller() "aarch64_pstate_za_new" "aarch64_in_zt0" nounwind {
+; CHECK-LABEL: new_za_shared_zt0_caller:
+; CHECK: // %bb.0: // %prelude
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: rdsvl x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: msub x8, x8, x8, x9
+; CHECK-NEXT: mov sp, x8
+; CHECK-NEXT: stur wzr, [x29, #-4]
+; CHECK-NEXT: sturh wzr, [x29, #-6]
+; CHECK-NEXT: stur x8, [x29, #-16]
+; CHECK-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-NEXT: cbz x8, .LBB8_2
+; CHECK-NEXT: // %bb.1: // %save.za
+; CHECK-NEXT: sub x8, x29, #80
+; CHECK-NEXT: str zt0, [x8]
+; CHECK-NEXT: bl __arm_tpidr2_save
+; CHECK-NEXT: ldr zt0, [x8]
+; CHECK-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEXT: .LBB8_2:
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: zero {za}
+; CHECK-NEXT: bl callee
+; CHECK-NEXT: smstop za
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ call void @callee() "aarch64_pstate_za_shared" "aarch64_in_zt0";
+ ret void;
+}
+
+; Expect clear ZT0 on entry
+define void @shared_za_new_zt0() "aarch64_pstate_za_shared" "aarch64_new_zt0" nounwind {
+; CHECK-LABEL: shared_za_new_zt0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: rdsvl x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: msub x8, x8, x8, x9
+; CHECK-NEXT: mov sp, x8
+; CHECK-NEXT: stur wzr, [x29, #-4]
+; CHECK-NEXT: sturh wzr, [x29, #-6]
+; CHECK-NEXT: stur x8, [x29, #-16]
+; CHECK-NEXT: bl callee
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ call void @callee() "aarch64_pstate_za_shared" "aarch64_in_zt0";
+ ret void;
+}
>From 5b0ab609e371a2622ea299e41f12a53551a9e464 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Wed, 17 Jan 2024 14:54:04 +0000
Subject: [PATCH 2/2] [AArch64][SME2] Extend SMEABIPass to handle functions
with new ZT0 state
updateNewZAFunctions is extended to generate the following on
entry to a function with either the "aarch64_pstate_za_new" or
"arm_new_zt0" attributes:
- Private-ZA interface: commit any active lazy-saves & enable PSTATE.ZA.
- "aarch64_pstate_za_new": zero ZA.
- "arm_new_zt0": zero ZT0.
Additionally, PSTATE.ZA should be disabled before returning if the function
has a private-ZA interface.
---
llvm/lib/Target/AArch64/SMEABIPass.cpp | 124 +++++++++++-------
.../AArch64/Utils/AArch64SMEAttributes.cpp | 10 +-
llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 33 +++--
3 files changed, 95 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp
index 3315171798d9f1b..0450e2f6f286e13 100644
--- a/llvm/lib/Target/AArch64/SMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/SMEABIPass.cpp
@@ -40,7 +40,8 @@ struct SMEABI : public FunctionPass {
bool runOnFunction(Function &F) override;
private:
- bool updateNewZAFunctions(Module *M, Function *F, IRBuilder<> &Builder);
+ bool updateNewStateFunctions(Module *M, Function *F, IRBuilder<> &Builder,
+ SMEAttrs FnAttrs);
};
} // end anonymous namespace
@@ -76,56 +77,79 @@ void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) {
Builder.getInt64(0));
}
-/// This function generates code to commit a lazy save at the beginning of a
-/// function marked with `aarch64_pstate_za_new`. If the value read from
-/// TPIDR2_EL0 is not null on entry to the function then the lazy-saving scheme
-/// is active and we should call __arm_tpidr2_save to commit the lazy save.
-/// Additionally, PSTATE.ZA should be enabled at the beginning of the function
-/// and disabled before returning.
-bool SMEABI::updateNewZAFunctions(Module *M, Function *F,
- IRBuilder<> &Builder) {
+/// This function generates code at the beginning and end of a function marked
+/// with either `aarch64_pstate_za_new` or `arm_new_zt0`.
+/// At the beginning of the function, the following code is generated:
+/// - Commit lazy-save if active [Private-ZA Interface]
+/// - Enable PSTATE.ZA [Private-ZA Interface]
+/// - Zero ZA [Has New ZA State]
+/// - Zero ZT0 [Has New ZT0 State]
+/// At the end of the function, PSTATE.ZA is disabled if the function has a
+/// Private-ZA Interface. A function is considered to have a Private-ZA
+/// interface if it does not share ZA or ZT0.
+///
+bool SMEABI::updateNewStateFunctions(Module *M, Function *F,
+ IRBuilder<> &Builder, SMEAttrs FnAttrs) {
LLVMContext &Context = F->getContext();
BasicBlock *OrigBB = &F->getEntryBlock();
-
- // Create the new blocks for reading TPIDR2_EL0 & enabling ZA state.
- auto *SaveBB = OrigBB->splitBasicBlock(OrigBB->begin(), "save.za", true);
- auto *PreludeBB = BasicBlock::Create(Context, "prelude", F, SaveBB);
-
- // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0.
- Builder.SetInsertPoint(PreludeBB);
- Function *TPIDR2Intr =
- Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2);
- auto *TPIDR2 = Builder.CreateCall(TPIDR2Intr->getFunctionType(), TPIDR2Intr,
- {}, "tpidr2");
- auto *Cmp =
- Builder.CreateCmp(ICmpInst::ICMP_NE, TPIDR2, Builder.getInt64(0), "cmp");
- Builder.CreateCondBr(Cmp, SaveBB, OrigBB);
-
- // Create a call __arm_tpidr2_save, which commits the lazy save.
- Builder.SetInsertPoint(&SaveBB->back());
- emitTPIDR2Save(M, Builder);
-
- // Enable pstate.za at the start of the function.
Builder.SetInsertPoint(&OrigBB->front());
- Function *EnableZAIntr =
- Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable);
- Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr);
-
- // ZA state must be zeroed upon entry to a function with NewZA
- Function *ZeroIntr =
- Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero);
- Builder.CreateCall(ZeroIntr->getFunctionType(), ZeroIntr,
- Builder.getInt32(0xff));
-
- // Before returning, disable pstate.za
- for (BasicBlock &BB : *F) {
- Instruction *T = BB.getTerminator();
- if (!T || !isa<ReturnInst>(T))
- continue;
- Builder.SetInsertPoint(T);
- Function *DisableZAIntr =
- Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_disable);
- Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr);
+
+ // Commit any active lazy-saves if this is a Private-ZA function. If the
+ // value read from TPIDR2_EL0 is not null on entry to the function then
+ // the lazy-saving scheme is active and we should call __arm_tpidr2_save
+ // to commit the lazy save.
+ if (FnAttrs.hasPrivateZAInterface()) {
+ // Create the new blocks for reading TPIDR2_EL0 & enabling ZA state.
+ auto *SaveBB = OrigBB->splitBasicBlock(OrigBB->begin(), "save.za", true);
+ auto *PreludeBB = BasicBlock::Create(Context, "prelude", F, SaveBB);
+
+ // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0.
+ Builder.SetInsertPoint(PreludeBB);
+ Function *TPIDR2Intr =
+ Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2);
+ auto *TPIDR2 = Builder.CreateCall(TPIDR2Intr->getFunctionType(), TPIDR2Intr,
+ {}, "tpidr2");
+ auto *Cmp = Builder.CreateCmp(ICmpInst::ICMP_NE, TPIDR2,
+ Builder.getInt64(0), "cmp");
+ Builder.CreateCondBr(Cmp, SaveBB, OrigBB);
+
+ // Create a call __arm_tpidr2_save, which commits the lazy save.
+ Builder.SetInsertPoint(&SaveBB->back());
+ emitTPIDR2Save(M, Builder);
+
+ // Enable pstate.za at the start of the function.
+ Builder.SetInsertPoint(&OrigBB->front());
+ Function *EnableZAIntr =
+ Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable);
+ Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr);
+ }
+
+ if (FnAttrs.hasNewZABody()) {
+ // ZA state must be zeroed upon entry to a function with NewZA
+ Function *ZeroIntr =
+ Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero);
+ Builder.CreateCall(ZeroIntr->getFunctionType(), ZeroIntr,
+ Builder.getInt32(0xff));
+ }
+
+ if (FnAttrs.isNewZT0()) {
+ Function *ClearZT0Intr =
+ Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero_zt);
+ Builder.CreateCall(ClearZT0Intr->getFunctionType(), ClearZT0Intr,
+ {Builder.getInt32(0)});
+ }
+
+ if (FnAttrs.hasPrivateZAInterface()) {
+ // Before returning, disable pstate.za
+ for (BasicBlock &BB : *F) {
+ Instruction *T = BB.getTerminator();
+ if (!T || !isa<ReturnInst>(T))
+ continue;
+ Builder.SetInsertPoint(T);
+ Function *DisableZAIntr =
+ Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_disable);
+ Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr);
+ }
}
F->addFnAttr("aarch64_expanded_pstate_za");
@@ -142,8 +166,8 @@ bool SMEABI::runOnFunction(Function &F) {
bool Changed = false;
SMEAttrs FnAttrs(F);
- if (FnAttrs.hasNewZABody())
- Changed |= updateNewZAFunctions(M, &F, Builder);
+ if (FnAttrs.hasNewZABody() || FnAttrs.isNewZT0())
+ Changed |= updateNewStateFunctions(M, &F, Builder, FnAttrs);
return Changed;
}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 9693b6a664be262..c47ce42dcbd2876 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -49,11 +49,13 @@ SMEAttrs::SMEAttrs(const CallBase &CB) {
SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) {
if (FuncName == "__arm_tpidr2_save" || FuncName == "__arm_sme_state")
- Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Preserved |
- SMEAttrs::ZA_NoLazySave);
+ Bitmask |=
+ (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Preserved |
+ SMEAttrs::ZA_NoLazySave | encodeZT0State(StateValue::Preserved));
if (FuncName == "__arm_tpidr2_restore")
- Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared |
- SMEAttrs::ZA_NoLazySave);
+ Bitmask |=
+ (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared |
+ SMEAttrs::ZA_NoLazySave | encodeZT0State(StateValue::Preserved));
}
SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
index 69dc8ee1dd4bda0..b93e865772eb9a3 100644
--- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
@@ -163,9 +163,18 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
; Before return, expect smstop ZA
define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
; CHECK-LABEL: zt0_new_caller:
-; CHECK: // %bb.0:
+; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-NEXT: cbz x8, .LBB6_2
+; CHECK-NEXT: // %bb.1: // %save.za
+; CHECK-NEXT: bl __arm_tpidr2_save
+; CHECK-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEXT: .LBB6_2:
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: bl callee
+; CHECK-NEXT: smstop za
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
call void @callee() "aarch64_in_zt0";
@@ -180,7 +189,7 @@ define void @new_za_zt0_caller() "aarch64_pstate_za_new" "aarch64_new_zt0" nounw
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x8, x8, x8, x9
@@ -191,14 +200,12 @@ define void @new_za_zt0_caller() "aarch64_pstate_za_new" "aarch64_new_zt0" nounw
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB7_2
; CHECK-NEXT: // %bb.1: // %save.za
-; CHECK-NEXT: sub x8, x29, #80
-; CHECK-NEXT: str zt0, [x8]
; CHECK-NEXT: bl __arm_tpidr2_save
-; CHECK-NEXT: ldr zt0, [x8]
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero {za}
+; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstop za
; CHECK-NEXT: mov sp, x29
@@ -211,10 +218,10 @@ define void @new_za_zt0_caller() "aarch64_pstate_za_new" "aarch64_new_zt0" nounw
; Expect clear ZA on entry
define void @new_za_shared_zt0_caller() "aarch64_pstate_za_new" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: new_za_shared_zt0_caller:
-; CHECK: // %bb.0: // %prelude
+; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x8, x8, x8, x9
@@ -222,19 +229,8 @@ define void @new_za_shared_zt0_caller() "aarch64_pstate_za_new" "aarch64_in_zt0"
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x8, [x29, #-16]
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: cbz x8, .LBB8_2
-; CHECK-NEXT: // %bb.1: // %save.za
-; CHECK-NEXT: sub x8, x29, #80
-; CHECK-NEXT: str zt0, [x8]
-; CHECK-NEXT: bl __arm_tpidr2_save
-; CHECK-NEXT: ldr zt0, [x8]
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: .LBB8_2:
-; CHECK-NEXT: smstart za
; CHECK-NEXT: zero {za}
; CHECK-NEXT: bl callee
-; CHECK-NEXT: smstop za
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
@@ -256,6 +252,7 @@ define void @shared_za_new_zt0() "aarch64_pstate_za_shared" "aarch64_new_zt0" no
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x8, [x29, #-16]
+; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: bl callee
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
More information about the llvm-commits
mailing list