[llvm] [AArch64][SME] Zero reserved bytes when allocating a new TPIDR2 object (PR #68411)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 6 05:29:57 PDT 2023
https://github.com/kmclaughlin-arm created https://github.com/llvm/llvm-project/pull/68411
SME support routines expect the reserved bytes of the TPIDR2 block (bytes 10-15) to be zero. This patch ensures that those reserved bytes are cleared when a new TPIDR2 block is allocated.
From c87808411d0c3ec1bf98d55e4c8b611f0d7e2165 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Fri, 6 Oct 2023 10:11:21 +0000
Subject: [PATCH] [AArch64][SME] Zero reserved bytes when allocating a new
TPIDR2 object
SME support routines expect that the reserved bytes of TPIDR2 (bytes 10-15)
are zero. This patch ensures that the reserved bytes are cleared when
allocating a new TPIDR2 block.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 11 ++++++++
.../AArch64/sme-disable-gisel-fisel.ll | 16 ++++++++---
.../CodeGen/AArch64/sme-lazy-save-call.ll | 27 ++++++++++++-------
.../AArch64/sme-shared-za-interface.ll | 12 ++++++---
4 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3ae7a893ca4e9e3..4e0f0041ce14e56 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6382,6 +6382,17 @@ AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain, DL, Buffer, Ptr, MPI);
+ // Set the reserved bytes (10-15) to zero
+ EVT PtrTy = Ptr.getValueType();
+ SDValue ReservedPtr =
+ DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(10, DL, PtrTy));
+ Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i16), ReservedPtr,
+ MPI);
+ ReservedPtr =
+ DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(12, DL, PtrTy));
+ Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i32), ReservedPtr,
+ MPI);
+
return TPIDR2Obj;
}
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 98a8769afea8513..b2bb62c42b11acd 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -220,6 +220,8 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o
; CHECK-COMMON-NEXT: msub x8, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x8
; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
+; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
+; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: cbz x8, .LBB6_2
; CHECK-COMMON-NEXT: b .LBB6_1
@@ -256,6 +258,8 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline
; CHECK-COMMON-NEXT: subs x9, x9, x8
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
+; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
+; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: sub x8, x29, #16
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
@@ -294,10 +298,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
; CHECK-COMMON-NEXT: mul x8, x8, x8
; CHECK-COMMON-NEXT: sub x9, x9, x8
; CHECK-COMMON-NEXT: mov sp, x9
+; CHECK-COMMON-NEXT: sub x10, x29, #16
+; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
+; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT: sub x9, x29, #16
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
-; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
; CHECK-COMMON-NEXT: bl __addtf3
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
@@ -355,10 +361,12 @@ define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nou
; CHECK-COMMON-NEXT: mul x8, x8, x8
; CHECK-COMMON-NEXT: sub x9, x9, x8
; CHECK-COMMON-NEXT: mov sp, x9
+; CHECK-COMMON-NEXT: sub x10, x29, #16
+; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
+; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT: sub x9, x29, #16
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
-; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
; CHECK-COMMON-NEXT: bl fmod
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index ad16402a18f8b92..3490817b8a49e68 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -17,10 +17,12 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x9, x9, x8
; CHECK-NEXT: mov sp, x9
+; CHECK-NEXT: sub x10, x29, #16
+; CHECK-NEXT: stur wzr, [x29, #-4]
+; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sturh w8, [x29, #-8]
-; CHECK-NEXT: msr TPIDR2_EL0, x9
+; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -51,6 +53,8 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
; CHECK-NEXT: sub x8, x8, x19
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: sub x20, x29, #16
+; CHECK-NEXT: stur wzr, [x29, #-4]
+; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x8, [x29, #-16]
; CHECK-NEXT: sturh w19, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x20
@@ -95,10 +99,12 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_psta
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x9, x9, x8
; CHECK-NEXT: mov sp, x9
+; CHECK-NEXT: sub x10, x29, #16
+; CHECK-NEXT: stur wzr, [x29, #-4]
+; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sturh w8, [x29, #-8]
-; CHECK-NEXT: msr TPIDR2_EL0, x9
+; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: bl cosf
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -132,10 +138,12 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x9, x9, x8
; CHECK-NEXT: mov sp, x9
+; CHECK-NEXT: sub x10, x29, #80
+; CHECK-NEXT: stur wzr, [x29, #-68]
+; CHECK-NEXT: sturh wzr, [x29, #-70]
; CHECK-NEXT: stur x9, [x29, #-80]
-; CHECK-NEXT: sub x9, x29, #80
; CHECK-NEXT: sturh w8, [x29, #-72]
-; CHECK-NEXT: msr TPIDR2_EL0, x9
+; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB3_2
@@ -185,10 +193,9 @@ define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x8, x8, x8, x9
; CHECK-NEXT: mov sp, x8
-; CHECK-NEXT: stur x8, [x29, #-80]
-; CHECK-NEXT: sub x8, x29, #80
-; CHECK-NEXT: sturh wzr, [x29, #-72]
-; CHECK-NEXT: msr TPIDR2_EL0, x8
+; CHECK-NEXT: sub x9, x29, #80
+; CHECK-NEXT: stp x8, xzr, [x29, #-80]
+; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB4_2
diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
index de7df1c9831908f..ddb3dc6324caea4 100644
--- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
@@ -15,10 +15,12 @@ define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x9, x9, x8
; CHECK-NEXT: mov sp, x9
+; CHECK-NEXT: sub x10, x29, #16
+; CHECK-NEXT: stur wzr, [x29, #-4]
+; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sturh w8, [x29, #-8]
-; CHECK-NEXT: msr TPIDR2_EL0, x9
+; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -47,10 +49,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x9, x9, x8
; CHECK-NEXT: mov sp, x9
+; CHECK-NEXT: sub x10, x29, #16
+; CHECK-NEXT: stur wzr, [x29, #-4]
+; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur x9, [x29, #-16]
-; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sturh w8, [x29, #-8]
-; CHECK-NEXT: msr TPIDR2_EL0, x9
+; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
More information about the llvm-commits mailing list