[llvm] [AArch64] Fix case of 0 dynamic alloc when stack probing (PR #74806)
Oskar Wirga via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 7 20:58:16 PST 2023
https://github.com/oskarwirga created https://github.com/llvm/llvm-project/pull/74806
This is my first attempt at fixing a subtle bug in stack probing for AArch64. If the dynamic allocation size is 0, we still probe (store to) the current `sp` value even though `sp` has not been decremented. This overwrites live stack data; in my case, the stack canary.
The fix here is to create a pretest condition which checks whether `sp` == `TargetReg`. If so, we skip both the probing loop and the final probe; otherwise, we dynamically probe as normal. This adds 2 extra instructions. I spent some time trying to think of a more optimal solution, but I couldn't find anything as compact as the current loop.
>From 206635a6a600abc58c226585438b6c2eb8244e13 Mon Sep 17 00:00:00 2001
From: Oskar Wirga <10386631+oskarwirga at users.noreply.github.com>
Date: Fri, 8 Dec 2023 04:54:06 +0000
Subject: [PATCH] [AArch64] Fix case of 0 dynamic alloc when stack probing
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 42 +++++++++++++++--
.../test/CodeGen/AArch64/stack-probing-64k.ll | 3 ++
.../CodeGen/AArch64/stack-probing-dynamic.ll | 46 ++++++++++++++-----
.../test/CodeGen/AArch64/stack-probing-sve.ll | 15 ++++++
llvm/test/CodeGen/AArch64/stack-probing.ll | 3 ++
5 files changed, 93 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 93b8295f4f3ef..5ab05125a65cf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -9469,6 +9469,7 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
+
MachineBasicBlock::iterator
AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
Register TargetReg, bool FrameSetup) const {
@@ -9482,6 +9483,9 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
DebugLoc DL = MBB.findDebugLoc(MBBI);
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *PreTestMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, PreTestMBB);
MachineBasicBlock *LoopTestMBB =
MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, LoopTestMBB);
@@ -9490,9 +9494,27 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
MF.insert(MBBInsertPoint, LoopBodyMBB);
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, ExitMBB);
+ MachineBasicBlock *AfterLoopExitMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, AfterLoopExitMBB);
MachineInstr::MIFlag Flags =
FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
+ // PreTest:
+ // Compare SP and TargetReg
+ BuildMI(*PreTestMBB, PreTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+ AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addReg(TargetReg)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlags(Flags);
+
+ // B.EQ AfterLoopExit
+ BuildMI(*PreTestMBB, PreTestMBB->end(), DL, TII->get(AArch64::Bcc))
+ .addImm(AArch64CC::EQ)
+ .addMBB(AfterLoopExitMBB)
+ .setMIFlags(Flags);
+
// LoopTest:
// SUB SP, SP, #ProbeSize
emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
@@ -9506,7 +9528,7 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
.setMIFlags(Flags);
- // B.<Cond> LoopExit
+ // B.LE LoopExit
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::LE)
.addMBB(ExitMBB)
@@ -9539,22 +9561,32 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
.addImm(0)
.setMIFlags(Flags);
- ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ // B AfterLoopExit
+ BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::B))
+ .addMBB(AfterLoopExitMBB);
+
+ AfterLoopExitMBB->splice(AfterLoopExitMBB->end(), &MBB, std::next(MBBI),
+ MBB.end());
+ AfterLoopExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ PreTestMBB->addSuccessor(LoopTestMBB);
+ PreTestMBB->addSuccessor(AfterLoopExitMBB);
LoopTestMBB->addSuccessor(ExitMBB);
LoopTestMBB->addSuccessor(LoopBodyMBB);
LoopBodyMBB->addSuccessor(LoopTestMBB);
- MBB.addSuccessor(LoopTestMBB);
+ ExitMBB->addSuccessor(AfterLoopExitMBB);
+ MBB.addSuccessor(PreTestMBB);
// Update liveins.
if (MF.getRegInfo().reservedRegsFrozen()) {
+ recomputeLiveIns(*PreTestMBB);
recomputeLiveIns(*LoopTestMBB);
recomputeLiveIns(*LoopBodyMBB);
recomputeLiveIns(*ExitMBB);
+ recomputeLiveIns(*AfterLoopExitMBB);
}
- return ExitMBB->begin();
+ return AfterLoopExitMBB->begin();
}
#define GET_INSTRINFO_HELPERS
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-64k.ll b/llvm/test/CodeGen/AArch64/stack-probing-64k.ll
index 945c271d37500..d844ea8b3010e 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-64k.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-64k.ll
@@ -302,6 +302,8 @@ define void @static_16_align_131072(ptr %out) #0 {
; CHECK-NEXT: sub x9, sp, #31, lsl #12 // =126976
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and x9, x9, #0xfffffffffffe0000
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB9_4
; CHECK-NEXT: .LBB9_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
@@ -314,6 +316,7 @@ define void @static_16_align_131072(ptr %out) #0 {
; CHECK-NEXT: .LBB9_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB9_4: // %entry
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
index d247ed1b59977..7938771daf99a 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -19,6 +19,8 @@ define void @dynamic(i64 %size, ptr %out) #0 {
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB0_4
; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
@@ -29,6 +31,7 @@ define void @dynamic(i64 %size, ptr %out) #0 {
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
@@ -59,10 +62,12 @@ define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
; CHECK-NEXT: str xzr, [sp, #-64]!
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: sub x10, x29, #64
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
-; CHECK-NEXT: str x10, [x1]
+; CHECK-NEXT: sub x10, x29, #64
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: str x10, [x1]
+; CHECK-NEXT: b.eq .LBB1_4
; CHECK-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
@@ -73,6 +78,7 @@ define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
; CHECK-NEXT: .LBB1_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: str x8, [x2]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
@@ -108,11 +114,13 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
-; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB2_4
; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
@@ -123,6 +131,7 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; CHECK-NEXT: .LBB2_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 32
@@ -156,6 +165,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT: sub x9, x9, #4064
; CHECK-NEXT: and x9, x9, #0xffffffffffffe000
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB3_4
; CHECK-NEXT: .LBB3_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x9
@@ -165,23 +176,27 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: mov sp, x9
+; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: and x8, x8, #0xffffffffffffe000
-; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB3_8
+; CHECK-NEXT: .LBB3_5: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
-; CHECK-NEXT: b.le .LBB3_6
-; CHECK-NEXT: // %bb.5: // in Loop: Header=BB3_4 Depth=1
+; CHECK-NEXT: b.le .LBB3_7
+; CHECK-NEXT: // %bb.6: // in Loop: Header=BB3_5 Depth=1
; CHECK-NEXT: str xzr, [sp]
-; CHECK-NEXT: b .LBB3_4
-; CHECK-NEXT: .LBB3_6:
+; CHECK-NEXT: b .LBB3_5
+; CHECK-NEXT: .LBB3_7:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB3_8:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 32
@@ -212,6 +227,8 @@ define void @dynamic_64k_guard(i64 %size, ptr %out) #0 "stack-probe-size"="65536
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB4_4
; CHECK-NEXT: .LBB4_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: cmp sp, x8
@@ -222,6 +239,7 @@ define void @dynamic_64k_guard(i64 %size, ptr %out) #0 "stack-probe-size"="65536
; CHECK-NEXT: .LBB4_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB4_4:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
@@ -254,6 +272,8 @@ define void @no_reserved_call_frame(i64 %n) #0 {
; CHECK-NEXT: add x9, x9, #15
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: cmp sp, x0
+; CHECK-NEXT: b.eq .LBB5_4
; CHECK-NEXT: .LBB5_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -266,6 +286,7 @@ define void @no_reserved_call_frame(i64 %n) #0 {
; CHECK-NEXT: .LBB5_3: // %entry
; CHECK-NEXT: mov sp, x0
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB5_4: // %entry
; CHECK-NEXT: sub sp, sp, #1104
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: bl callee_stack_args
@@ -331,10 +352,12 @@ define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" {
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: rdvl x9, #1
; CHECK-NEXT: mov x10, #15 // =0xf
-; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: madd x9, x0, x9, x10
+; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: cmp sp, x8
+; CHECK-NEXT: b.eq .LBB7_4
; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
@@ -345,6 +368,7 @@ define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" {
; CHECK-NEXT: .LBB7_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB7_4:
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 32
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll b/llvm/test/CodeGen/AArch64/stack-probing-sve.ll
index 4dad104e66f20..867c74001a356 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing-sve.ll
@@ -104,6 +104,8 @@ define void @sve_17_vector(ptr %out) #0 {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl x9, sp, #-17
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 136 * VG
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB3_4
; CHECK-NEXT: .LBB3_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -116,6 +118,7 @@ define void @sve_17_vector(ptr %out) #0 {
; CHECK-NEXT: .LBB3_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB3_4: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: addvl sp, sp, #17
; CHECK-NEXT: .cfi_def_cfa wsp, 16
@@ -340,6 +343,8 @@ define void @sve_16v_1p_csr(<vscale x 4 x float> %a) #0 {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl x9, sp, #-17
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 136 * VG
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB9_4
; CHECK-NEXT: .LBB9_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -352,6 +357,7 @@ define void @sve_16v_1p_csr(<vscale x 4 x float> %a) #0 {
; CHECK-NEXT: .LBB9_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB9_4: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill
@@ -456,6 +462,8 @@ define void @sve_1_vector_4096_arr(ptr %out) #0 {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x79, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 12304 + 256 * VG
; CHECK-NEXT: addvl x9, x9, #-32
; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x79, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x04, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 12304 + 512 * VG
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB11_4
; CHECK-NEXT: .LBB11_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -468,6 +476,7 @@ define void @sve_1_vector_4096_arr(ptr %out) #0 {
; CHECK-NEXT: .LBB11_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB11_4: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: addvl sp, sp, #31
; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x8f, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x88, 0x02, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 12304 + 264 * VG
@@ -505,6 +514,8 @@ define void @sve_1_vector_16_arr_align_8192(ptr %out) #0 {
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: addvl x9, x9, #-1
; CHECK-NEXT: and x9, x9, #0xffffffffffffe000
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB12_4
; CHECK-NEXT: .LBB12_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -517,6 +528,7 @@ define void @sve_1_vector_16_arr_align_8192(ptr %out) #0 {
; CHECK-NEXT: .LBB12_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB12_4: // %entry
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@@ -605,6 +617,8 @@ define void @sve_1028_64k_guard(ptr %out) #0 "stack-probe-size"="65536" {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 2048 * VG
; CHECK-NEXT: addvl x9, x9, #-1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x79, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $x9 + 16 + 2056 * VG
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB14_4
; CHECK-NEXT: .LBB14_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
@@ -617,6 +631,7 @@ define void @sve_1028_64k_guard(ptr %out) #0 "stack-probe-size"="65536" {
; CHECK-NEXT: .LBB14_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB14_4: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: addvl sp, sp, #31
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1808 * VG
diff --git a/llvm/test/CodeGen/AArch64/stack-probing.ll b/llvm/test/CodeGen/AArch64/stack-probing.ll
index 5c5d9321a56e5..4e54d938fd10a 100644
--- a/llvm/test/CodeGen/AArch64/stack-probing.ll
+++ b/llvm/test/CodeGen/AArch64/stack-probing.ll
@@ -389,6 +389,8 @@ define void @static_16_align_8192(ptr %out) #0 {
; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and x9, x9, #0xffffffffffffe000
+; CHECK-NEXT: cmp sp, x9
+; CHECK-NEXT: b.eq .LBB13_4
; CHECK-NEXT: .LBB13_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -401,6 +403,7 @@ define void @static_16_align_8192(ptr %out) #0 {
; CHECK-NEXT: .LBB13_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: .LBB13_4: // %entry
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
More information about the llvm-commits
mailing list