[llvm] 42f628c - Reapply "[SystemZFrameLowering] Don't overwrite R1D (backchain) when probing."
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 11 16:29:47 PST 2020
Author: Jonas Paulsson
Date: 2020-12-11T18:25:47-06:00
New Revision: 42f628c8426958daececdd997869024aedc0a068
URL: https://github.com/llvm/llvm-project/commit/42f628c8426958daececdd997869024aedc0a068
DIFF: https://github.com/llvm/llvm-project/commit/42f628c8426958daececdd997869024aedc0a068.diff
LOG: Reapply "[SystemZFrameLowering] Don't overwrite R1D (backchain) when probing."
Fixed to properly compute the live-in lists of new blocks.
Review: Ulrich Weigand
Differential Revision: https://reviews.llvm.org/D92803
Added:
Modified:
llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 57529c8685de..d9e030de5af8 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -488,15 +488,6 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
MFFrame.setStackSize(StackSize);
if (StackSize) {
- // Determine if we want to store a backchain.
- bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
-
- // If we need backchain, save current stack pointer. R1 is free at this
- // point.
- if (StoreBackchain)
- BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR))
- .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
-
// Allocate StackSize bytes.
int64_t Delta = -int64_t(StackSize);
const unsigned ProbeSize = TLI.getStackProbeSize(MF);
@@ -512,18 +503,23 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
.addImm(StackSize);
}
else {
+ bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
+ // If we need backchain, save current stack pointer. R1 is free at
+ // this point.
+ if (StoreBackchain)
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
+ if (StoreBackchain) {
+ // The back chain is stored topmost with packed-stack.
+ int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0;
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
+ .addImm(Offset).addReg(0);
+ }
}
SPOffsetFromCFA += Delta;
-
- if (StoreBackchain) {
- // The back chain is stored topmost with packed-stack.
- int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0;
- BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
- .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
- .addImm(Offset).addReg(0);
- }
}
if (HasFP) {
@@ -668,6 +664,13 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
.addMemOperand(MMO);
};
+ bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
+ if (StoreBackchain)
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
+
+ MachineBasicBlock *DoneMBB = nullptr;
+ MachineBasicBlock *LoopMBB = nullptr;
if (NumFullBlocks < 3) {
// Emit unrolled probe statements.
for (unsigned int i = 0; i < NumFullBlocks; i++)
@@ -677,15 +680,16 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
uint64_t LoopAlloc = ProbeSize * NumFullBlocks;
SPOffsetFromCFA -= LoopAlloc;
- BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D)
+ // Use R0D to hold the exit value.
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R0D)
.addReg(SystemZ::R15D);
- buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII);
- emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII);
+ buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R0D, ZII);
+ emitIncrement(*MBB, MBBI, DL, SystemZ::R0D, -int64_t(LoopAlloc), ZII);
buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc),
ZII);
- MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
- MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB);
+ DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
+ LoopMBB = SystemZ::emitBlockAfter(MBB);
MBB->addSuccessor(LoopMBB);
LoopMBB->addSuccessor(LoopMBB);
LoopMBB->addSuccessor(DoneMBB);
@@ -693,22 +697,32 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
MBB = LoopMBB;
allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/);
BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
- .addReg(SystemZ::R15D).addReg(SystemZ::R1D);
+ .addReg(SystemZ::R15D).addReg(SystemZ::R0D);
BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);
MBB = DoneMBB;
MBBI = DoneMBB->begin();
buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);
-
- recomputeLiveIns(*DoneMBB);
- recomputeLiveIns(*LoopMBB);
}
if (Residual)
allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/);
+ if (StoreBackchain) {
+ // The back chain is stored topmost with packed-stack.
+ int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0;
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
+ .addImm(Offset).addReg(0);
+ }
+
StackAllocMI->eraseFromParent();
+ if (DoneMBB != nullptr) {
+ // Compute the live-in lists for the new blocks.
+ recomputeLiveIns(*DoneMBB);
+ recomputeLiveIns(*LoopMBB);
+ }
}
bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
index 748f441a9219..a72cba1b8ba4 100644
--- a/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -verify-machineinstrs | FileCheck %s
define i32 @fun0(i32 %n) #0 {
; CHECK-LABEL: fun0:
@@ -92,14 +92,14 @@ define i32 @fun2(i32 %n) #0 "stack-probe-size"="4" {
; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
; CHECK-NEXT: .cfi_offset %r11, -72
; CHECK-NEXT: .cfi_offset %r15, -40
-; CHECK-NEXT: lgr %r1, %r15
-; CHECK-NEXT: .cfi_def_cfa_register %r1
-; CHECK-NEXT: aghi %r1, -160
+; CHECK-NEXT: lgr %r0, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r0
+; CHECK-NEXT: aghi %r0, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8
; CHECK-NEXT: cg %r0, 0(%r15)
-; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1
+; CHECK-NEXT: clgrjh %r15, %r0, .LBB2_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: lgr %r11, %r15
diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
index 8166af3673cd..40ffdc735589 100644
--- a/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
+++ b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 -verify-machineinstrs | FileCheck %s
;
; Test stack clash protection probing for static allocas.
@@ -48,14 +48,14 @@ define i32 @fun1() #0 {
define i32 @fun2() #0 {
; CHECK-LABEL: fun2:
; CHECK: # %bb.0:
-; CHECK-NEXT: lgr %r1, %r15
-; CHECK-NEXT: .cfi_def_cfa_register %r1
-; CHECK-NEXT: agfi %r1, -69632
+; CHECK-NEXT: lgr %r0, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r0
+; CHECK-NEXT: agfi %r0, -69632
; CHECK-NEXT: .cfi_def_cfa_offset 69792
; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: cg %r0, 4088(%r15)
-; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1
+; CHECK-NEXT: clgrjh %r15, %r0, .LBB2_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: aghi %r15, -2544
@@ -81,15 +81,15 @@ define i32 @fun2() #0 {
define void @fun3() #0 {
; CHECK-LABEL: fun3:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lgr %r1, %r15
-; CHECK-NEXT: .cfi_def_cfa_register %r1
-; CHECK-NEXT: aghi %r1, -28672
+; CHECK-NEXT: lgr %r0, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r0
+; CHECK-NEXT: aghi %r0, -28672
; CHECK-NEXT: .cfi_def_cfa_offset 28832
; CHECK-NEXT: .LBB3_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: cg %r0, 4088(%r15)
-; CHECK-NEXT: clgrjh %r15, %r1, .LBB3_1
+; CHECK-NEXT: clgrjh %r15, %r0, .LBB3_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: mvhi 180(%r15), 0
@@ -110,15 +110,15 @@ entry:
define void @fun4() #0 "stack-probe-size"="8192" {
; CHECK-LABEL: fun4:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lgr %r1, %r15
-; CHECK-NEXT: .cfi_def_cfa_register %r1
-; CHECK-NEXT: aghi %r1, -24576
+; CHECK-NEXT: lgr %r0, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r0
+; CHECK-NEXT: aghi %r0, -24576
; CHECK-NEXT: .cfi_def_cfa_offset 24736
; CHECK-NEXT: .LBB4_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8192
; CHECK-NEXT: cg %r0, 8184(%r15)
-; CHECK-NEXT: clgrjh %r15, %r1, .LBB4_1
+; CHECK-NEXT: clgrjh %r15, %r0, .LBB4_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: aghi %r15, -7608
@@ -166,15 +166,15 @@ entry:
define void @fun6() #0 "stack-probe-size"="5" {
; CHECK-LABEL: fun6:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lgr %r1, %r15
-; CHECK-NEXT: .cfi_def_cfa_register %r1
-; CHECK-NEXT: aghi %r1, -4184
+; CHECK-NEXT: lgr %r0, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r0
+; CHECK-NEXT: aghi %r0, -4184
; CHECK-NEXT: .cfi_def_cfa_offset 4344
; CHECK-NEXT: .LBB6_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8
; CHECK-NEXT: cg %r0, 0(%r15)
-; CHECK-NEXT: clgrjh %r15, %r1, .LBB6_1
+; CHECK-NEXT: clgrjh %r15, %r0, .LBB6_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: mvhi 180(%r15), 0
@@ -237,6 +237,37 @@ define i32 @fun8() #0 {
ret i32 %c
}
+define void @fun9() #0 "backchain" {
+; CHECK-LABEL: fun9:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lgr %r1, %r15
+; CHECK-NEXT: lgr %r0, %r15
+; CHECK-NEXT: .cfi_def_cfa_register %r0
+; CHECK-NEXT: aghi %r0, -28672
+; CHECK-NEXT: .cfi_def_cfa_offset 28832
+; CHECK-NEXT: .LBB9_1: # %entry
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: aghi %r15, -4096
+; CHECK-NEXT: cg %r0, 4088(%r15)
+; CHECK-NEXT: clgrjh %r15, %r0, .LBB9_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: .cfi_def_cfa_register %r15
+; CHECK-NEXT: stg %r1, 0(%r15)
+; CHECK-NEXT: mvhi 180(%r15), 0
+; CHECK-NEXT: l %r0, 180(%r15)
+; CHECK-NEXT: aghi %r15, 28672
+; CHECK-NEXT: br %r14
+entry:
+ %stack = alloca [7122 x i32], align 4
+ %i = alloca i32, align 4
+ %0 = bitcast [7122 x i32]* %stack to i8*
+ %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
+ store volatile i32 0, i32* %i, align 4
+ %i.0.i.0.6 = load volatile i32, i32* %i, align 4
+ ret void
+}
+
+
declare i32 @foo()
attributes #0 = { "probe-stack"="inline-asm" }
More information about the llvm-commits mailing list