[llvm] 7360d6d - [ARM][MachineOutliner] Do not overestimate LR liveness in return block
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 2 08:48:25 PST 2020
Author: Momchil Velikov
Date: 2020-11-02T16:47:22Z
New Revision: 7360d6d921c67e92cc82635112e72fb2a59aacf4
URL: https://github.com/llvm/llvm-project/commit/7360d6d921c67e92cc82635112e72fb2a59aacf4
DIFF: https://github.com/llvm/llvm-project/commit/7360d6d921c67e92cc82635112e72fb2a59aacf4.diff
LOG: [ARM][MachineOutliner] Do not overestimate LR liveness in return block
The `LiveRegUnits` utility (as well as `LivePhysRegs`) considers
callee-saved registers to be alive at the point after the return
instruction in a block. In the ARM backend, the `LR` register is
classified as callee-saved, which is not really correct (from an ARM
eABI or just common sense point of view). These two conditions cause
the `MachineOutliner` to overestimate the liveness of `LR`, which
results in unnecessary saves/restores of `LR` around calls to outlined
sequences. It also causes the `MachineVerifier` to crash in some
cases, because the save instruction reads a dead `LR`, for example
when the following program:
int h(int, int);
int f(int a, int b, int c, int d) {
a = h(a + 1, b - 1);
b = b + c;
return 1 + (2 * a + b) * (c - d) / (a - b) * (c + d);
}
int g(int a, int b, int c, int d) {
a = h(a - 1, b + 1);
b = b + c;
return 2 + (2 * a + b) * (c - d) / (a - b) * (c + d);
}
is compiled with `-target arm-eabi -march=armv7-m -Oz`.
This patch computes the liveness of `LR` directly, in return blocks only,
taking into account the few ARM instructions that read `LR` even though
the register does not appear (explicitly or implicitly) among the
instruction operands.
Differential Revision: https://reviews.llvm.org/D89189
Added:
llvm/test/CodeGen/ARM/machine-outliner-return-1.ll
llvm/test/CodeGen/ARM/machine-outliner-return-2.ll
Modified:
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c0d5ae4dec27..6844b02b8528 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -5663,6 +5663,37 @@ ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
return 0u;
}
+// Compute liveness of LR at the point after the interval [I, E), which
+// denotes a *backward* iteration through instructions. Used only for return
+// basic blocks, which do not end with a tail call.
+static bool isLRAvailable(const TargetRegisterInfo &TRI,
+ MachineBasicBlock::reverse_iterator I,
+ MachineBasicBlock::reverse_iterator E) {
+ // At the end of the function LR dead.
+ bool Live = false;
+ for (; I != E; ++I) {
+ const MachineInstr &MI = *I;
+
+ // Check defs of LR.
+ if (MI.modifiesRegister(ARM::LR, &TRI))
+ Live = false;
+
+ // Check uses of LR.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
+ Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
+ Opcode == ARM::tBXNS_RET) {
+ // These instructions use LR, but it's not an (explicit or implicit)
+ // operand.
+ Live = true;
+ continue;
+ }
+ if (MI.readsRegister(ARM::LR, &TRI))
+ Live = true;
+ }
+ return !Live;
+}
+
outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
@@ -5755,8 +5786,15 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
for (outliner::Candidate &C : RepeatedSequenceLocs) {
C.initLRU(TRI);
- // Is LR available? If so, we don't need a save.
- if (C.LRU.available(ARM::LR)) {
+ // LR liveness is overestimated in return blocks, unless they end with a
+ // tail call.
+ const auto Last = C.getMBB()->rbegin();
+ const bool LRIsAvailable =
+ C.getMBB()->isReturnBlock() && !Last->isCall()
+ ? isLRAvailable(TRI, Last,
+ (MachineBasicBlock::reverse_iterator)C.front())
+ : C.LRU.available(ARM::LR);
+ if (LRIsAvailable) {
FrameID = MachineOutlinerNoLRSave;
NumBytesNoStackCalls += Costs.CallNoLRSave;
C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
@@ -5867,7 +5905,13 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
Flags |= MachineOutlinerMBBFlags::HasCalls;
- if (!LRU.available(ARM::LR))
+ // LR liveness is overestimated in return blocks.
+
+ bool LRIsAvailable =
+ MBB.isReturnBlock() && !MBB.back().isCall()
+ ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
+ : LRU.available(ARM::LR);
+ if (!LRIsAvailable)
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
return true;
diff --git a/llvm/test/CodeGen/ARM/machine-outliner-return-1.ll b/llvm/test/CodeGen/ARM/machine-outliner-return-1.ll
new file mode 100644
index 000000000000..30d2c151b517
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/machine-outliner-return-1.ll
@@ -0,0 +1,57 @@
+; RUN: llc --verify-machineinstrs %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7m-unknown-unknown-eabi"
+
+declare dso_local i32 @h0(i32, i32) local_unnamed_addr #1
+
+define dso_local i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+entry:
+ %add = add nsw i32 %a, 1
+ %sub = add nsw i32 %b, -1
+ %call = tail call i32 @h0(i32 %add, i32 %sub) #0
+ %add1 = add nsw i32 %c, %b
+ %mul = shl nsw i32 %call, 1
+ %add2 = add nsw i32 %mul, %add1
+ %sub3 = sub nsw i32 %c, %d
+ %mul4 = mul nsw i32 %add2, %sub3
+ %sub5 = sub nsw i32 %call, %add1
+ %div = sdiv i32 %mul4, %sub5
+ %add6 = add nsw i32 %d, %c
+ %mul7 = mul nsw i32 %div, %add6
+ %add8 = add nsw i32 %mul7, 1
+ ret i32 %add8
+}
+; CHECK-LABEL: f:
+; CHECK: bl h0
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+
+
+define dso_local i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+entry:
+ %sub = add nsw i32 %a, -1
+ %add = add nsw i32 %b, 1
+ %call = tail call i32 @h0(i32 %sub, i32 %add) #0
+ %add1 = add nsw i32 %c, %b
+ %mul = shl nsw i32 %call, 1
+ %add2 = add nsw i32 %mul, %add1
+ %sub3 = sub nsw i32 %c, %d
+ %mul4 = mul nsw i32 %add2, %sub3
+ %sub5 = sub nsw i32 %call, %add1
+ %div = sdiv i32 %mul4, %sub5
+ %add6 = add nsw i32 %d, %c
+ %mul7 = mul nsw i32 %div, %add6
+ %add8 = add nsw i32 %mul7, 2
+ ret i32 %add8
+}
+; CHECK-LABEL: g:
+; CHECK: bl h0
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: adds r0, #2
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+
+
+attributes #0 = { minsize nounwind optsize }
+attributes #1 = { minsize optsize }
diff --git a/llvm/test/CodeGen/ARM/machine-outliner-return-2.ll b/llvm/test/CodeGen/ARM/machine-outliner-return-2.ll
new file mode 100644
index 000000000000..dc695f5e928f
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/machine-outliner-return-2.ll
@@ -0,0 +1,52 @@
+; RUN: llc -verify-machineinstrs %s -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7m-unknown-unknown-eabi"
+
+declare dso_local i32 @t(i32) local_unnamed_addr #0
+
+define dso_local i32 @f(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
+entry:
+ %mul = mul nsw i32 %a, 3
+ %add = add nsw i32 %mul, 1
+ %sub = add nsw i32 %b, -1
+ %div = sdiv i32 %add, %sub
+ %sub1 = sub nsw i32 %a, %c
+ %div2 = sdiv i32 %div, %sub1
+ %mul3 = mul nsw i32 %div2, %b
+ %add4 = add nsw i32 %mul3, 1
+ %call = tail call i32 @t(i32 %add4) #0
+ ret i32 %call
+}
+; CHECK-LABEL: f:
+; CHECK: str lr, [sp, #-8]!
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: ldr lr, [sp], #8
+; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: b t
+
+define dso_local i32 @g(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
+entry:
+ %mul = mul nsw i32 %a, 3
+ %add = add nsw i32 %mul, 1
+ %sub = add nsw i32 %b, -1
+ %div = sdiv i32 %add, %sub
+ %sub1 = sub nsw i32 %a, %c
+ %div2 = sdiv i32 %div, %sub1
+ %mul3 = mul nsw i32 %div2, %b
+ %add4 = add nsw i32 %mul3, 3
+ %call = tail call i32 @t(i32 %add4) #0
+ ret i32 %call
+}
+
+; CHECK-LABEL: g:
+; CHECK: str lr, [sp, #-8]!
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
+; CHECK-NEXT: ldr lr, [sp], #8
+; CHECK-NEXT: adds r0, #3
+; CHECK-NEXT: b t
+
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK-NOT: lr
+; CHECK: bx lr
+
+attributes #0 = { minsize nounwind optsize }
More information about the llvm-commits
mailing list