[llvm] 7360d6d - [ARM][MachineOutliner] Do not overestimate LR liveness in return block

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 2 08:48:25 PST 2020


Author: Momchil Velikov
Date: 2020-11-02T16:47:22Z
New Revision: 7360d6d921c67e92cc82635112e72fb2a59aacf4

URL: https://github.com/llvm/llvm-project/commit/7360d6d921c67e92cc82635112e72fb2a59aacf4
DIFF: https://github.com/llvm/llvm-project/commit/7360d6d921c67e92cc82635112e72fb2a59aacf4.diff

LOG: [ARM][MachineOutliner] Do not overestimate LR liveness in return block

The `LiveRegUnits` utility (as well as `LivePhysRegs`) considers
callee-saved registers to be alive at the point after the return
instruction in a block. In the ARM backend, the `LR` register is
classified as callee-saved, which is not really correct (from an ARM
eABI or just common sense point of view).  These two conditions cause
the `MachineOutliner` to overestimate the liveness of `LR`, which
results in unnecessary saves/restores of `LR` around calls to outlined
sequences.  It also causes the `MachineVerifer` to crash in some
cases, because the save instruction reads a dead `LR`, for example
when the following program:

int h(int, int);

int f(int a, int b, int c, int d) {
  a = h(a + 1, b - 1);
  b = b + c;
  return 1 + (2 * a + b) * (c - d) / (a - b) * (c + d);
}

int g(int a, int b, int c, int d) {
  a = h(a - 1, b + 1);
  b = b + c;
  return 2 + (2 * a + b) * (c - d) / (a - b) * (c + d);
}

is compiled with `-target arm-eabi -march=armv7-m -Oz`.

This patch computes the liveness of `LR` in return blocks only, while
taking into account the few ARM instructions, which read `LR`, but
nevertheless the register is not mentioned (explicitly or implicitly)
in the instruction operands.

Differential Revision: https://reviews.llvm.org/D89189

Added: 
    llvm/test/CodeGen/ARM/machine-outliner-return-1.ll
    llvm/test/CodeGen/ARM/machine-outliner-return-2.ll

Modified: 
    llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c0d5ae4dec27..6844b02b8528 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -5663,6 +5663,37 @@ ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
   return 0u;
 }
 
+// Compute liveness of LR at the point after the interval [I, E), which
+// denotes a *backward* iteration through instructions. Used only for return
+// basic blocks, which do not end with a tail call.
+static bool isLRAvailable(const TargetRegisterInfo &TRI,
+                          MachineBasicBlock::reverse_iterator I,
+                          MachineBasicBlock::reverse_iterator E) {
+  // At the end of the function LR dead.
+  bool Live = false;
+  for (; I != E; ++I) {
+    const MachineInstr &MI = *I;
+
+    // Check defs of LR.
+    if (MI.modifiesRegister(ARM::LR, &TRI))
+      Live = false;
+
+    // Check uses of LR.
+    unsigned Opcode = MI.getOpcode();
+    if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
+        Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
+        Opcode == ARM::tBXNS_RET) {
+      // These instructions use LR, but it's not an (explicit or implicit)
+      // operand.
+      Live = true;
+      continue;
+    }
+    if (MI.readsRegister(ARM::LR, &TRI))
+      Live = true;
+  }
+  return !Live;
+}
+
 outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
@@ -5755,8 +5786,15 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
 
     for (outliner::Candidate &C : RepeatedSequenceLocs) {
       C.initLRU(TRI);
-      // Is LR available? If so, we don't need a save.
-      if (C.LRU.available(ARM::LR)) {
+      // LR liveness is overestimated in return blocks, unless they end with a
+      // tail call.
+      const auto Last = C.getMBB()->rbegin();
+      const bool LRIsAvailable =
+          C.getMBB()->isReturnBlock() && !Last->isCall()
+              ? isLRAvailable(TRI, Last,
+                              (MachineBasicBlock::reverse_iterator)C.front())
+              : C.LRU.available(ARM::LR);
+      if (LRIsAvailable) {
         FrameID = MachineOutlinerNoLRSave;
         NumBytesNoStackCalls += Costs.CallNoLRSave;
         C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
@@ -5867,7 +5905,13 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
   if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
     Flags |= MachineOutlinerMBBFlags::HasCalls;
 
-  if (!LRU.available(ARM::LR))
+  // LR liveness is overestimated in return blocks.
+
+  bool LRIsAvailable =
+      MBB.isReturnBlock() && !MBB.back().isCall()
+          ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
+          : LRU.available(ARM::LR);
+  if (!LRIsAvailable)
     Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
 
   return true;

diff  --git a/llvm/test/CodeGen/ARM/machine-outliner-return-1.ll b/llvm/test/CodeGen/ARM/machine-outliner-return-1.ll
new file mode 100644
index 000000000000..30d2c151b517
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/machine-outliner-return-1.ll
@@ -0,0 +1,57 @@
+; RUN: llc --verify-machineinstrs %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7m-unknown-unknown-eabi"
+
+declare dso_local i32 @h0(i32, i32) local_unnamed_addr #1
+
+define dso_local i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %a, 1
+  %sub = add nsw i32 %b, -1
+  %call = tail call i32 @h0(i32 %add, i32 %sub) #0
+  %add1 = add nsw i32 %c, %b
+  %mul = shl nsw i32 %call, 1
+  %add2 = add nsw i32 %mul, %add1
+  %sub3 = sub nsw i32 %c, %d
+  %mul4 = mul nsw i32 %add2, %sub3
+  %sub5 = sub nsw i32 %call, %add1
+  %div = sdiv i32 %mul4, %sub5
+  %add6 = add nsw i32 %d, %c
+  %mul7 = mul nsw i32 %div, %add6
+  %add8 = add nsw i32 %mul7, 1
+  ret i32 %add8
+}
+; CHECK-LABEL: f:
+; CHECK:       bl   h0
+; CHECK-NEXT:  bl   OUTLINED_FUNCTION_0
+; CHECK-NEXT:  adds r0, #1
+; CHECK-NEXT:  pop  {r4, r5, r6, pc}
+
+
+define dso_local i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+entry:
+  %sub = add nsw i32 %a, -1
+  %add = add nsw i32 %b, 1
+  %call = tail call i32 @h0(i32 %sub, i32 %add) #0
+  %add1 = add nsw i32 %c, %b
+  %mul = shl nsw i32 %call, 1
+  %add2 = add nsw i32 %mul, %add1
+  %sub3 = sub nsw i32 %c, %d
+  %mul4 = mul nsw i32 %add2, %sub3
+  %sub5 = sub nsw i32 %call, %add1
+  %div = sdiv i32 %mul4, %sub5
+  %add6 = add nsw i32 %d, %c
+  %mul7 = mul nsw i32 %div, %add6
+  %add8 = add nsw i32 %mul7, 2
+  ret i32 %add8
+}
+; CHECK-LABEL: g:
+; CHECK:       bl   h0
+; CHECK-NEXT:  bl   OUTLINED_FUNCTION_0
+; CHECK-NEXT:  adds r0, #2
+; CHECK-NEXT:  pop  {r4, r5, r6, pc}
+
+
+attributes #0 = { minsize nounwind optsize }
+attributes #1 = { minsize optsize }

diff  --git a/llvm/test/CodeGen/ARM/machine-outliner-return-2.ll b/llvm/test/CodeGen/ARM/machine-outliner-return-2.ll
new file mode 100644
index 000000000000..dc695f5e928f
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/machine-outliner-return-2.ll
@@ -0,0 +1,52 @@
+; RUN: llc -verify-machineinstrs %s -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7m-unknown-unknown-eabi"
+
+declare dso_local i32 @t(i32) local_unnamed_addr #0
+
+define dso_local i32 @f(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
+entry:
+  %mul = mul nsw i32 %a, 3
+  %add = add nsw i32 %mul, 1
+  %sub = add nsw i32 %b, -1
+  %div = sdiv i32 %add, %sub
+  %sub1 = sub nsw i32 %a, %c
+  %div2 = sdiv i32 %div, %sub1
+  %mul3 = mul nsw i32 %div2, %b
+  %add4 = add nsw i32 %mul3, 1
+  %call = tail call i32 @t(i32 %add4) #0
+  ret i32 %call
+}
+; CHECK-LABEL: f:
+; CHECK:       str  lr, [sp, #-8]!
+; CHECK-NEXT:  bl   OUTLINED_FUNCTION_0
+; CHECK-NEXT:  ldr  lr, [sp], #8
+; CHECK-NEXT:  adds r0, #1
+; CHECK-NEXT:  b    t
+
+define dso_local i32 @g(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
+entry:
+  %mul = mul nsw i32 %a, 3
+  %add = add nsw i32 %mul, 1
+  %sub = add nsw i32 %b, -1
+  %div = sdiv i32 %add, %sub
+  %sub1 = sub nsw i32 %a, %c
+  %div2 = sdiv i32 %div, %sub1
+  %mul3 = mul nsw i32 %div2, %b
+  %add4 = add nsw i32 %mul3, 3
+  %call = tail call i32 @t(i32 %add4) #0
+  ret i32 %call
+}
+
+; CHECK-LABEL: g:
+; CHECK:       str  lr, [sp, #-8]!
+; CHECK-NEXT:  bl   OUTLINED_FUNCTION_0
+; CHECK-NEXT:  ldr  lr, [sp], #8
+; CHECK-NEXT:  adds r0, #3
+; CHECK-NEXT:  b    t
+
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK-NOT:   lr
+; CHECK:       bx lr
+
+attributes #0 = { minsize nounwind optsize }


        


More information about the llvm-commits mailing list