[llvm] r280832 - X86: Fold tail calls into conditional branches where possible (PR26302)

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 7 10:52:15 PDT 2016


Author: hans
Date: Wed Sep  7 12:52:14 2016
New Revision: 280832

URL: http://llvm.org/viewvc/llvm-project?rev=280832&view=rev
Log:
X86: Fold tail calls into conditional branches where possible (PR26302)

When branching to a block that immediately tail calls, it is possible to fold
the call directly into the branch if the call is direct and there is no stack
adjustment, saving one byte.

Example:

  define void @f(i32 %x, i32 %y) {
  entry:
    %p = icmp eq i32 %x, %y
    br i1 %p, label %bb1, label %bb2
  bb1:
    tail call void @foo()
    ret void
  bb2:
    tail call void @bar()
    ret void
  }

before:

  f:
          movl    4(%esp), %eax
          cmpl    8(%esp), %eax
          jne     .LBB0_2
          jmp     foo
  .LBB0_2:
          jmp     bar

after:

  f:
          movl    4(%esp), %eax
          cmpl    8(%esp), %eax
          jne     bar
  .LBB0_1:
          jmp     foo

I don't expect any significant size savings from this (on a Clang bootstrap I
saw 288 bytes), but it does make the code a little tighter.

This patch only does 32-bit, but 64-bit would work similarly.

Differential Revision: https://reviews.llvm.org/D24108

Added:
    llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll
Modified:
    llvm/trunk/include/llvm/Target/TargetInstrInfo.h
    llvm/trunk/lib/CodeGen/BranchFolding.cpp
    llvm/trunk/lib/Target/X86/X86ExpandPseudo.cpp
    llvm/trunk/lib/Target/X86/X86InstrControl.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.h
    llvm/trunk/lib/Target/X86/X86MCInstLower.cpp

Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=280832&r1=280831&r2=280832&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
+++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Wed Sep  7 12:52:14 2016
@@ -1087,6 +1087,25 @@ public:
   /// terminator instruction that has not been predicated.
   virtual bool isUnpredicatedTerminator(const MachineInstr &MI) const;
 
+  /// Returns true if MI is an unconditional tail call.
+  virtual bool isUnconditionalTailCall(const MachineInstr &MI) const {
+    return false;
+  }
+
+  /// Returns true if the tail call can be made conditional on Cond.
+  virtual bool
+  canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
+                             const MachineInstr &TailCall) const {
+    return false;
+  }
+
+  /// Replace the conditional branch in MBB with a conditional tail call.
+  virtual void replaceBranchWithTailCall(MachineBasicBlock &MBB,
+                                         SmallVectorImpl<MachineOperand> &Cond,
+                                         const MachineInstr &TailCall) const {
+    llvm_unreachable("Target didn't implement replaceBranchWithTailCall!");
+  }
+
   /// Convert the instruction into a predicated instruction.
   /// It returns true if the operation was successful.
   virtual bool PredicateInstruction(MachineInstr &MI,

Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BranchFolding.cpp?rev=280832&r1=280831&r2=280832&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/BranchFolding.cpp (original)
+++ llvm/trunk/lib/CodeGen/BranchFolding.cpp Wed Sep  7 12:52:14 2016
@@ -49,6 +49,7 @@ STATISTIC(NumDeadBlocks, "Number of dead
 STATISTIC(NumBranchOpts, "Number of branches optimized");
 STATISTIC(NumTailMerge , "Number of block tails merged");
 STATISTIC(NumHoist     , "Number of times common instructions are hoisted");
+STATISTIC(NumTailCalls,  "Number of tail calls optimized");
 
 static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
                               cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -1448,6 +1449,42 @@ ReoptimizeBlock:
     }
   }
 
+  if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
+      MF.getFunction()->optForSize()) {
+    // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
+    // direction, thereby defeating careful block placement and regressing
+    // performance. Therefore, only consider this for optsize functions.
+    MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
+    if (TII->isUnconditionalTailCall(TailCall)) {
+      MachineBasicBlock *Pred = *MBB->pred_begin();
+      MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+      SmallVector<MachineOperand, 4> PredCond;
+      bool PredAnalyzable =
+          !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+      if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) {
+        // The predecessor has a conditional branch to this block which consists
+        // of only a tail call. Try to fold the tail call into the conditional
+        // branch.
+        if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+          // TODO: It would be nice if analyzeBranch() could provide a pointer
+          // to the branch instruction so replaceBranchWithTailCall() doesn't
+          // have to search for it.
+          TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+          ++NumTailCalls;
+          Pred->removeSuccessor(MBB);
+          MadeChange = true;
+          return MadeChange;
+        }
+      }
+      // If the predecessor is falling through to this block, we could reverse
+      // the branch condition and fold the tail call into that. However, after
+      // that we might have to re-arrange the CFG to fall through to the other
+      // block and there is a high risk of regressing code size rather than
+      // improving it.
+    }
+  }
+
   // Analyze the branch in the current block.
   MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
   SmallVector<MachineOperand, 4> CurCond;

Modified: llvm/trunk/lib/Target/X86/X86ExpandPseudo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ExpandPseudo.cpp?rev=280832&r1=280831&r2=280832&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ExpandPseudo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ExpandPseudo.cpp Wed Sep  7 12:52:14 2016
@@ -77,6 +77,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBa
   default:
     return false;
   case X86::TCRETURNdi:
+  case X86::TCRETURNdicc:
   case X86::TCRETURNri:
   case X86::TCRETURNmi:
   case X86::TCRETURNdi64:
@@ -94,9 +95,13 @@ bool X86ExpandPseudo::ExpandMI(MachineBa
     assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
 
     // Incoporate the retaddr area.
-    Offset = StackAdj-MaxTCDelta;
+    Offset = StackAdj - MaxTCDelta;
     assert(Offset >= 0 && "Offset should never be negative");
 
+    if (Opcode == X86::TCRETURNdicc) {
+      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
+    }
+
     if (Offset) {
       // Check for possible merge with preceding ADD instruction.
       Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
@@ -105,19 +110,33 @@ bool X86ExpandPseudo::ExpandMI(MachineBa
 
     // Jump to label or value in register.
     bool IsWin64 = STI->isTargetWin64();
-    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
-      unsigned Op = (Opcode == X86::TCRETURNdi)
-                        ? X86::TAILJMPd
-                        : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
+    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
+        Opcode == X86::TCRETURNdi64) {
+      unsigned Op;
+      switch (Opcode) {
+      case X86::TCRETURNdi:
+        Op = X86::TAILJMPd;
+        break;
+      case X86::TCRETURNdicc:
+        Op = X86::TAILJMPd_CC;
+        break;
+      default:
+        Op = IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64;
+        break;
+      }
       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
-      if (JumpTarget.isGlobal())
+      if (JumpTarget.isGlobal()) {
         MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                              JumpTarget.getTargetFlags());
-      else {
+      } else {
         assert(JumpTarget.isSymbol());
         MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                               JumpTarget.getTargetFlags());
       }
+      if (Op == X86::TAILJMPd_CC) {
+        MIB.addImm(MBBI->getOperand(2).getImm());
+      }
+
     } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
       unsigned Op = (Opcode == X86::TCRETURNmi)
                         ? X86::TAILJMPm

Modified: llvm/trunk/lib/Target/X86/X86InstrControl.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrControl.td?rev=280832&r1=280831&r2=280832&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrControl.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrControl.td Wed Sep  7 12:52:14 2016
@@ -245,6 +245,8 @@ let isCall = 1, isTerminator = 1, isRetu
   let Uses = [ESP] in {
   def TCRETURNdi : PseudoI<(outs),
                      (ins i32imm_pcrel:$dst, i32imm:$offset), []>;
+  def TCRETURNdicc : PseudoI<(outs),
+                     (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
   def TCRETURNri : PseudoI<(outs),
                      (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
   let mayLoad = 1 in
@@ -257,6 +259,13 @@ let isCall = 1, isTerminator = 1, isRetu
                            (ins i32imm_pcrel:$dst),
                            "jmp\t$dst",
                            [], IIC_JMP_REL>;
+
+  // This gets substituted to a conditional jump instruction in MC lowering.
+  def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
+                           (ins i32imm_pcrel:$dst, i32imm:$cond),
+                           "",
+                           [], IIC_JMP_REL>;
+
   def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
                    "", [], IIC_JMP_REG>;  // FIXME: Remove encoding when JIT is dead.
   let mayLoad = 1 in

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=280832&r1=280831&r2=280832&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Sep  7 12:52:14 2016
@@ -4029,6 +4029,75 @@ bool X86InstrInfo::isUnpredicatedTermina
   return !isPredicated(MI);
 }
 
+bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  case X86::TCRETURNdi:
+  case X86::TCRETURNri:
+  case X86::TCRETURNmi:
+  case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool X86InstrInfo::canMakeTailCallConditional(
+    SmallVectorImpl<MachineOperand> &BranchCond,
+    const MachineInstr &TailCall) const {
+  if (TailCall.getOpcode() != X86::TCRETURNdi) {
+    // Only direct calls can be done with a conditional branch.
+    return false;
+  }
+
+  assert(BranchCond.size() == 1);
+  if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
+    // Can't make a conditional tail call with this condition.
+    return false;
+  }
+
+  const X86MachineFunctionInfo *X86FI =
+      TailCall.getParent()->getParent()->getInfo<X86MachineFunctionInfo>();
+  if (X86FI->getTCReturnAddrDelta() != 0 ||
+      TailCall.getOperand(1).getImm() != 0) {
+    // A conditional tail call cannot do any stack adjustment.
+    return false;
+  }
+
+  return true;
+}
+
+void X86InstrInfo::replaceBranchWithTailCall(
+    MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
+    const MachineInstr &TailCall) const {
+  assert(canMakeTailCallConditional(BranchCond, TailCall));
+
+  MachineBasicBlock::iterator I = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+    if (!I->isBranch())
+      assert(0 && "Can't find the branch to replace!");
+
+    X86::CondCode CC = getCondFromBranchOpc(I->getOpcode());
+    assert(BranchCond.size() == 1);
+    if (CC != BranchCond[0].getImm())
+      continue;
+
+    break;
+  }
+
+  auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(X86::TCRETURNdicc));
+  MIB->addOperand(TailCall.getOperand(0)); // Destination.
+  MIB.addImm(0); // Stack offset (not used).
+  MIB->addOperand(BranchCond[0]); // Condition.
+  MIB->addOperand(TailCall.getOperand(2)); // Regmask.
+
+  I->eraseFromParent();
+}
+
 // Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
 // not be a fallthrough MBB now due to layout changes). Return nullptr if the
 // fallthrough MBB cannot be identified.

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=280832&r1=280831&r2=280832&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Wed Sep  7 12:52:14 2016
@@ -316,6 +316,13 @@ public:
 
   // Branch analysis.
   bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
+  bool isUnconditionalTailCall(const MachineInstr &MI) const override;
+  bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
+                                  const MachineInstr &TailCall) const override;
+  void replaceBranchWithTailCall(MachineBasicBlock &MBB,
+                                 SmallVectorImpl<MachineOperand> &Cond,
+                                 const MachineInstr &TailCall) const override;
+
   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,

Modified: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MCInstLower.cpp?rev=280832&r1=280831&r2=280832&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp Wed Sep  7 12:52:14 2016
@@ -499,18 +499,17 @@ ReSimplify:
     break;
   }
 
-  // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
-  case X86::TAILJMPr:
+  // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump instruction.
+  { unsigned Opcode;
+  case X86::TAILJMPr:   Opcode = X86::JMP32r; goto SetTailJmpOpcode;
   case X86::TAILJMPd:
-  case X86::TAILJMPd64: {
-    unsigned Opcode;
-    switch (OutMI.getOpcode()) {
-    default: llvm_unreachable("Invalid opcode");
-    case X86::TAILJMPr: Opcode = X86::JMP32r; break;
-    case X86::TAILJMPd:
-    case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
-    }
+  case X86::TAILJMPd64: Opcode = X86::JMP_1;  goto SetTailJmpOpcode;
+  case X86::TAILJMPd_CC:
+    Opcode = X86::GetCondBranchFromCond(
+        static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
+    goto SetTailJmpOpcode;
 
+  SetTailJmpOpcode:
     MCOperand Saved = OutMI.getOperand(0);
     OutMI = MCInst();
     OutMI.setOpcode(Opcode);
@@ -1306,6 +1305,7 @@ void X86AsmPrinter::EmitInstruction(cons
   case X86::TAILJMPr:
   case X86::TAILJMPm:
   case X86::TAILJMPd:
+  case X86::TAILJMPd_CC:
   case X86::TAILJMPr64:
   case X86::TAILJMPm64:
   case X86::TAILJMPd64:

Added: llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll?rev=280832&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll (added)
+++ llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll Wed Sep  7 12:52:14 2016
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -show-mc-encoding | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+
+define void @f(i32 %x, i32 %y) optsize {
+entry:
+	%p = icmp eq i32 %x, %y
+  br i1 %p, label %bb1, label %bb2
+bb1:
+  tail call void @foo()
+  ret void
+bb2:
+  tail call void @bar()
+  ret void
+}
+
+; CHECK-LABEL: f:
+; CHECK: cmp
+; CHECK: jne bar
+; Check that the asm doesn't just look good, but uses the correct encoding.
+; CHECK: encoding: [0x75,A]
+
+; CHECK: jmp foo




More information about the llvm-commits mailing list