[llvm] 0a01fc9 - Revert "[TRE] allow TRE for non-capturing calls."

Sun Jul 12 14:41:17 PDT 2020

Author: Alexey Lapshin
Date: 2020-07-13T00:39:48+03:00
New Revision: 0a01fc96e24b7c7de2141a2ea07593500ea34732

URL: https://github.com/llvm/llvm-project/commit/0a01fc96e24b7c7de2141a2ea07593500ea34732
DIFF: https://github.com/llvm/llvm-project/commit/0a01fc96e24b7c7de2141a2ea07593500ea34732.diff

LOG: Revert "[TRE] allow TRE for non-capturing calls."

This reverts commit f7907e9d223d8484f9afd457ba614c2db2ae4743.

That commit caused an error in the multi-stage build.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
    llvm/test/Transforms/TailCallElim/basic.ll

Removed: 
    llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll
    llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index bfd312a52ea5..5bb1d54d7d12 100644

--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -81,7 +81,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "tailcallelim"
@@ -93,10 +92,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
 /// Scan the specified function for alloca instructions.
 /// If it contains any dynamic allocas, returns false.
 static bool canTRE(Function &F) {
-  // TODO: We don't do TRE if dynamic allocas are used.
-  // Dynamic allocas allocate stack space which should be
-  // deallocated before new iteration started. That is
-  // currently not implemented.
+  // Because of PR962, we don't TRE dynamic allocas.
   return llvm::all_of(instructions(F), [](Instruction &I) {
     auto *AI = dyn_cast<AllocaInst>(&I);
     return !AI || AI->isStaticAlloca();
@@ -189,9 +185,11 @@ struct AllocaDerivedValueTracker {
 };
 }
 
-static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
+static bool markTails(Function &F, bool &AllCallsAreTailCalls,
+                      OptimizationRemarkEmitter *ORE) {
   if (F.callsFunctionThatReturnsTwice())
     return false;
+  AllCallsAreTailCalls = true;
 
   // The local stack holds all alloca instructions and all byval arguments.
   AllocaDerivedValueTracker Tracker;
@@ -274,8 +272,11 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
         }
       }
 
-      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
+      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
         DeferredTails.push_back(CI);
+      } else {
+        AllCallsAreTailCalls = false;
+      }
     }
 
     for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
@@ -312,6 +313,8 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
       LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
       CI->setTailCall();
       Modified = true;
+    } else {
+      AllCallsAreTailCalls = false;
     }
   }
 
@@ -322,16 +325,7 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
 /// instruction from after the call to before the call, assuming that all
 /// instructions between the call and this instruction are movable.
 ///
-static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA,
-                             DenseMap<Value *, AllocaInst *> &AllocaForValue) {
-  if (isa<DbgInfoIntrinsic>(I))
-    return true;
-
-  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
-    if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
-        llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue))
-      return true;
-
+static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
   // FIXME: We can move load/store/call/free instructions above the call if the
   // call does not mod/ref the memory location being processed.
   if (I->mayHaveSideEffects())  // This also handles volatile loads.
@@ -398,6 +392,7 @@ class TailRecursionEliminator {
   // createTailRecurseLoopHeader the first time we find a call we can eliminate.
   BasicBlock *HeaderBB = nullptr;
   SmallVector<PHINode *, 8> ArgumentPHIs;
+  bool RemovableCallsMustBeMarkedTail = false;
 
   // PHI node to store our return value.
   PHINode *RetPN = nullptr;
@@ -419,15 +414,13 @@ class TailRecursionEliminator {
   // The instruction doing the accumulating.
   Instruction *AccumulatorRecursionInstr = nullptr;
 
-  // The cache for <value, alloca instruction> pairs.
-  DenseMap<Value *, AllocaInst *> AllocaForValue;
-
   TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI,
                           AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
                           DomTreeUpdater &DTU)
       : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
 
-  CallInst *findTRECandidate(Instruction *TI);
+  CallInst *findTRECandidate(Instruction *TI,
+                             bool CannotTailCallElimCallsMarkedTail);
 
   void createTailRecurseLoopHeader(CallInst *CI);
 
@@ -435,9 +428,11 @@ class TailRecursionEliminator {
 
   bool eliminateCall(CallInst *CI);
 
-  bool foldReturnAndProcessPred(ReturnInst *Ret);
+  bool foldReturnAndProcessPred(ReturnInst *Ret,
+                                bool CannotTailCallElimCallsMarkedTail);
 
-  bool processReturningBlock(ReturnInst *Ret);
+  bool processReturningBlock(ReturnInst *Ret,
+                             bool CannotTailCallElimCallsMarkedTail);
 
   void cleanupAndFinalize();
 
@@ -448,7 +443,8 @@ class TailRecursionEliminator {
 };
 } // namespace
 
-CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) {
+CallInst *TailRecursionEliminator::findTRECandidate(
+    Instruction *TI, bool CannotTailCallElimCallsMarkedTail) {
   BasicBlock *BB = TI->getParent();
 
   if (&BB->front() == TI) // Make sure there is something before the terminator.
@@ -468,9 +464,9 @@ CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) {
     --BBI;
   }
 
-  assert((!CI->isTailCall() || !CI->isNoTailCall()) &&
-         "Incompatible call site attributes(Tail,NoTail)");
-  if (!CI->isTailCall())
+  // If this call is marked as a tail call, and if there are dynamic allocas in
+  // the function, we cannot perform this optimization.
+  if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
     return nullptr;
 
   // As a special case, detect code like this:
@@ -502,13 +498,26 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
   BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry);
   BI->setDebugLoc(CI->getDebugLoc());
 
-  // Move all fixed sized allocas from HeaderBB to NewEntry.
-  for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
-                            NEBI = NewEntry->begin();
-       OEBI != E;)
-    if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
-      if (isa<ConstantInt>(AI->getArraySize()))
-        AI->moveBefore(&*NEBI);
+  // If this function has self recursive calls in the tail position where some
+  // are marked tail and some are not, only transform one flavor or another.
+  // We have to choose whether we move allocas in the entry block to the new
+  // entry block or not, so we can't make a good choice for both. We make this
+  // decision here based on whether the first call we found to remove is
+  // marked tail.
+  // NOTE: We could do slightly better here in the case that the function has
+  // no entry block allocas.
+  RemovableCallsMustBeMarkedTail = CI->isTailCall();
+
+  // If this tail call is marked 'tail' and if there are any allocas in the
+  // entry block, move them up to the new entry block.
+  if (RemovableCallsMustBeMarkedTail)
+    // Move all fixed sized allocas from HeaderBB to NewEntry.
+    for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
+                              NEBI = NewEntry->begin();
+         OEBI != E;)
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
+        if (isa<ConstantInt>(AI->getArraySize()))
+          AI->moveBefore(&*NEBI);
 
   // Now that we have created a new block, which jumps to the entry
   // block, insert a PHI node for each argument of the function.
@@ -583,7 +592,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
   Instruction *AccRecInstr = nullptr;
   BasicBlock::iterator BBI(CI);
   for (++BBI; &*BBI != Ret; ++BBI) {
-    if (canMoveAboveCall(&*BBI, CI, AA, AllocaForValue))
+    if (canMoveAboveCall(&*BBI, CI, AA))
       continue;
 
     // If we can't move the instruction above the call, it might be because it
@@ -611,6 +620,9 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
   if (!HeaderBB)
     createTailRecurseLoopHeader(CI);
 
+  if (RemovableCallsMustBeMarkedTail && !CI->isTailCall())
+    return false;
+
   // Ok, now that we know we have a pseudo-entry block WITH all of the
   // required PHI nodes, add entries into the PHI node for the actual
   // parameters passed into the tail-recursive call.
@@ -660,7 +672,8 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
   return true;
 }
 
-bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) {
+bool TailRecursionEliminator::foldReturnAndProcessPred(
+    ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
   BasicBlock *BB = Ret->getParent();
 
   bool Change = false;
@@ -685,7 +698,8 @@ bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) {
   while (!UncondBranchPreds.empty()) {
     BranchInst *BI = UncondBranchPreds.pop_back_val();
     BasicBlock *Pred = BI->getParent();
-    if (CallInst *CI = findTRECandidate(BI)) {
+    if (CallInst *CI =
+            findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) {
       LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
                         << "INTO UNCOND BRANCH PRED: " << *Pred);
       FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU);
@@ -706,8 +720,9 @@ bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) {
   return Change;
 }
 
-bool TailRecursionEliminator::processReturningBlock(ReturnInst *Ret) {
-  CallInst *CI = findTRECandidate(Ret);
+bool TailRecursionEliminator::processReturningBlock(
+    ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) {
+  CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
   if (!CI)
     return false;
 
@@ -795,25 +810,35 @@ bool TailRecursionEliminator::eliminate(Function &F,
     return false;
 
   bool MadeChange = false;
-  MadeChange |= markTails(F, ORE);
+  bool AllCallsAreTailCalls = false;
+  MadeChange |= markTails(F, AllCallsAreTailCalls, ORE);
+  if (!AllCallsAreTailCalls)
+    return MadeChange;
 
   // If this function is a varargs function, we won't be able to PHI the args
   // right, so don't even try to convert it...
   if (F.getFunctionType()->isVarArg())
     return MadeChange;
 
-  if (!canTRE(F))
-    return MadeChange;
+  // If false, we cannot perform TRE on tail calls marked with the 'tail'
+  // attribute, because doing so would cause the stack size to increase (real
+  // TRE would deallocate variable sized allocas, TRE doesn't).
+  bool CanTRETailMarkedCall = canTRE(F);
 
   TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU);
 
   // Change any tail recursive calls to loops.
+  //
+  // FIXME: The code generator produces really bad code when an 'escaping
+  // alloca' is changed from being a static alloca to being a dynamic alloca.
+  // Until this is resolved, disable this transformation if that would ever
+  // happen.  This bug is PR962.
   for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
     BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB.
     if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
-      bool Change = TRE.processReturningBlock(Ret);
+      bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall);
       if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
-        Change = TRE.foldReturnAndProcessPred(Ret);
+        Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall);
       MadeChange |= Change;
     }
   }

diff  --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll
index 669210da6314..6116014a024b 100644
--- a/llvm/test/Transforms/TailCallElim/basic.ll
+++ b/llvm/test/Transforms/TailCallElim/basic.ll
@@ -12,16 +12,15 @@ define void @test0() {
 	ret void
 }
 
-; Make sure that we do not do TRE if pointer to local stack
-; escapes through function call.
+; PR615. Make sure that we do not move the alloca so that it interferes with the tail call.
 define i32 @test1() {
 ; CHECK: i32 @test1()
 ; CHECK-NEXT: alloca
 	%A = alloca i32		; <i32*> [#uses=2]
 	store i32 5, i32* %A
 	call void @use(i32* %A)
-; CHECK: call i32 @test1
-	%X = call i32 @test1()		; <i32> [#uses=1]
+; CHECK: tail call i32 @test1
+	%X = tail call i32 @test1()		; <i32> [#uses=1]
 	ret i32 %X
 }
 

diff  --git a/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll b/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll
deleted file mode 100644
index 8f69087dd879..000000000000
--- a/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll
+++ /dev/null
@@ -1,125 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
-
-; This test checks that TRE would be done for only one recursive call.
-; The test_multiple_exits function has three recursive calls.
-; First recursive call could not be eliminated because there is
-; escaped pointer to local variable. Second recursive call could
-; be eliminated. Thrid recursive call could not be eliminated since
-; this is not last call. Thus, test checks that TRE would be done
-; for only second recursive call.
-
-; IR for that test was generated from the following C++ source:
-;
-; void capture_arg (int*);
-; void test_multiple_exits (int param);
-;   if (param >= 0 && param < 10) {
-;     int temp;
-;     capture_arg(&temp);
-;     // TRE could not be done because pointer to local
-;     // variable "temp" is escaped.
-;     test_multiple_exits(param + 1);
-;   } else if (param >=10 && param < 20) {
-;     // TRE should be done.
-;     test_multiple_exits(param + 1);
-;   } else if (param >= 20 && param < 22) {
-;     // TRE could not be done since recursive
-;     // call is not last call.
-;     test_multiple_exits(param + 1);
-;     func();
-;   }
-;
-;   return;
-; }
-
-; Function Attrs: noinline optnone uwtable
-declare void @_Z11capture_argPi(i32* %param) #0
-
-; Function Attrs: noinline optnone uwtable
-declare void @_Z4funcv() #0
-
-; Function Attrs: noinline nounwind uwtable
-define dso_local void @_Z19test_multiple_exitsi(i32 %param) local_unnamed_addr #2 {
-; CHECK-LABEL: @_Z19test_multiple_exitsi(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TEMP:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
-; CHECK:       tailrecurse:
-; CHECK-NEXT:    [[PARAM_TR:%.*]] = phi i32 [ [[PARAM:%.*]], [[ENTRY:%.*]] ], [ [[ADD6:%.*]], [[IF_THEN5:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[PARAM_TR]], 10
-; CHECK-NEXT:    br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TEMP]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP1]]) #1
-; CHECK-NEXT:    call void @_Z11capture_argPi(i32* nonnull [[TEMP]])
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[PARAM_TR]], 1
-; CHECK-NEXT:    call void @_Z19test_multiple_exitsi(i32 [[ADD]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP1]]) #1
-; CHECK-NEXT:    br label [[IF_END14:%.*]]
-; CHECK:       if.else:
-; CHECK-NEXT:    [[PARAM_OFF:%.*]] = add i32 [[PARAM_TR]], -10
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[PARAM_OFF]], 10
-; CHECK-NEXT:    br i1 [[TMP2]], label [[IF_THEN5]], label [[IF_ELSE7:%.*]]
-; CHECK:       if.then5:
-; CHECK-NEXT:    [[ADD6]] = add nuw nsw i32 [[PARAM_TR]], 1
-; CHECK-NEXT:    br label [[TAILRECURSE]]
-; CHECK:       if.else7:
-; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[PARAM_TR]], -2
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 20
-; CHECK-NEXT:    br i1 [[TMP4]], label [[IF_THEN11:%.*]], label [[IF_END14]]
-; CHECK:       if.then11:
-; CHECK-NEXT:    [[ADD12:%.*]] = add nsw i32 [[PARAM_TR]], 1
-; CHECK-NEXT:    tail call void @_Z19test_multiple_exitsi(i32 [[ADD12]])
-; CHECK-NEXT:    tail call void @_Z4funcv()
-; CHECK-NEXT:    ret void
-; CHECK:       if.end14:
-; CHECK-NEXT:    ret void
-;
-entry:
-  %temp = alloca i32, align 4
-  %0 = icmp ult i32 %param, 10
-  br i1 %0, label %if.then, label %if.else
-
-if.then:                                          ; preds = %entry
-  %1 = bitcast i32* %temp to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #2
-  call void @_Z11capture_argPi(i32* nonnull %temp)
-  %add = add nuw nsw i32 %param, 1
-  call void @_Z19test_multiple_exitsi(i32 %add)
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #2
-  br label %if.end14
-
-if.else:                                          ; preds = %entry
-  %param.off = add i32 %param, -10
-  %2 = icmp ult i32 %param.off, 10
-  br i1 %2, label %if.then5, label %if.else7
-
-if.then5:                                         ; preds = %if.else
-  %add6 = add nuw nsw i32 %param, 1
-  call void @_Z19test_multiple_exitsi(i32 %add6)
-  br label %if.end14
-
-if.else7:                                         ; preds = %if.else
-  %3 = and i32 %param, -2
-  %4 = icmp eq i32 %3, 20
-  br i1 %4, label %if.then11, label %if.end14
-
-if.then11:                                        ; preds = %if.else7
-  %add12 = add nsw i32 %param, 1
-  call void @_Z19test_multiple_exitsi(i32 %add12)
-  call void @_Z4funcv()
-  br label %if.end14
-
-if.end14:                                         ; preds = %if.then5, %if.then11, %if.else7, %if.then
-  ret void
-}
-
-; Function Attrs: argmemonly nounwind willreturn
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
-
-; Function Attrs: argmemonly nounwind willreturn
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
-
-attributes #0 = { nofree noinline norecurse nounwind uwtable }
-attributes #1 = { nounwind uwtable }
-attributes #2 = { argmemonly nounwind willreturn }

diff  --git a/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll b/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll
deleted file mode 100644
index 2168437fc570..000000000000
--- a/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
-
-; IR for that test was generated from the following C++ source:
-;
-;int count;
-;__attribute__((noinline)) void globalIncrement(const int* param) { count += *param; }
-;
-;void test(int recurseCount)
-;{
-;    if (recurseCount == 0) return;
-;    int temp = 10;
-;    globalIncrement(&temp);
-;    test(recurseCount - 1);
-;}
-;
-
-@count = dso_local local_unnamed_addr global i32 0, align 4
-
-; Function Attrs: nofree noinline norecurse nounwind uwtable
-declare void @_Z15globalIncrementPKi(i32* nocapture readonly %param) #0
-
-; Test that TRE could be done for recursive tail routine containing
-; call to function receiving a pointer to local stack.
-
-; Function Attrs: nounwind uwtable
-define dso_local void @_Z4testi(i32 %recurseCount) local_unnamed_addr #1 {
-; CHECK-LABEL: @_Z4testi(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TEMP:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
-; CHECK:       tailrecurse:
-; CHECK-NEXT:    [[RECURSECOUNT_TR:%.*]] = phi i32 [ [[RECURSECOUNT:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[IF_END:%.*]] ]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RECURSECOUNT_TR]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[TEMP]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP0]])
-; CHECK-NEXT:    store i32 10, i32* [[TEMP]], align 4
-; CHECK-NEXT:    call void @_Z15globalIncrementPKi(i32* nonnull [[TEMP]])
-; CHECK-NEXT:    [[SUB]] = add nsw i32 [[RECURSECOUNT_TR]], -1
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP0]])
-; CHECK-NEXT:    br label [[TAILRECURSE]]
-; CHECK:       return:
-; CHECK-NEXT:    ret void
-;
-entry:
-  %temp = alloca i32, align 4
-  %cmp = icmp eq i32 %recurseCount, 0
-  br i1 %cmp, label %return, label %if.end
-
-if.end:                                           ; preds = %entry
-  %0 = bitcast i32* %temp to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #6
-  store i32 10, i32* %temp, align 4
-  call void @_Z15globalIncrementPKi(i32* nonnull %temp)
-  %sub = add nsw i32 %recurseCount, -1
-  call void @_Z4testi(i32 %sub)
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #6
-  br label %return
-
-return:                                           ; preds = %entry, %if.end
-  ret void
-}
-
-; Function Attrs: argmemonly nounwind willreturn
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
-
-; Function Attrs: argmemonly nounwind willreturn
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
-
-attributes #0 = { nofree noinline norecurse nounwind uwtable }
-attributes #1 = { nounwind uwtable }
-attributes #2 = { argmemonly nounwind willreturn }