[llvm] b19ec1e - [BPI] Improve unreachable/ColdCall heurstics to handle loops.

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 27 10:36:15 PST 2019


Author: Taewook Oh
Date: 2019-11-27T10:36:06-08:00
New Revision: b19ec1eb3d0cbb3017e1bc7111efac5643cf4fdd

URL: https://github.com/llvm/llvm-project/commit/b19ec1eb3d0cbb3017e1bc7111efac5643cf4fdd
DIFF: https://github.com/llvm/llvm-project/commit/b19ec1eb3d0cbb3017e1bc7111efac5643cf4fdd.diff

LOG: [BPI] Improve unreachable/ColdCall heurstics to handle loops.

Summary:
While updatePostDominatedByUnreachable attemps to find basic blocks that are post-domianted by unreachable blocks, it currently cannot handle loops precisely, because it doesn't use the actual post dominator tree analysis but relies on heuristics of visiting basic blocks in post-order. More precisely, when the entire loop is post-dominated by the unreachable block, current algorithm fails to detect the entire loop as post-dominated by the unreachable because when the algorithm reaches to the loop latch it fails to tell all its successors (including the loop header) will "eventually" be post-domianted by the unreachable block, because the algorithm hasn't visited the loop header yet. This makes BPI for the loop latch to assume that loop backedges are taken with 100% of probability. And because of this, block frequency info sometimes marks virtually dead loops (which are post dominated by unreachable blocks) super hot, because 100% backedge-taken probability makes the loop iteration count the max value. updatePostDominatedByColdCall has the exact same problem as well.

To address this problem, this patch makes PostDominatedByUnreachable/PostDominatedByColdCall to be computed with the actual post-dominator tree.

Reviewers: skatkov, chandlerc, manmanren

Reviewed By: skatkov

Subscribers: manmanren, vsk, apilipenko, Carrot, qcolombet, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70104

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/BranchProbabilityInfo.h
    llvm/lib/Analysis/BranchProbabilityInfo.cpp
    llvm/test/Analysis/BranchProbabilityInfo/basic.ll
    llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll
    llvm/test/CodeGen/X86/block-placement.ll
    llvm/test/CodeGen/X86/pr37916.ll
    llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index c8965936fb9c..41d6c23b8d0d 100644
--- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -34,6 +34,7 @@ namespace llvm {
 class Function;
 class LoopInfo;
 class raw_ostream;
+class PostDominatorTree;
 class TargetLibraryInfo;
 class Value;
 
@@ -187,8 +188,10 @@ class BranchProbabilityInfo {
   /// Track the set of blocks that always lead to a cold call.
   SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall;
 
-  void updatePostDominatedByUnreachable(const BasicBlock *BB);
-  void updatePostDominatedByColdCall(const BasicBlock *BB);
+  void computePostDominatedByUnreachable(const Function &F,
+                                         PostDominatorTree *PDT);
+  void computePostDominatedByColdCall(const Function &F,
+                                      PostDominatorTree *PDT);
   bool calcUnreachableHeuristics(const BasicBlock *BB);
   bool calcMetadataWeights(const BasicBlock *BB);
   bool calcColdCallHeuristics(const BasicBlock *BB);

diff  --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 7bd237b9ad53..ffba65b5ed5e 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -146,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
 /// instruction. This is essentially never taken.
 static const uint32_t IH_NONTAKEN_WEIGHT = 1;
 
-/// Add \p BB to PostDominatedByUnreachable set if applicable.
-void
-BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
-  const Instruction *TI = BB->getTerminator();
-  if (TI->getNumSuccessors() == 0) {
-    if (isa<UnreachableInst>(TI) ||
-        // If this block is terminated by a call to
-        // @llvm.experimental.deoptimize then treat it like an unreachable since
-        // the @llvm.experimental.deoptimize call is expected to practically
-        // never execute.
-        BB->getTerminatingDeoptimizeCall())
-      PostDominatedByUnreachable.insert(BB);
-    return;
-  }
+static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT,
+                              SmallVectorImpl<const BasicBlock *> &WorkList,
+                              SmallPtrSetImpl<const BasicBlock *> &TargetSet) {
+  SmallVector<BasicBlock *, 8> Descendants;
+  SmallPtrSet<const BasicBlock *, 16> NewItems;
+
+  PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants);
+  for (auto *BB : Descendants)
+    if (TargetSet.insert(BB).second)
+      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+        if (!TargetSet.count(*PI))
+          NewItems.insert(*PI);
+  WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end());
+}
 
-  // If the terminator is an InvokeInst, check only the normal destination block
-  // as the unwind edge of InvokeInst is also very unlikely taken.
-  if (auto *II = dyn_cast<InvokeInst>(TI)) {
-    if (PostDominatedByUnreachable.count(II->getNormalDest()))
-      PostDominatedByUnreachable.insert(BB);
-    return;
+/// Compute a set of basic blocks that are post-dominated by unreachables.
+void BranchProbabilityInfo::computePostDominatedByUnreachable(
+    const Function &F, PostDominatorTree *PDT) {
+  SmallVector<const BasicBlock *, 8> WorkList;
+  for (auto &BB : F) {
+    const Instruction *TI = BB.getTerminator();
+    if (TI->getNumSuccessors() == 0) {
+      if (isa<UnreachableInst>(TI) ||
+          // If this block is terminated by a call to
+          // @llvm.experimental.deoptimize then treat it like an unreachable
+          // since the @llvm.experimental.deoptimize call is expected to
+          // practically never execute.
+          BB.getTerminatingDeoptimizeCall())
+        UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable);
+    }
   }
 
-  for (auto *I : successors(BB))
-    // If any of successor is not post dominated then BB is also not.
-    if (!PostDominatedByUnreachable.count(I))
-      return;
-
-  PostDominatedByUnreachable.insert(BB);
+  while (!WorkList.empty()) {
+    const BasicBlock *BB = WorkList.pop_back_val();
+    if (PostDominatedByUnreachable.count(BB))
+      continue;
+    // If the terminator is an InvokeInst, check only the normal destination
+    // block as the unwind edge of InvokeInst is also very unlikely taken.
+    if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+      if (PostDominatedByUnreachable.count(II->getNormalDest()))
+        UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
+    }
+    // If all the successors are unreachable, BB is unreachable as well.
+    else if (!successors(BB).empty() &&
+             llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
+               return PostDominatedByUnreachable.count(Succ);
+             }))
+      UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
+  }
 }
 
-/// Add \p BB to PostDominatedByColdCall set if applicable.
-void
-BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
-  assert(!PostDominatedByColdCall.count(BB));
-  const Instruction *TI = BB->getTerminator();
-  if (TI->getNumSuccessors() == 0)
-    return;
+/// compute a set of basic blocks that are post-dominated by ColdCalls.
+void BranchProbabilityInfo::computePostDominatedByColdCall(
+    const Function &F, PostDominatorTree *PDT) {
+  SmallVector<const BasicBlock *, 8> WorkList;
+  for (auto &BB : F)
+    for (auto &I : BB)
+      if (const CallInst *CI = dyn_cast<CallInst>(&I))
+        if (CI->hasFnAttr(Attribute::Cold))
+          UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall);
 
-  // If all of successor are post dominated then BB is also done.
-  if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) {
-        return PostDominatedByColdCall.count(SuccBB);
-      })) {
-    PostDominatedByColdCall.insert(BB);
-    return;
-  }
+  while (!WorkList.empty()) {
+    const BasicBlock *BB = WorkList.pop_back_val();
 
-  // If the terminator is an InvokeInst, check only the normal destination
-  // block as the unwind edge of InvokeInst is also very unlikely taken.
-  if (auto *II = dyn_cast<InvokeInst>(TI))
-    if (PostDominatedByColdCall.count(II->getNormalDest())) {
-      PostDominatedByColdCall.insert(BB);
-      return;
+    // If the terminator is an InvokeInst, check only the normal destination
+    // block as the unwind edge of InvokeInst is also very unlikely taken.
+    if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+      if (PostDominatedByColdCall.count(II->getNormalDest()))
+        UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
     }
-
-  // Otherwise, if the block itself contains a cold function, add it to the
-  // set of blocks post-dominated by a cold call.
-  for (auto &I : *BB)
-    if (const CallInst *CI = dyn_cast<CallInst>(&I))
-      if (CI->hasFnAttr(Attribute::Cold)) {
-        PostDominatedByColdCall.insert(BB);
-        return;
-      }
+    // If all of successor are post dominated then BB is also done.
+    else if (!successors(BB).empty() &&
+             llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
+               return PostDominatedByColdCall.count(Succ);
+             }))
+      UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
+  }
 }
 
 /// Calculate edge weights for successors lead to unreachable.
@@ -983,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
     LLVM_DEBUG(dbgs() << "\n");
   }
 
+  std::unique_ptr<PostDominatorTree> PDT =
+      std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
+  computePostDominatedByUnreachable(F, PDT.get());
+  computePostDominatedByColdCall(F, PDT.get());
+
   // Walk the basic blocks in post-order so that we can build up state about
   // the successors of a block iteratively.
   for (auto BB : post_order(&F.getEntryBlock())) {
     LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
                       << "\n");
-    updatePostDominatedByUnreachable(BB);
-    updatePostDominatedByColdCall(BB);
     // If there is no at least two successors, no sense to set probability.
     if (BB->getTerminator()->getNumSuccessors() < 2)
       continue;

diff  --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll
index 64e0a82456f1..8212cc476904 100644
--- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll
+++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -141,6 +141,24 @@ exit:
   ret i32 %result
 }
 
+define i32 @test_cold_loop(i32 %a, i32 %b) {
+entry:
+  %cond1 = icmp eq i32 %a, 42
+  br i1 %cond1, label %header, label %exit
+
+header:
+  br label %body
+
+body:
+  %cond2 = icmp eq i32 %b, 42
+  br i1 %cond2, label %header, label %exit
+; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00%
+
+exit:
+  call void @coldfunc()
+  ret i32 %b
+}
+
 declare i32 @regular_function(i32 %i)
 
 define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) {

diff  --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll
index 0566ca16c2f3..6e01afd2cfc8 100644
--- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll
+++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll
@@ -79,6 +79,32 @@ exit:
   ret i32 %b
 }
 
+define i32 @test4(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test4'
+; Make sure we handle loops post-dominated by unreachables.
+entry:
+  %cond1 = icmp eq i32 %a, 42
+  br i1 %cond1, label %header, label %exit
+; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00%
+; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge]
+
+header:
+  br label %body
+
+body:
+  %cond2 = icmp eq i32 %a, 42
+  br i1 %cond2, label %header, label %abort
+; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00%
+
+abort:
+  call void @abort() noreturn
+  unreachable
+
+exit:
+  ret i32 %b
+}
+
 @_ZTIi = external global i8*
 
 ; CHECK-LABEL: throwSmallException

diff  --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index acc4b7e13811..258cc2031ae8 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) {
 ; CHECK: %loop.header
 ; CHECK: %loop.body1
 ; CHECK: %loop.body2
+; CHECK: %loop.body3
+; CHECK: %loop.inner1.begin
 ; CHECK: %loop.body4
 ; CHECK: %loop.inner2.begin
 ; CHECK: %loop.inner2.begin
-; CHECK: %loop.body3
-; CHECK: %loop.inner1.begin
 ; CHECK: %bail
 
 entry:

diff  --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll
index 2da9413a9a0c..484104da9ff4 100644
--- a/llvm/test/CodeGen/X86/pr37916.ll
+++ b/llvm/test/CodeGen/X86/pr37916.ll
@@ -7,7 +7,6 @@
 define void @fn1() local_unnamed_addr {
 ; CHECK-LABEL: fn1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_1: # %if.end
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl a+4, %eax

diff  --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index 9238ab0bf89f..92708d33924f 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -29,8 +29,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 56
-; CHECK-NEXT:    subq $536, %rsp ## imm = 0x218
-; CHECK-NEXT:    .cfi_def_cfa_offset 592
+; CHECK-NEXT:    subq $552, %rsp ## imm = 0x228
+; CHECK-NEXT:    .cfi_def_cfa_offset 608
 ; CHECK-NEXT:    .cfi_offset %rbx, -56
 ; CHECK-NEXT:    .cfi_offset %r12, -48
 ; CHECK-NEXT:    .cfi_offset %r13, -40
@@ -54,7 +54,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_55
 ; CHECK-NEXT:  LBB0_4: ## %cleanup
-; CHECK-NEXT:    addq $536, %rsp ## imm = 0x218
+; CHECK-NEXT:    addq $552, %rsp ## imm = 0x228
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r12
 ; CHECK-NEXT:    popq %r13
@@ -68,7 +68,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    je LBB0_55
 ; CHECK-NEXT:  ## %bb.6: ## %SyTime.exit2720
 ; CHECK-NEXT:    movq %rdx, %rbx
-; CHECK-NEXT:    movq %rdi, %rbp
+; CHECK-NEXT:    movq %rdi, %r14
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    cmpq %rax, %rcx
@@ -78,10 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    movl $32, %esi
 ; CHECK-NEXT:    callq _memset
 ; CHECK-NEXT:  LBB0_8: ## %while.body.preheader
-; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    imulq $1040, %rbx, %rax ## imm = 0x410
 ; CHECK-NEXT:    movq _syBuf@{{.*}}(%rip), %rcx
-; CHECK-NEXT:    leaq 8(%rcx,%rax), %rbx
+; CHECK-NEXT:    leaq 8(%rcx,%rax), %rax
+; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    movl $1, %r15d
 ; CHECK-NEXT:    movq _syCTRO@{{.*}}(%rip), %rax
 ; CHECK-NEXT:    movb $1, %cl
@@ -92,69 +92,70 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %do.end
-; CHECK-NEXT:    xorl %r14d, %r14d
-; CHECK-NEXT:    testb %r14b, %r14b
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:    testb %bpl, %bpl
 ; CHECK-NEXT:    jne LBB0_11
 ; CHECK-NEXT:  ## %bb.12: ## %while.body200.preheader
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
-; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %r13d, %r13d
+; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    leaq {{.*}}(%rip), %r13
+; CHECK-NEXT:    movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT:    xorl %r12d, %r12d
+; CHECK-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    jmp LBB0_13
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_20: ## %sw.bb256
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    movl %ebp, %r12d
 ; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    decl %r15d
 ; CHECK-NEXT:    testl %r15d, %r15d
-; CHECK-NEXT:    movl %r13d, %r14d
+; CHECK-NEXT:    movl %r12d, %ebp
 ; CHECK-NEXT:    jle LBB0_22
 ; CHECK-NEXT:  LBB0_13: ## %while.body200
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB0_30 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT:    leal -268(%r14), %eax
+; CHECK-NEXT:    leal -268(%rbp), %eax
 ; CHECK-NEXT:    cmpl $105, %eax
 ; CHECK-NEXT:    ja LBB0_14
 ; CHECK-NEXT:  ## %bb.56: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movslq (%rdi,%rax,4), %rax
-; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    movslq (%r13,%rax,4), %rax
+; CHECK-NEXT:    addq %r13, %rax
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    testb %bl, %bl
+; CHECK-NEXT:    movl %ebp, %r12d
 ; CHECK-NEXT:    jne LBB0_21
 ; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_14: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal 1(%r14), %eax
+; CHECK-NEXT:    leal 1(%rbp), %eax
 ; CHECK-NEXT:    cmpl $21, %eax
 ; CHECK-NEXT:    ja LBB0_20
 ; CHECK-NEXT:  ## %bb.15: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $-1, %r13d
-; CHECK-NEXT:    movslq (%rsi,%rax,4), %rax
-; CHECK-NEXT:    addq %rsi, %rax
+; CHECK-NEXT:    movl $-1, %r12d
+; CHECK-NEXT:    leaq {{.*}}(%rip), %rcx
+; CHECK-NEXT:    movslq (%rcx,%rax,4), %rax
+; CHECK-NEXT:    addq %rcx, %rax
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_18: ## %while.cond201.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $1, %r13d
+; CHECK-NEXT:    movl $1, %r12d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_26: ## %sw.bb474
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r12
+; CHECK-NEXT:    testb %bl, %bl
+; CHECK-NEXT:    ## implicit-def: $r14
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.27: ## %do.body479.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r12
+; CHECK-NEXT:    testb %bl, %bl
+; CHECK-NEXT:    ## implicit-def: $r14
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.28: ## %land.rhs485.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -165,8 +166,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_30 Depth=2
-; CHECK-NEXT:    leaq 1(%r12), %rax
-; CHECK-NEXT:    testb %dl, %dl
+; CHECK-NEXT:    leaq 1(%r14), %rax
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    je LBB0_33
 ; CHECK-NEXT:  ## %bb.29: ## %land.rhs485
 ; CHECK-NEXT:    ## in Loop: Header=BB0_30 Depth=2
@@ -175,15 +176,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_30: ## %cond.true.i.i2780
 ; CHECK-NEXT:    ## Parent Loop BB0_13 Depth=1
 ; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    movq %rax, %r12
-; CHECK-NEXT:    testb %dl, %dl
+; CHECK-NEXT:    movq %rax, %r14
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_32
 ; CHECK-NEXT:  ## %bb.31: ## %lor.rhs500
 ; CHECK-NEXT:    ## in Loop: Header=BB0_30 Depth=2
 ; CHECK-NEXT:    movl $256, %esi ## imm = 0x100
 ; CHECK-NEXT:    callq ___maskrune
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    testb %dl, %dl
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_32
 ; CHECK-NEXT:    jmp LBB0_34
 ; CHECK-NEXT:  LBB0_45: ## %sw.bb1134
@@ -193,23 +193,23 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    cmpq %rax, %rcx
 ; CHECK-NEXT:    jb LBB0_55
 ; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
+; CHECK-NEXT:    movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT:    movl $268, %r12d ## imm = 0x10C
 ; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_19: ## %sw.bb243
+; CHECK-NEXT:  LBB0_40: ## %sw.bb566
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $2, %r13d
+; CHECK-NEXT:    movl $20, %r12d
 ; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_40: ## %sw.bb566
+; CHECK-NEXT:  LBB0_19: ## %sw.bb243
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %r13d
+; CHECK-NEXT:    movl $2, %r12d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_33: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    incq %r12
+; CHECK-NEXT:    incq %r14
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal -324(%r13), %eax
+; CHECK-NEXT:    leal -324(%r12), %eax
 ; CHECK-NEXT:    cmpl $59, %eax
 ; CHECK-NEXT:    ja LBB0_35
 ; CHECK-NEXT:  ## %bb.57: ## %if.end517
@@ -219,11 +219,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_38
 ; CHECK-NEXT:  LBB0_35: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $11, %r13d
+; CHECK-NEXT:    cmpl $11, %r12d
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.36: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $24, %r13d
+; CHECK-NEXT:    cmpl $24, %r12d
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.37: ## %if.then532
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -233,15 +233,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_38: ## %for.cond534
 ; CHECK-NEXT:    ## Parent Loop BB0_13 Depth=1
 ; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    testb %dl, %dl
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_38
 ; CHECK-NEXT:  ## %bb.39: ## %for.cond542.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movb $0, (%r12)
-; CHECK-NEXT:    movl %r14d, %r13d
-; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
-; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
+; CHECK-NEXT:    testb %bl, %bl
+; CHECK-NEXT:    movb $0, (%r14)
+; CHECK-NEXT:    movl %ebp, %r12d
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_42: ## %while.cond864
@@ -256,30 +255,44 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    jmp LBB0_25
 ; CHECK-NEXT:  LBB0_11:
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %r13d, %r13d
+; CHECK-NEXT:    movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT:    xorl %r12d, %r12d
 ; CHECK-NEXT:  LBB0_22: ## %while.end1465
-; CHECK-NEXT:    incl %r13d
-; CHECK-NEXT:    cmpl $16, %r13d
+; CHECK-NEXT:    incl %r12d
+; CHECK-NEXT:    cmpl $16, %r12d
 ; CHECK-NEXT:    ja LBB0_50
 ; CHECK-NEXT:  ## %bb.23: ## %while.end1465
 ; CHECK-NEXT:    movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT:    btl %r13d, %eax
+; CHECK-NEXT:    btl %r12d, %eax
 ; CHECK-NEXT:    jae LBB0_50
 ; CHECK-NEXT:  ## %bb.24:
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
+; CHECK-NEXT:    xorl %ebx, %ebx
 ; CHECK-NEXT:  LBB0_48: ## %if.then1477
 ; CHECK-NEXT:    movl $1, %edx
 ; CHECK-NEXT:    callq _write
-; CHECK-NEXT:    subq %rbp, %rbx
+; CHECK-NEXT:    subq %rbx, %r14
 ; CHECK-NEXT:    movq _syHistory@{{.*}}(%rip), %rax
-; CHECK-NEXT:    leaq 8189(%rbx,%rax), %rax
+; CHECK-NEXT:    leaq 8189(%r14,%rax), %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_49: ## %for.body1723
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    decq %rax
 ; CHECK-NEXT:    jmp LBB0_49
+; CHECK-NEXT:  LBB0_47: ## %if.then1477.loopexit
+; CHECK-NEXT:    movq %r14, %rbx
+; CHECK-NEXT:    jmp LBB0_48
+; CHECK-NEXT:  LBB0_16: ## %while.cond635.preheader
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    je LBB0_41
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_17: ## %for.body643.us
+; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jmp LBB0_17
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_41: ## %while.cond661
+; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jmp LBB0_41
 ; CHECK-NEXT:  LBB0_50: ## %for.cond1480.preheader
 ; CHECK-NEXT:    movl $512, %eax ## imm = 0x200
 ; CHECK-NEXT:    cmpq %rax, %rax
@@ -289,14 +302,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne LBB0_54
 ; CHECK-NEXT:  ## %bb.52: ## %while.body1679.preheader
-; CHECK-NEXT:    incl %ebp
-; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:    incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
 ; CHECK-NEXT:  LBB0_53: ## %while.body1679
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movq (%rbx), %rdi
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-NEXT:    movq (%rax), %rdi
 ; CHECK-NEXT:    callq _fileno
-; CHECK-NEXT:    movslq %ebp, %rax
-; CHECK-NEXT:    leal 1(%rax), %ebp
+; CHECK-NEXT:    movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload
+; CHECK-NEXT:    leal 1(%rax), %ecx
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT:    cmpq %rax, %rax
 ; CHECK-NEXT:    jl LBB0_53
 ; CHECK-NEXT:  LBB0_54: ## %while.cond1683.preheader
@@ -304,22 +318,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:  LBB0_55: ## %if.then.i
 ; CHECK-NEXT:    ud2
-; CHECK-NEXT:  LBB0_47: ## %if.then1477.loopexit
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
-; CHECK-NEXT:    movq %rbx, %rbp
-; CHECK-NEXT:    jmp LBB0_48
-; CHECK-NEXT:  LBB0_16: ## %while.cond635.preheader
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je LBB0_41
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_17: ## %for.body643.us
-; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    jmp LBB0_17
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_41: ## %while.cond661
-; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    jmp LBB0_41
 entry:
   %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64
   %old = alloca [512 x i8], align 16


        


More information about the llvm-commits mailing list