[llvm] 11455a7 - [CodeGen] Allow partial tail duplication in Machine Block Placement.

Wenlei He via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 11 12:20:56 PDT 2020


Author: Hongtao Yu
Date: 2020-04-11T12:20:31-07:00
New Revision: 11455a7905947f61533a689e5f8ef45d5d1f26ff

URL: https://github.com/llvm/llvm-project/commit/11455a7905947f61533a689e5f8ef45d5d1f26ff
DIFF: https://github.com/llvm/llvm-project/commit/11455a7905947f61533a689e5f8ef45d5d1f26ff.diff

LOG: [CodeGen] Allow partial tail duplication in Machine Block Placement.

Summary: A count profile may affect tail duplication's heuristic, causing a block to be duplicated into only some of its predecessors. This was not allowed in the Machine Block Placement pass, where an assert would fire. I'm removing the assert and making the optimization bail out when such a case happens.

Reviewers: wenlei, davidxl, Carrot

Reviewed By: Carrot

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77748

Added: 
    llvm/test/CodeGen/X86/tail-dup-partial.ll

Modified: 
    llvm/lib/CodeGen/MachineBlockPlacement.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 31491513bd96..a4f07806fbe3 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -2914,10 +2914,7 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
   // duplicated into is still small enough to be duplicated again.
   // No need to call markBlockSuccessors in this case, as the blocks being
   // duplicated from here on are already scheduled.
-  // Note that DuplicatedToLPred always implies Removed.
-  while (DuplicatedToLPred) {
-    assert(Removed && "Block must have been removed to be duplicated into its "
-           "layout predecessor.");
+  while (DuplicatedToLPred && Removed) {
     MachineBasicBlock *DupBB, *DupPred;
     // The removal callback causes Chain.end() to be updated when a block is
     // removed. On the first pass through the loop, the chain end should be the
@@ -2956,8 +2953,7 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
 ///                          chosen in the given order due to unnatural CFG
 ///                          only needed if \p BB is removed and
 ///                          \p PrevUnplacedBlockIt pointed to \p BB.
-/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will
-///                        only be true if the block was removed.
+/// \p DuplicatedToLPred - True if the block was duplicated into LPred.
 /// \return  - True if the block was duplicated into all preds and removed.
 bool MachineBlockPlacement::maybeTailDuplicateBlock(
     MachineBasicBlock *BB, MachineBasicBlock *LPred,

diff  --git a/llvm/test/CodeGen/X86/tail-dup-partial.ll b/llvm/test/CodeGen/X86/tail-dup-partial.ll
new file mode 100644
index 000000000000..7825856dbd0f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-partial.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu  -O3 | FileCheck %s
+
+; Function Attrs: uwtable
+; When tail-duplicating during placement, we work backward from blocks with
+; multiple successors. In this case, the block dup1 gets duplicated into dup2
+; and if.then64, and then the block dup2 only gets duplicated into land.lhs.true.
+
+define void @partial_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6, i32 %a7) #0 align 2  !prof !1 {
+; CHECK-LABEL: partial_tail_dup:
+; CHECK:        # %bb.0: # %entry
+; CHECK-NEXT:   .p2align 4, 0x90
+; CHECK-NEXT:   .LBB0_1: # %for.cond
+; CHECK-NEXT:   # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:	  testb	$1, %dil
+; CHECK-NEXT:	  je	.LBB0_3
+; CHECK-NEXT:   # %bb.2: # %land.lhs.true
+; CHECK-NEXT:   # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:	  movl	$10, (%rdx)
+; CHECK-NEXT:	  movl	$2, (%rcx)
+; CHECK-NEXT:	  testl	%r9d, %r9d
+; CHECK-NEXT:	  je	.LBB0_1
+; CHECK-NEXT:	  jmp	.LBB0_8
+; CHECK-NEXT:	  .p2align	4, 0x90
+; CHECK-NEXT:   .LBB0_6: # %dup2
+; CHECK-NEXT:   # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:	  movl	$2, (%rcx)
+; CHECK-NEXT:	  testl	%r9d, %r9d
+; CHECK-NEXT:	  je	.LBB0_1
+; CHECK-NEXT:	  jmp	.LBB0_8
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_3: # %if.end56
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    testb $1, %sil
+; CHECK-NEXT:    je .LBB0_5
+; CHECK-NEXT:  # %bb.4: # %if.then64
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    movb $1, (%r8)
+; CHECK-NEXT:    testl %r9d, %r9d
+; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:    jmp .LBB0_8
+; CHECK-NEXT:  .LBB0_5: # %if.end70
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    movl $12, (%rdx)
+; CHECK-NEXT:    jne .LBB0_6  
+; CHECK-NEXT:  .LBB0_8: # %for.end
+; CHECK-NEXT:    retq
+entry:
+  br label %for.cond
+
+for.cond:                                      
+  br i1 %a1, label %land.lhs.true, label %if.end56
+
+land.lhs.true:                                   
+  store i32 10, i32* %a4, align 8
+  br label %dup2
+
+if.end56:                                        
+  br i1 %a2, label %if.then64, label %if.end70, !prof !2
+
+if.then64:                                       
+  store i8 1, i8* %a6, align 1
+  br label %dup1
+
+if.end70:                                        
+  store i32 12, i32* %a4, align 8
+  br i1 %a2, label %dup2, label %for.end
+
+dup2:                                            
+  store i32 2, i32* %a5, align 4
+  br label %dup1
+
+dup1:                                            
+  %val = load i32, i32* %a4, align 8
+  %switch = icmp ult i32 %a7, 1
+  br i1 %switch, label %for.cond, label %for.end, !prof !3
+
+for.end:                                         
+  ret void
+}
+
+attributes #0 = { uwtable }
+
+!1 = !{!"function_entry_count", i64 2}
+!2 = !{!"branch_weights", i32 5, i32 1}
+!3 = !{!"branch_weights", i32 5, i32 1}


        


More information about the llvm-commits mailing list