[llvm] e7249e4 - [SimplifyCFG] Ignore free instructions when computing cost for folding branch to common dest

Arthur Eubanks via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 22 09:52:47 PDT 2021


Author: Arthur Eubanks
Date: 2021-09-22T09:52:37-07:00
New Revision: e7249e4acf3cf9438d6d9e02edecebd5b622a4dc

URL: https://github.com/llvm/llvm-project/commit/e7249e4acf3cf9438d6d9e02edecebd5b622a4dc
DIFF: https://github.com/llvm/llvm-project/commit/e7249e4acf3cf9438d6d9e02edecebd5b622a4dc.diff

LOG: [SimplifyCFG] Ignore free instructions when computing cost for folding branch to common dest

When determining whether to fold branches to a common destination by
merging two blocks, SimplifyCFG will count the number of instructions to
be moved into the first basic block. However, there's no reason to count
free instructions like bitcasts and other similar instructions.

This resolves missed branch foldings with -fstrict-vtable-pointers in
llvm-test-suite's lambda benchmark.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D108837

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/test/CodeGen/AArch64/csr-split.ll
    llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2ff98b238de08..a3bd89e72af95 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3258,13 +3258,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
     SawVectorOp |= isVectorOp(I);
 
     // Account for the cost of duplicating this instruction into each
-    // predecessor.
-    NumBonusInsts += PredCount;
-
-    // Early exits once we reach the limit.
-    if (NumBonusInsts >
-        BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
-      return false;
+    // predecessor. Ignore free instructions.
+    if (!TTI ||
+        TTI->getUserCost(&I, CostKind) != TargetTransformInfo::TCC_Free) {
+      NumBonusInsts += PredCount;
+
+      // Early exits once we reach the limit.
+      if (NumBonusInsts >
+          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
+        return false;
+    }
 
     auto IsBCSSAUse = [BB, &I](Use &U) {
       auto *UI = cast<Instruction>(U.getUser());

diff  --git a/llvm/test/CodeGen/AArch64/csr-split.ll b/llvm/test/CodeGen/AArch64/csr-split.ll
index 1bee7f05acec1..de85b4313433f 100644
--- a/llvm/test/CodeGen/AArch64/csr-split.ll
+++ b/llvm/test/CodeGen/AArch64/csr-split.ll
@@ -82,22 +82,22 @@ define dso_local signext i32 @test2(i32* %p1) local_unnamed_addr  {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    cbz x0, .LBB1_2
-; CHECK-NEXT:  // %bb.1: // %if.end
+; CHECK-NEXT:    cbz x0, .LBB1_3
+; CHECK-NEXT:  // %bb.1: // %entry
 ; CHECK-NEXT:    adrp x8, a
 ; CHECK-NEXT:    ldrsw x8, [x8, :lo12:a]
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    b.eq .LBB1_3
-; CHECK-NEXT:  .LBB1_2: // %return
-; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB1_3: // %if.then2
+; CHECK-NEXT:    b.ne .LBB1_3
+; CHECK-NEXT:  // %bb.2: // %if.then2
 ; CHECK-NEXT:    bl callVoid
 ; CHECK-NEXT:    mov x0, x19
 ; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT:    b callNonVoid
+; CHECK-NEXT:  .LBB1_3: // %return
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
 ;
 ; CHECK-APPLE-LABEL: test2:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
@@ -108,26 +108,26 @@ define dso_local signext i32 @test2(i32* %p1) local_unnamed_addr  {
 ; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
 ; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
 ; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
-; CHECK-APPLE-NEXT:    cbz x0, LBB1_2
-; CHECK-APPLE-NEXT:  ; %bb.1: ; %if.end
+; CHECK-APPLE-NEXT:    cbz x0, LBB1_3
+; CHECK-APPLE-NEXT:  ; %bb.1: ; %entry
 ; CHECK-APPLE-NEXT:  Lloh2:
 ; CHECK-APPLE-NEXT:    adrp x8, _a@PAGE
 ; CHECK-APPLE-NEXT:  Lloh3:
 ; CHECK-APPLE-NEXT:    ldrsw x8, [x8, _a@PAGEOFF]
 ; CHECK-APPLE-NEXT:    mov x19, x0
 ; CHECK-APPLE-NEXT:    cmp x8, x0
-; CHECK-APPLE-NEXT:    b.eq LBB1_3
-; CHECK-APPLE-NEXT:  LBB1_2: ; %return
-; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; CHECK-APPLE-NEXT:    mov w0, wzr
-; CHECK-APPLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; CHECK-APPLE-NEXT:    ret
-; CHECK-APPLE-NEXT:  LBB1_3: ; %if.then2
+; CHECK-APPLE-NEXT:    b.ne LBB1_3
+; CHECK-APPLE-NEXT:  ; %bb.2: ; %if.then2
 ; CHECK-APPLE-NEXT:    bl _callVoid
 ; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-APPLE-NEXT:    mov x0, x19
 ; CHECK-APPLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
 ; CHECK-APPLE-NEXT:    b _callNonVoid
+; CHECK-APPLE-NEXT:  LBB1_3: ; %return
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    mov w0, wzr
+; CHECK-APPLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
 ; CHECK-APPLE-NEXT:    .loh AdrpLdr Lloh2, Lloh3
 entry:
   %tobool = icmp eq i32* %p1, null

diff  --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll
index ace2a5ed35cad..27df5ec445828 100644
--- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll
+++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll
@@ -8,12 +8,11 @@ declare void @g2()
 
 define void @f(i8* %a, i8* %b, i1 %c, i1 %d, i1 %e) {
 ; CHECK-LABEL: @f(
-; CHECK-NEXT:    br i1 [[C:%.*]], label [[L1:%.*]], label [[L3:%.*]]
-; CHECK:       l1:
 ; CHECK-NEXT:    [[A1:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* [[A:%.*]])
 ; CHECK-NEXT:    [[B1:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* [[B:%.*]])
 ; CHECK-NEXT:    [[I:%.*]] = icmp eq i8* [[A1]], [[B1]]
-; CHECK-NEXT:    br i1 [[I]], label [[L2:%.*]], label [[L3]]
+; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[C:%.*]], i1 [[I]], i1 false
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[L2:%.*]], label [[L3:%.*]]
 ; CHECK:       l2:
 ; CHECK-NEXT:    call void @g1()
 ; CHECK-NEXT:    br label [[RET:%.*]]


        


More information about the llvm-commits mailing list