[llvm] 1bf8f9e - [SimplifyCFG] use profile metadata to refine merging branch conditions

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 23 07:21:03 PDT 2021


Author: Sanjay Patel
Date: 2021-03-23T10:19:37-04:00
New Revision: 1bf8f9e228546bd54ef9739aa808b71b97ea6051

URL: https://github.com/llvm/llvm-project/commit/1bf8f9e228546bd54ef9739aa808b71b97ea6051
DIFF: https://github.com/llvm/llvm-project/commit/1bf8f9e228546bd54ef9739aa808b71b97ea6051.diff

LOG: [SimplifyCFG] use profile metadata to refine merging branch conditions

2nd try (original: 27ae17a6b014) with fix/test for crash. We must make
sure that TTI is available before trying to use it because it is not
required (might be another bug).

Original commit message:

This is one step towards solving:
https://llvm.org/PR49336

In that example, we disregard the recommended usage of builtin_expect,
so an expensive (unpredictable) branch is folded into another branch
that is guarding it.
Here, we read the profile metadata to see if the 1st (predecessor)
condition is likely to cause execution to bypass the 2nd (successor)
condition before merging conditions by using logic ops.

Differential Revision: https://reviews.llvm.org/D98898

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/test/Transforms/LoopSimplify/merge-exits.ll
    llvm/test/Transforms/PGOProfile/chr.ll
    llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b529637e44c7f..6190367c64983 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -63,6 +63,7 @@
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -2840,31 +2841,53 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
   }
 }
 
-// Determine if the two branches share a common destination,
-// and deduce a glue that we need to use to join branch's conditions
-// to arrive at the common destination.
+/// Determine if the two branches share a common destination and deduce a glue
+/// that joins the branches' conditions to arrive at the common destination if
+/// that would be profitable.
 static Optional<std::pair<Instruction::BinaryOps, bool>>
-CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
+shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
+                                          const TargetTransformInfo *TTI) {
   assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
          "Both blocks must end with a conditional branches.");
   assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
          "PredBB must be a predecessor of BB.");
 
-  if (PBI->getSuccessor(0) == BI->getSuccessor(0))
-    return {{Instruction::Or, false}};
-  else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
-    return {{Instruction::And, false}};
-  else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
-    return {{Instruction::And, true}};
-  else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
-    return {{Instruction::Or, true}};
+  // We have the potential to fold the conditions together, but if the
+  // predecessor branch is predictable, we may not want to merge them.
+  uint64_t PTWeight, PFWeight;
+  BranchProbability PBITrueProb, Likely;
+  if (TTI && PBI->extractProfMetadata(PTWeight, PFWeight) &&
+      (PTWeight + PFWeight) != 0) {
+    PBITrueProb =
+        BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
+    Likely = TTI->getPredictableBranchThreshold();
+  }
+
+  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
+    // Speculate the 2nd condition unless the 1st is probably true.
+    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
+      return {{Instruction::Or, false}};
+  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
+    // Speculate the 2nd condition unless the 1st is probably false.
+    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
+      return {{Instruction::And, false}};
+  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
+    // Speculate the 2nd condition unless the 1st is probably true.
+    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
+      return {{Instruction::And, true}};
+  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
+    // Speculate the 2nd condition unless the 1st is probably false.
+    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
+      return {{Instruction::Or, true}};
+  }
   return None;
 }
 
-static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
+static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
                                              DomTreeUpdater *DTU,
                                              MemorySSAUpdater *MSSAU,
-                                             bool PoisonSafe) {
+                                             bool PoisonSafe,
+                                             const TargetTransformInfo *TTI) {
   BasicBlock *BB = BI->getParent();
   BasicBlock *PredBlock = PBI->getParent();
 
@@ -2872,7 +2895,7 @@ static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
   Instruction::BinaryOps Opc;
   bool InvertPredCond;
   std::tie(Opc, InvertPredCond) =
-      *CheckIfCondBranchesShareCommonDestination(BI, PBI);
+      *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
 
   LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
 
@@ -3070,8 +3093,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
     // Determine if the two branches share a common destination.
     Instruction::BinaryOps Opc;
     bool InvertPredCond;
-    if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI))
-      std::tie(Opc, InvertPredCond) = *Recepie;
+    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
+      std::tie(Opc, InvertPredCond) = *Recipe;
     else
       continue;
 
@@ -3088,7 +3111,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
         continue;
     }
 
-    return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, PoisonSafe);
+    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, PoisonSafe,
+                                            TTI);
   }
   return Changed;
 }

diff  --git a/llvm/test/Transforms/LoopSimplify/merge-exits.ll b/llvm/test/Transforms/LoopSimplify/merge-exits.ll
index 5cdf814877842..4efc7e963048a 100644
--- a/llvm/test/Transforms/LoopSimplify/merge-exits.ll
+++ b/llvm/test/Transforms/LoopSimplify/merge-exits.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info | FileCheck %s
 
 ; Loopsimplify should be able to merge the two loop exits
@@ -7,42 +8,143 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
 
-; CHECK-LABEL: @test1
-; CHECK: bb:
-; CHECK: phi i64
-; CHECK-NOT: phi i64
-; CHECK-NOT: sext
-
 define float @test1(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4
+; CHECK-NEXT:    [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1
+; CHECK-NEXT:    [[T121:%.*]] = icmp sgt i32 [[T11]], 0
+; CHECK-NEXT:    br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]]
+; CHECK:       bb.lr.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[T11]] to i64
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[BB_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BB]] ]
+; CHECK-NEXT:    [[DISTERBHI_04:%.*]] = phi float [ 0.000000e+00, [[BB_LR_PH]] ], [ [[T4:%.*]], [[BB]] ]
+; CHECK-NEXT:    [[PEAKCOUNT_02:%.*]] = phi float [ [[T0]], [[BB_LR_PH]] ], [ [[T9:%.*]], [[BB]] ]
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr float, float* [[PTMP1:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[T3:%.*]] = load float, float* [[T2]], align 4
+; CHECK-NEXT:    [[T4]] = fadd float [[T3]], [[DISTERBHI_04]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[T7:%.*]] = getelementptr float, float* [[PEAKWEIGHT]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[T8:%.*]] = load float, float* [[T7]], align 4
+; CHECK-NEXT:    [[T9]] = fadd float [[T8]], [[PEAKCOUNT_02]]
+; CHECK-NEXT:    [[T10:%.*]] = fcmp olt float [[T4]], 2.500000e+00
+; CHECK-NEXT:    [[T12:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[T10]], [[T12]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]]
+; CHECK:       bb1.bb3_crit_edge:
+; CHECK-NEXT:    [[T4_LCSSA:%.*]] = phi float [ [[T4]], [[BB]] ]
+; CHECK-NEXT:    [[T9_LCSSA:%.*]] = phi float [ [[T9]], [[BB]] ]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[PEAKCOUNT_0_LCSSA:%.*]] = phi float [ [[T9_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ [[T0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[DISTERBHI_0_LCSSA:%.*]] = phi float [ [[T4_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT:    [[T13:%.*]] = fdiv float [[PEAKCOUNT_0_LCSSA]], [[DISTERBHI_0_LCSSA]]
+; CHECK-NEXT:    ret float [[T13]]
+;
+entry:
+  %t0 = load float, float* %peakWeight, align 4
+  br label %bb1
+
+bb:		; preds = %bb2
+  %t1 = sext i32 %hiPart.0 to i64
+  %t2 = getelementptr float, float* %pTmp1, i64 %t1
+  %t3 = load float, float* %t2, align 4
+  %t4 = fadd float %t3, %distERBhi.0
+  %t5 = add i32 %hiPart.0, 1
+  %t6 = sext i32 %t5 to i64
+  %t7 = getelementptr float, float* %peakWeight, i64 %t6
+  %t8 = load float, float* %t7, align 4
+  %t9 = fadd float %t8, %peakCount.0
+  br label %bb1
+
+bb1:		; preds = %bb, %entry
+  %peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
+  %hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
+  %distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
+  %t10 = fcmp uge float %distERBhi.0, 2.500000e+00
+  br i1 %t10, label %bb3, label %bb2
+
+bb2:		; preds = %bb1
+  %t11 = add i32 %bandEdgeIndex, -1
+  %t12 = icmp sgt i32 %t11, %hiPart.0
+  br i1 %t12, label %bb, label %bb3
+
+bb3:		; preds = %bb2, %bb1
+  %t13 = fdiv float %peakCount.0, %distERBhi.0
+  ret float %t13
+}
+
+; Same test as above.
+; This would crash because we assumed TTI was available to process the metadata.
+
+define float @merge_branches_profile_metadata(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
+; CHECK-LABEL: @merge_branches_profile_metadata(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4
+; CHECK-NEXT:    [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1
+; CHECK-NEXT:    [[T121:%.*]] = icmp sgt i32 [[T11]], 0
+; CHECK-NEXT:    br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]], !prof !0
+; CHECK:       bb.lr.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[T11]] to i64
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[BB_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BB]] ]
+; CHECK-NEXT:    [[DISTERBHI_04:%.*]] = phi float [ 0.000000e+00, [[BB_LR_PH]] ], [ [[T4:%.*]], [[BB]] ]
+; CHECK-NEXT:    [[PEAKCOUNT_02:%.*]] = phi float [ [[T0]], [[BB_LR_PH]] ], [ [[T9:%.*]], [[BB]] ]
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr float, float* [[PTMP1:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[T3:%.*]] = load float, float* [[T2]], align 4
+; CHECK-NEXT:    [[T4]] = fadd float [[T3]], [[DISTERBHI_04]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[T7:%.*]] = getelementptr float, float* [[PEAKWEIGHT]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[T8:%.*]] = load float, float* [[T7]], align 4
+; CHECK-NEXT:    [[T9]] = fadd float [[T8]], [[PEAKCOUNT_02]]
+; CHECK-NEXT:    [[T10:%.*]] = fcmp olt float [[T4]], 2.500000e+00
+; CHECK-NEXT:    [[T12:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[T10]], [[T12]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]], !prof !0
+; CHECK:       bb1.bb3_crit_edge:
+; CHECK-NEXT:    [[T4_LCSSA:%.*]] = phi float [ [[T4]], [[BB]] ]
+; CHECK-NEXT:    [[T9_LCSSA:%.*]] = phi float [ [[T9]], [[BB]] ]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[PEAKCOUNT_0_LCSSA:%.*]] = phi float [ [[T9_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ [[T0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[DISTERBHI_0_LCSSA:%.*]] = phi float [ [[T4_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT:    [[T13:%.*]] = fdiv float [[PEAKCOUNT_0_LCSSA]], [[DISTERBHI_0_LCSSA]]
+; CHECK-NEXT:    ret float [[T13]]
+;
 entry:
-	%t0 = load float, float* %peakWeight, align 4
-	br label %bb1
+  %t0 = load float, float* %peakWeight, align 4
+  br label %bb1
 
 bb:		; preds = %bb2
-	%t1 = sext i32 %hiPart.0 to i64
-	%t2 = getelementptr float, float* %pTmp1, i64 %t1
-	%t3 = load float, float* %t2, align 4
-	%t4 = fadd float %t3, %distERBhi.0
-	%t5 = add i32 %hiPart.0, 1
-	%t6 = sext i32 %t5 to i64
-	%t7 = getelementptr float, float* %peakWeight, i64 %t6
-	%t8 = load float, float* %t7, align 4
-	%t9 = fadd float %t8, %peakCount.0
-	br label %bb1
+  %t1 = sext i32 %hiPart.0 to i64
+  %t2 = getelementptr float, float* %pTmp1, i64 %t1
+  %t3 = load float, float* %t2, align 4
+  %t4 = fadd float %t3, %distERBhi.0
+  %t5 = add i32 %hiPart.0, 1
+  %t6 = sext i32 %t5 to i64
+  %t7 = getelementptr float, float* %peakWeight, i64 %t6
+  %t8 = load float, float* %t7, align 4
+  %t9 = fadd float %t8, %peakCount.0
+  br label %bb1
 
 bb1:		; preds = %bb, %entry
-	%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
-	%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
-	%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
-	%t10 = fcmp uge float %distERBhi.0, 2.500000e+00
-	br i1 %t10, label %bb3, label %bb2
+  %peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
+  %hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
+  %distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
+  %t10 = fcmp uge float %distERBhi.0, 2.500000e+00
+  br i1 %t10, label %bb3, label %bb2, !prof !0
 
 bb2:		; preds = %bb1
-	%t11 = add i32 %bandEdgeIndex, -1
-	%t12 = icmp sgt i32 %t11, %hiPart.0
-	br i1 %t12, label %bb, label %bb3
+  %t11 = add i32 %bandEdgeIndex, -1
+  %t12 = icmp sgt i32 %t11, %hiPart.0
+  br i1 %t12, label %bb, label %bb3
 
 bb3:		; preds = %bb2, %bb1
-	%t13 = fdiv float %peakCount.0, %distERBhi.0
-	ret float %t13
+  %t13 = fdiv float %peakCount.0, %distERBhi.0
+  ret float %t13
 }
+
+!0 = !{!"branch_weights", i32 2000, i32 1}

diff  --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll
index ff3a6b1b5e430..ddf4811a0363e 100644
--- a/llvm/test/Transforms/PGOProfile/chr.ll
+++ b/llvm/test/Transforms/PGOProfile/chr.ll
@@ -1277,11 +1277,12 @@ define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
 ; CHECK-LABEL: @test_chr_14(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
-; CHECK-NEXT:    [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1
+; CHECK-NEXT:    [[V1:%.*]] = icmp eq i32 [[Z:%.*]], 1
+; CHECK-NEXT:    br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
+; CHECK:       entry.split.nonchr:
 ; CHECK-NEXT:    [[V0:%.*]] = icmp eq i32 [[Z]], 0
 ; CHECK-NEXT:    [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
-; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[V1]], i1 [[V3_NONCHR]], i1 false
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19
+; CHECK-NEXT:    br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof !16
 ; CHECK:       bb0.nonchr:
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    br label [[BB1]]
@@ -1912,7 +1913,7 @@ define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 {
 ; CHECK-NEXT:    switch i64 [[I]], label [[BB2:%.*]] [
 ; CHECK-NEXT:    i64 2, label [[BB3_NONCHR2:%.*]]
 ; CHECK-NEXT:    i64 86, label [[BB2_NONCHR1:%.*]]
-; CHECK-NEXT:    ], !prof !20
+; CHECK-NEXT:    ], !prof !19
 ; CHECK:       bb2:
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    call void @foo()
@@ -2489,14 +2490,14 @@ define void @test_chr_24(i32* %i) !prof !14 {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !20
 ; CHECK:       bb0:
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    br label [[BB1]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
-; CHECK-NEXT:    br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21
+; CHECK-NEXT:    br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !20
 ; CHECK:       bb2:
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    br label [[BB3]]
@@ -2550,4 +2551,3 @@ bb3:
 ; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}
 ; CHECK: !17 = !{!"branch_weights", i32 1, i32 1}
 ; CHECK: !18 = !{!"branch_weights", i32 1, i32 0}
-; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000}

diff  --git a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
index 3d5fe94ade451..38d82bb944812 100644
--- a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
+++ b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -636,16 +636,17 @@ exit:
   ret i32 %outval
 }
 
-; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
+; Merging the icmps with logic-op defeats the purpose of the metadata.
 ; We can't tell which condition is expensive if they are combined.
 
 define void @or_icmps_harmful(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @or_icmps_harmful(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
+; CHECK-NEXT:    br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
+; CHECK:       rare:
 ; CHECK-NEXT:    [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
+; CHECK-NEXT:    br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
 ; CHECK:       false:
 ; CHECK-NEXT:    store i8 42, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -668,16 +669,17 @@ exit:
   ret void
 }
 
-; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
+; Merging the icmps with logic-op defeats the purpose of the metadata.
 ; We can't tell which condition is expensive if they are combined.
 
 define void @or_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @or_icmps_harmful_inverted(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
+; CHECK-NEXT:    br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
+; CHECK:       rare:
 ; CHECK-NEXT:    [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
+; CHECK-NEXT:    br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
 ; CHECK:       false:
 ; CHECK-NEXT:    store i8 42, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -700,7 +702,8 @@ exit:
   ret void
 }
 
-; The probability threshold is set by a builtin_expect setting.
+; The probability threshold is determined by a TTI setting.
+; In this example, we are just short of strongly expected, so speculate.
 
 define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @or_icmps_not_that_harmful(
@@ -708,7 +711,7 @@ define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
 ; CHECK-NEXT:    [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
 ; CHECK-NEXT:    [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !20
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
 ; CHECK:       false:
 ; CHECK-NEXT:    store i8 42, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -731,13 +734,16 @@ exit:
   ret void
 }
 
+; The probability threshold is determined by a TTI setting.
+; In this example, we are just short of strongly expected, so speculate.
+
 define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @or_icmps_not_that_harmful_inverted(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
 ; CHECK-NEXT:    [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
 ; CHECK:       false:
 ; CHECK-NEXT:    store i8 42, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -760,13 +766,15 @@ exit:
   ret void
 }
 
+; The 1st cmp is probably true, so speculating the 2nd is probably a win.
+
 define void @or_icmps_useful(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @or_icmps_useful(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
 ; CHECK-NEXT:    [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
 ; CHECK:       false:
 ; CHECK-NEXT:    store i8 42, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -789,13 +797,15 @@ exit:
   ret void
 }
 
+; The 1st cmp is probably false, so speculating the 2nd is probably a win.
+
 define void @or_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @or_icmps_useful_inverted(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
 ; CHECK-NEXT:    [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
 ; CHECK:       false:
 ; CHECK-NEXT:    store i8 42, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -849,16 +859,17 @@ exit:
   ret void
 }
 
-; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
+; Merging the icmps with logic-op defeats the purpose of the metadata.
 ; We can't tell which condition is expensive if they are combined.
 
 define void @and_icmps_harmful(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @and_icmps_harmful(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
+; CHECK-NEXT:    br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
+; CHECK:       rare:
 ; CHECK-NEXT:    [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
+; CHECK-NEXT:    br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
 ; CHECK:       false:
 ; CHECK-NEXT:    store i8 42, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -881,16 +892,17 @@ exit:
   ret void
 }
 
-; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
+; Merging the icmps with logic-op defeats the purpose of the metadata.
 ; We can't tell which condition is expensive if they are combined.
 
 define void @and_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @and_icmps_harmful_inverted(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
+; CHECK-NEXT:    br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
+; CHECK:       rare:
 ; CHECK-NEXT:    [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
+; CHECK-NEXT:    br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
 ; CHECK:       false:
 ; CHECK-NEXT:    store i8 42, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -913,6 +925,9 @@ exit:
   ret void
 }
 
+; The probability threshold is determined by a TTI setting.
+; In this example, we are just short of strongly expected, so speculate.
+
 define void @and_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @and_icmps_not_that_harmful(
 ; CHECK-NEXT:  entry:
@@ -942,6 +957,9 @@ exit:
   ret void
 }
 
+; The probability threshold is determined by a TTI setting.
+; In this example, we are just short of strongly expected, so speculate.
+
 define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @and_icmps_not_that_harmful_inverted(
 ; CHECK-NEXT:  entry:
@@ -971,6 +989,8 @@ exit:
   ret void
 }
 
+; The 1st cmp is probably true, so speculating the 2nd is probably a win.
+
 define void @and_icmps_useful(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @and_icmps_useful(
 ; CHECK-NEXT:  entry:
@@ -1000,6 +1020,8 @@ exit:
   ret void
 }
 
+; The 1st cmp is probably false, so speculating the 2nd is probably a win.
+
 define void @and_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @and_icmps_useful_inverted(
 ; CHECK-NEXT:  entry:


        


More information about the llvm-commits mailing list