[llvm] 5e46103 - [LIR][profcheck] Reuse the loop's exit condition profile (#164523)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 13:39:27 PST 2025
Author: Mircea Trofin
Date: 2025-11-05T13:39:22-08:00
New Revision: 5e46103f1b521ccca3e95f27f0e742fa2a2826f7
URL: https://github.com/llvm/llvm-project/commit/5e46103f1b521ccca3e95f27f0e742fa2a2826f7
DIFF: https://github.com/llvm/llvm-project/commit/5e46103f1b521ccca3e95f27f0e742fa2a2826f7.diff
LOG: [LIR][profcheck] Reuse the loop's exit condition profile (#164523)
The idioms are described in https://reviews.llvm.org/D102116 and https://reviews.llvm.org/D92754 (the original link target was https://reviews.llvm.org/D91038). In both cases, the way the loop is expressed changes without changing its iteration count, which means we can reuse the original loop's branch probabilities.
Issue #147390
Added:
llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
Modified:
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero' idiom");
+namespace llvm {
bool DisableLIRP::All;
static cl::opt<bool, true>
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt<bool> ForceMemsetPatternIntrinsic(
cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
cl::Hidden);
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
namespace {
class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
- Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+ if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+ std::swap(BranchWeights[0], BranchWeights[1]);
+ // We're not changing the loop profile, so we can reuse the original loop's
+ // profile.
+ setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+ }
+
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
/// %start = <...>
/// %extraoffset = <...>
/// <...>
-/// br label %for.cond
+/// br label %loop
///
/// loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
- Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+ if (InvertedCond)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+ // We're not changing the loop profile, so we can reuse the original loop's
+ // profile.
+ setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+ }
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0000000000000..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt -passes="module(print<block-freq>),function(loop(loop-idiom)),module(print<block-freq>)" -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck --check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nsw i8 %iv, %extraoffset
+ %val.shifted = ashr i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+ br i1 %val.shifted.iszero, label %end, label %loop, !prof !{!"branch_weights", i32 1, i32 1000 }
+
+end:
+ %iv.res = phi i8 [ %iv, %loop ]
+ %nbits.res = phi i8 [ %nbits, %loop ]
+ %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+ %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+ %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+ call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+ ret i8 %iv.res
+}
+
+define i32 @p1(i32 %x, i32 %bit) {
+entry:
+ %bitmask = shl i32 1, %bit
+ br label %loop
+
+loop:
+ %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
+ %x.curr.bitmasked = and i32 %x.curr, %bitmask
+ %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
+ %x.next = shl i32 %x.curr, 1
+ br i1 %x.curr.isbitunset, label %loop, label %end, !prof !{!"branch_weights", i32 500, i32 1 }
+
+end:
+ ret i32 %x.curr
+}
+
+;
+; PROFILE: Printing analysis results of BFI for function 'p0':
+; PROFILE: block-frequency-info: p0
+; PROFILE: - entry: float = 1.0,
+; PROFILE: - loop: float = 1001.0,
+; PROFILE: - end: float = 1.0,
+; PROFILE: block-frequency-info: p1
+; PROFILE: - entry: float = 1.0,
+; PROFILE: - loop: float = 501.0,
+; PROFILE: - end: float = 1.0,
+; PROFILE: block-frequency-info: p0
+; PROFILE: - entry: float = 1.0,
+; PROFILE: - loop: float = 1001.0,
+; PROFILE: - end: float = 1.0,
+; PROFILE: block-frequency-info: p1
+; PROFILE: - entry: float = 1.0,
+; PROFILE: - loop: float = 501.0,
+; PROFILE: - end: float = 1.0,
More information about the llvm-commits
mailing list