[llvm] [SimplifyIndVar] ICMP predicate conversion to EQ/NE (PR #144945)
Sergey Shcherbinin via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 19 12:04:22 PDT 2025
https://github.com/SergeyShch01 created https://github.com/llvm/llvm-project/pull/144945
This change converts ICmp predicates to EQ/NE where possible, which lets OSR work better (it makes only limited changes to the IVUse formula for other predicates). Regression tests are updated accordingly.
>From 5cfae076aae70db0fcf8049de8085bdc0871a908 Mon Sep 17 00:00:00 2001
From: Sergey Shcherbinin <sscherbinin at nvidia.com>
Date: Thu, 19 Jun 2025 22:55:38 +0400
Subject: [PATCH] [SimplifyIndVar] Changes to convert ICMP predicate to EQ/NE
when possible to facilitate better work of OSR (which makes limited changes
to the IVUse formula for other predicates). Regression tests are updated
accordingly.
---
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 163 ++++++++++++++++--
.../IndVarSimplify/AArch64/loop-guards.ll | 2 +-
.../IndVarSimplify/AArch64/widen-loop-comp.ll | 9 +-
.../Transforms/IndVarSimplify/X86/pr24356.ll | 37 +++-
.../Transforms/IndVarSimplify/ada-loops.ll | 2 +-
.../PGOProfile/Inputs/thinlto_cs.proftext | 52 ++----
.../PGOProfile/thinlto_cspgo_use.ll | 4 +-
.../AArch64/sinking-vs-if-conversion.ll | 2 +-
.../PhaseOrdering/SystemZ/sub-xor.ll | 12 +-
.../X86/hoist-load-of-baseptr.ll | 28 +--
10 files changed, 222 insertions(+), 89 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 43264cce73719..8fb310a71fa78 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -97,6 +97,7 @@ namespace {
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
+ bool forceEqualityForICmp(ICmpInst *ICmp, Instruction *IVOperand);
void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
bool IsSigned);
void replaceRemWithNumerator(BinaryOperator *Rem);
@@ -244,6 +245,128 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
return true;
}
+/// Try to rewrite a relational ICmp on an IV into an equivalent EQ/NE compare
+/// against a constant, which facilitates better work of OSR. This can be done
+/// only if all possible IV values but one lead to the same comparison result,
+/// while the 'chosen one' value gives the opposite result. Three candidates
+/// are tried in order: a constant bound whose exact region is equivalent to an
+/// EQ/NE compare, the IV start value, and the IV value of the last iteration.
+/// \returns true if \p ICmp was modified.
+bool SimplifyIndvar::forceEqualityForICmp(ICmpInst *ICmp,
+                                          Instruction *IVOperand) {
+  // EQ/NE needs no work; a non-integer IV can't take a ConstantInt bound.
+  Type *Ty = IVOperand->getType();
+  if (ICmp->isEquality() || !Ty->isIntegerTy())
+    return false;
+
+  unsigned BoundOperandIdx = IVOperand == ICmp->getOperand(0) ? 1 : 0;
+  const SCEV *BoundSCEV = SE->getSCEV(ICmp->getOperand(BoundOperandIdx));
+  const SCEVConstant *BoundC = dyn_cast<SCEVConstant>(BoundSCEV);
+  // All reasoning below is done on the form "IV pred Bound", so use the
+  // swapped predicate when the IV is the right-hand operand of the compare.
+  CmpInst::Predicate OrigPredicate = BoundOperandIdx == 1
+                                         ? ICmp->getPredicate()
+                                         : ICmp->getSwappedPredicate();
+  CmpInst::Predicate NewPredicate = CmpInst::BAD_ICMP_PREDICATE;
+  APInt NewBoundA;
+
+  if (BoundC) {
+    // Try to find the 'chosen one' value based on the predicate and the bound.
+    const APInt &BoundA = BoundC->getAPInt();
+    ConstantRange ExactCR =
+        ConstantRange::makeExactICmpRegion(OrigPredicate, BoundA);
+    if (!ExactCR.getEquivalentICmp(NewPredicate, NewBoundA))
+      NewPredicate = CmpInst::BAD_ICMP_PREDICATE;
+  }
+
+  if (!ICmpInst::isEquality(NewPredicate)) {
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVOperand));
+    if (!AR)
+      return false;
+    const SCEVConstant *IVStart = dyn_cast<SCEVConstant>(AR->getStart());
+    const SCEVConstant *IVStep =
+        dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+    // Both start and step must be constants, and the step must be non-zero.
+    if (!IVStart || !IVStep || IVStep->getValue()->isZero())
+      return false;
+
+    if (BoundC) {
+      // Check to see if the 'chosen one' value is the IV start value.
+      bool HasNoWrap = ICmpInst::isSigned(OrigPredicate)
+                           ? AR->hasNoSignedWrap()
+                           : AR->hasNoUnsignedWrap();
+      if (HasNoWrap) {
+        const DataLayout &DL = ICmp->getParent()->getDataLayout();
+        Constant *SecondIterIV =
+            ConstantInt::get(Ty, IVStart->getAPInt() + IVStep->getAPInt());
+        Constant *FirstIterResult = ConstantFoldCompareInstOperands(
+            OrigPredicate, IVStart->getValue(), BoundC->getValue(), DL);
+        Constant *SecondIterResult = ConstantFoldCompareInstOperands(
+            OrigPredicate, SecondIterIV, BoundC->getValue(), DL);
+        // Only act when both folds produced a result and the results differ.
+        if (FirstIterResult && SecondIterResult &&
+            FirstIterResult != SecondIterResult) {
+          NewBoundA = IVStart->getAPInt();
+          NewPredicate = FirstIterResult->isAllOnesValue() ? CmpInst::ICMP_EQ
+                                                           : CmpInst::ICMP_NE;
+        }
+      }
+    }
+
+    if (!ICmpInst::isEquality(NewPredicate)) {
+      // Check to see if the 'chosen one' value is the very last IV value,
+      // i.e. whether ICmp directly or indirectly defines the maximum loop
+      // trip count (or simply has aligned behavior by accident). This differs
+      // from loop exit condition rewriting, as not only ICmp instructions
+      // feeding the exiting branch directly are considered here.
+
+      // The maximum backedge-taken count must be a constant.
+      const SCEVConstant *MaxBackCount =
+          dyn_cast<SCEVConstant>(SE->getConstantMaxBackedgeTakenCount(L));
+      if (!MaxBackCount)
+        return false;
+
+      // Compute the number of consecutive iterations in which the produced
+      // predicate value stays the same, trying both exit polarities.
+      bool ExitIfTrue = false;
+      auto EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false);
+      const SCEVConstant *SameIterCount =
+          dyn_cast<SCEVConstant>(EL.ExactNotTaken);
+      if (!SameIterCount || SameIterCount->getValue()->isZero()) {
+        ExitIfTrue = !ExitIfTrue;
+        EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false);
+        SameIterCount = dyn_cast<SCEVConstant>(EL.ExactNotTaken);
+      }
+
+      // Give up if ICmp isn't aligned with the maximum trip count.
+      if (SameIterCount != MaxBackCount)
+        return false;
+
+      // Extend/truncate the count to the IV width, then compute the last IV.
+      unsigned IVBitWidth = IVStep->getAPInt().getBitWidth();
+      APInt SameIterCountA = SameIterCount->getAPInt().zextOrTrunc(IVBitWidth);
+      NewBoundA = IVStart->getAPInt() + (IVStep->getAPInt() * SameIterCountA);
+      NewPredicate = ExitIfTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+    }
+  }
+
+  // getSExtValue() is only valid for values that fit in 64 signed bits, so
+  // reject wider bounds before asking the target about immediate legality.
+  if (!NewBoundA.isSignedIntN(64) ||
+      !TTI->isLegalICmpImmediate(NewBoundA.getSExtValue()))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "INDVARS: Force EQ/NE predicate for max trip count: "
+                    << *ICmp << '\n');
+
+  assert(Ty->getPrimitiveSizeInBits() == NewBoundA.getBitWidth() &&
+         "bit widths should be aligned");
+  ICmp->setOperand(BoundOperandIdx, ConstantInt::get(Ty, NewBoundA));
+  ICmp->setPredicate(NewPredicate);
+
+  return true;
+}
+
/// SimplifyIVUsers helper for eliminating useless
/// comparisons against an induction variable.
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
@@ -267,6 +390,7 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
// If the condition is always true or always false in the given context,
// replace it with a constant value.
SmallVector<Instruction *, 4> Users;
+ bool IsDead = false;
for (auto *U : ICmp->users())
Users.push_back(cast<Instruction>(U));
const Instruction *CtxI = findCommonDominator(Users, *DT);
@@ -274,26 +398,35 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
SE->forgetValue(ICmp);
ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev));
DeadInsts.emplace_back(ICmp);
+ IsDead = true;
LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
} else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
- // fallthrough to end of function
- } else if (ICmpInst::isSigned(OriginalPred) &&
- SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
+ IsDead = true;
+ } else {
// If we were unable to make anything above, all we can is to canonicalize
// the comparison hoping that it will open the doors for other
- // optimizations. If we find out that we compare two non-negative values,
- // we turn the instruction's predicate to its unsigned version. Note that
- // we cannot rely on Pred here unless we check if we have swapped it.
- assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
- LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
- << '\n');
- ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
- ICmp->setSameSign();
- } else
- return;
+ // optimizations.
+ if (ICmpInst::isSigned(OriginalPred) && SE->isKnownNonNegative(S) &&
+ SE->isKnownNonNegative(X)) {
+ // If we find out that we compare two non-negative values,
+ // we turn the instruction's predicate to its unsigned version. Note that
+ // we cannot rely on Pred here unless we check if we have swapped it.
+ assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
+ LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
+ << '\n');
+ ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
+ ICmp->setSameSign();
+ Changed = true;
+ }
+ if (forceEqualityForICmp(ICmp, IVOperand)) {
+ Changed = true;
+ }
+ }
- ++NumElimCmp;
- Changed = true;
+ if (IsDead) {
+ NumElimCmp++;
+ Changed = true;
+ }
}
bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll
index 409622c255ea0..af0128af5b0c8 100644
--- a/llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll
+++ b/llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll
@@ -13,7 +13,7 @@ define i32 @guards_applied_to_add_rec(ptr %dst) {
; CHECK-NEXT: [[OUTER_IV_0:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[OUTER_IV_0_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; CHECK-NEXT: [[OUTER_IV_1:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[OUTER_IV_0]], %[[OUTER_LATCH]] ]
; CHECK-NEXT: [[SHR28:%.*]] = lshr i32 [[OUTER_IV_1]], 1
-; CHECK-NEXT: [[PRE:%.*]] = icmp samesign ult i32 [[OUTER_IV_1]], 2
+; CHECK-NEXT: [[PRE:%.*]] = icmp samesign eq i32 [[OUTER_IV_1]], 1
; CHECK-NEXT: br i1 [[PRE]], label %[[OUTER_LATCH]], label %[[INNER_PREHEADER:.*]]
; CHECK: [[INNER_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SHR28]] to i64
diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
index 257816650017a..4e280c628a6a5 100644
--- a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
@@ -97,7 +97,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32
-; CHECK-NEXT: br i1 %arg, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
; CHECK: for.cond1.preheader.us.preheader:
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[CONV]], i32 1)
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
@@ -110,7 +110,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) {
; CHECK-NEXT: br label [[FOR_INC13_US]]
; CHECK: for.inc13.us:
; CHECK-NEXT: [[INDVARS_IV_NEXT4]] = add nuw nsw i64 [[INDVARS_IV3]], 1
-; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT4]], 4
+; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT4]], 4
; CHECK-NEXT: br i1 [[EXITCOND6]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]]
; CHECK: for.body4.us:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY4_LR_PH_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US:%.*]] ]
@@ -237,8 +237,7 @@ define i32 @test4(i32 %a) {
; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[OR]] to i8
; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn1(i8 signext [[CONV3]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i32 [[INDVARS_IV]], -1
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[INDVARS_IV_NEXT]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP0]], -14
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[INDVARS_IV_NEXT]], 242
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
@@ -517,8 +516,8 @@ define i32 @test10(i32 %v) {
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], [[SEXT]]
; CHECK-NEXT: call void @consume.i1(i1 [[TMP1]])
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT]], 11
; CHECK-NEXT: call void @consume.i64(i64 [[TMP0]])
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 11
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE:%.*]]
; CHECK: leave:
; CHECK-NEXT: ret i32 22
diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr24356.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr24356.ll
index f2d938f6452d3..56b3695ba16b8 100644
--- a/llvm/test/Transforms/IndVarSimplify/X86/pr24356.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/pr24356.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=indvars -indvars-predicate-loops=0 < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -7,15 +8,43 @@ target triple = "x86_64-apple-macosx10.10.0"
; Function Attrs: nounwind ssp uwtable
define void @fn1() {
-; CHECK-LABEL: @fn1(
+; CHECK-LABEL: define void @fn1() {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: br label %[[BB4_PREHEADER:.*]]
+; CHECK: [[BB4_PREHEADER]]:
+; CHECK-NEXT: [[B_03:%.*]] = phi i8 [ 0, %[[BB]] ], [ [[TMP17:%.*]], %[[BB16:.*]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i8 [[B_03]], 0
+; CHECK-NEXT: br i1 [[TMP9]], label %[[BB4_PREHEADER_BB18_LOOPEXIT_SPLIT_CRIT_EDGE:.*]], label %[[BB4_PREHEADER_BB4_PREHEADER_SPLIT_CRIT_EDGE:.*]]
+; CHECK: [[BB4_PREHEADER_BB4_PREHEADER_SPLIT_CRIT_EDGE]]:
+; CHECK-NEXT: br label %[[BB4_PREHEADER_SPLIT:.*]]
+; CHECK: [[BB4_PREHEADER_BB18_LOOPEXIT_SPLIT_CRIT_EDGE]]:
+; CHECK-NEXT: store i32 0, ptr @a, align 4
+; CHECK-NEXT: br label %[[BB18_LOOPEXIT_SPLIT:.*]]
+; CHECK: [[BB4_PREHEADER_SPLIT]]:
+; CHECK-NEXT: br label %[[BB7:.*]]
+; CHECK: [[BB4:.*]]:
+; CHECK-NEXT: br i1 false, label %[[BB7]], label %[[BB16]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: br i1 false, label %[[BB18_LOOPEXIT:.*]], label %[[BB4]]
+; CHECK: [[BB16]]:
+; CHECK-NEXT: [[TMP17]] = add nuw nsw i8 [[B_03]], -1
+; CHECK-NEXT: br i1 false, label %[[BB18_LOOPEXIT1:.*]], label %[[BB4_PREHEADER]]
+; CHECK: [[BB18_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[BB18_LOOPEXIT_SPLIT]]
+; CHECK: [[BB18_LOOPEXIT_SPLIT]]:
+; CHECK-NEXT: br label %[[BB18:.*]]
+; CHECK: [[BB18_LOOPEXIT1]]:
+; CHECK-NEXT: [[TMP14_LCSSA5_LCSSA:%.*]] = phi i32 [ 1, %[[BB16]] ]
+; CHECK-NEXT: store i32 [[TMP14_LCSSA5_LCSSA]], ptr @a, align 4
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: ret void
+;
bb:
br label %bb4.preheader
bb4.preheader: ; preds = %bb, %bb16
-; CHECK-LABEL: bb4.preheader:
%b.03 = phi i8 [ 0, %bb ], [ %tmp17, %bb16 ]
-; CHECK: %tmp9 = icmp ugt i8 %b.03, 1
-; CHECK-NOT: %tmp9 = icmp ugt i8 0, 1
%tmp9 = icmp ugt i8 %b.03, 1
br i1 %tmp9, label %bb4.preheader.bb18.loopexit.split_crit_edge, label %bb4.preheader.bb4.preheader.split_crit_edge
diff --git a/llvm/test/Transforms/IndVarSimplify/ada-loops.ll b/llvm/test/Transforms/IndVarSimplify/ada-loops.ll
index 83f2f212a4385..3111b884d686c 100644
--- a/llvm/test/Transforms/IndVarSimplify/ada-loops.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ada-loops.ll
@@ -133,7 +133,7 @@ define void @kinds__urangezero(ptr nocapture %a) nounwind {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [21 x i32], ptr [[A]], i32 0, i32 [[TMP4]]
; CHECK-NEXT: store i32 0, ptr [[TMP5]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 31
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp samesign eq i32 [[INDVARS_IV_NEXT]], 31
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[BB]]
; CHECK: return:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext
index 1b9f19e7f7fa5..f9ad9a0e2fba4 100644
--- a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext
+++ b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext
@@ -1,72 +1,44 @@
# CSIR level Instrumentation Flag
:csir
-cond.llvm.11253644763537639171
-# Func Hash:
-1152921517491748863
-# Num Counters:
-1
-# Counter Values:
-200000
-
-foo
-# Func Hash:
-1720106746050921044
-# Num Counters:
-2
-# Counter Values:
-100000
-1
-
bar
# Func Hash:
1299757151682747028
# Num Counters:
2
# Counter Values:
-0
-0
-
-bar
-# Func Hash:
-29667547796
-# Num Counters:
-2
-# Counter Values:
100000
100000
main
# Func Hash:
-1152921517491748863
+1895182923573755903
# Num Counters:
1
# Counter Values:
1
-main
+cspgo_bar.c;clobber
# Func Hash:
1895182923573755903
# Num Counters:
1
# Counter Values:
-1
+200000
-cspgo.c:foo
+cspgo_bar.c;cond
# Func Hash:
-1720106746050921044
+1895182923573755903
# Num Counters:
-4
-# Counter Values:
-100000
-100000
-0
1
+# Counter Values:
+200000
-cspgo_bar.c:cond
+cspgo.c;foo
# Func Hash:
-12884901887
+2216626667076672412
# Num Counters:
-1
+2
# Counter Values:
-200000
+100000
+1
diff --git a/llvm/test/Transforms/PGOProfile/thinlto_cspgo_use.ll b/llvm/test/Transforms/PGOProfile/thinlto_cspgo_use.ll
index 6d35946a28ff3..3dcad34205d4b 100644
--- a/llvm/test/Transforms/PGOProfile/thinlto_cspgo_use.ll
+++ b/llvm/test/Transforms/PGOProfile/thinlto_cspgo_use.ll
@@ -15,8 +15,8 @@
; CSUSE: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
; CSUSE: {{![0-9]+}} = !{i32 1, !"CSProfileSummary", {{![0-9]+}}}
-; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 100000, i32 0}
-; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 0, i32 100000}
+; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 100000, i32 100000}
+; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 1, i32 100000}
source_filename = "cspgo.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
index eda54d999a79f..3340862ca4cfb 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
@@ -156,7 +156,7 @@ define void @cond_select_loop(ptr noalias nocapture noundef readonly %a, ptr noa
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[I_07]]
; CHECK-NEXT: store float [[COND]], ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
-; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1000
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp samesign eq i64 [[I_07]], 999
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
index 5386bf939918a..2fc739f2a492b 100644
--- a/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
+++ b/llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
@@ -52,8 +52,8 @@ define dso_local zeroext i32 @foo(ptr noundef %a) #0 {
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: [[ADD_7]] = add i32 [[TMP7]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
-; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_BODY4_1:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: [[CMP2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV]], 24
+; CHECK-NEXT: br i1 [[CMP2_NOT_7]], label [[FOR_BODY4_1:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: for.body4.1:
; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1_7:%.*]], [[FOR_BODY4_1]] ], [ 0, [[FOR_BODY4]] ]
; CHECK-NEXT: [[SUM_11_1:%.*]] = phi i32 [ [[ADD_1_7:%.*]], [[FOR_BODY4_1]] ], [ [[ADD_7]], [[FOR_BODY4]] ]
@@ -91,8 +91,8 @@ define dso_local zeroext i32 @foo(ptr noundef %a) #0 {
; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 1
; CHECK-NEXT: [[ADD_1_7]] = add i32 [[TMP23]], [[SUM_11_1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8
-; CHECK-NEXT: [[EXITCOND_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1_7]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
+; CHECK-NEXT: [[CMP2_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_1]], 24
+; CHECK-NEXT: br i1 [[CMP2_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
; CHECK: for.body4.2:
; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2_7:%.*]], [[FOR_BODY4_2]] ], [ 0, [[FOR_BODY4_1]] ]
; CHECK-NEXT: [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ], [ [[ADD_1_7]], [[FOR_BODY4_1]] ]
@@ -137,8 +137,8 @@ define dso_local zeroext i32 @foo(ptr noundef %a) #0 {
; CHECK-NEXT: [[MUL_2_7:%.*]] = mul i32 [[TMP31]], 3
; CHECK-NEXT: [[ADD_2_7]] = add i32 [[MUL_2_7]], [[ADD_2_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2_7]] = add nuw nsw i64 [[INDVARS_IV_2]], 8
-; CHECK-NEXT: [[EXITCOND_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2_7]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_2_NOT_7]], label [[FOR_INC5_2:%.*]], label [[FOR_BODY4_2]], !llvm.loop [[LOOP7]]
+; CHECK-NEXT: [[CMP2_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_2]], 24
+; CHECK-NEXT: br i1 [[CMP2_2_NOT_7]], label [[FOR_INC5_2:%.*]], label [[FOR_BODY4_2]], !llvm.loop [[LOOP7]]
; CHECK: for.inc5.2:
; CHECK-NEXT: ret i32 [[ADD_2_7]]
;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
index dfad53411aa55..aa812e5670ec0 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
@@ -27,8 +27,8 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8
; O1-NEXT: ret void
; O1: for.cond.cleanup3:
; O1-NEXT: [[INC7]] = add nuw nsw i64 [[I_06]], 1
-; O1-NEXT: [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100
-; O1-NEXT: br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; O1-NEXT: [[CMP_NOT:%.*]] = icmp samesign eq i64 [[I_06]], 99
+; O1-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]]
; O1: for.body4:
; O1-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ 0, [[FOR_COND1_PREHEADER]] ]
; O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05]]
@@ -52,33 +52,33 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8
; O2-NEXT: [[I_06:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; O2-NEXT: br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]]
; O2: for.body4.preheader:
-; O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER9:%.*]], label [[VECTOR_BODY:%.*]]
+; O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER8:%.*]], label [[VECTOR_BODY:%.*]]
; O2: vector.body:
; O2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY4_PREHEADER]] ]
; O2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]]
; O2-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; O2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
+; O2-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
; O2-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
-; O2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], splat (i32 1)
+; O2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD7]], splat (i32 1)
; O2-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
; O2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
; O2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; O2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; O2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; O2: middle.block:
-; O2-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER9]]
-; O2: for.body4.preheader9:
+; O2-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER8]]
+; O2: for.body4.preheader8:
; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, [[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; O2-NEXT: br label [[FOR_BODY4:%.*]]
; O2: for.cond.cleanup:
; O2-NEXT: ret void
; O2: for.cond.cleanup3:
; O2-NEXT: [[INC7]] = add nuw nsw i64 [[I_06]], 1
-; O2-NEXT: [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100
-; O2-NEXT: br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP8:![0-9]+]]
+; O2-NEXT: [[CMP_NOT:%.*]] = icmp samesign eq i64 [[I_06]], 99
+; O2-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP8:![0-9]+]]
; O2: for.body4:
-; O2-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ [[J_05_PH]], [[FOR_BODY4_PREHEADER9]] ]
+; O2-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ [[J_05_PH]], [[FOR_BODY4_PREHEADER8]] ]
; O2-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05]]
; O2-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA0]]
; O2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1
@@ -106,9 +106,9 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8
; O3-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]]
; O3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; O3-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
+; O3-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
; O3-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
-; O3-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], splat (i32 1)
+; O3-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], splat (i32 1)
; O3-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
; O3-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
; O3-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -130,8 +130,8 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8
; O3-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP8:![0-9]+]]
; O3: for.cond1.for.cond.cleanup3_crit_edge.us:
; O3-NEXT: [[INC7_US]] = add nuw nsw i64 [[I_06_US]], 1
-; O3-NEXT: [[EXITCOND8_NOT:%.*]] = icmp eq i64 [[INC7_US]], 100
-; O3-NEXT: br i1 [[EXITCOND8_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]], !llvm.loop [[LOOP9:![0-9]+]]
+; O3-NEXT: [[CMP_US_NOT:%.*]] = icmp samesign eq i64 [[I_06_US]], 99
+; O3-NEXT: br i1 [[CMP_US_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]], !llvm.loop [[LOOP9:![0-9]+]]
; O3: for.cond.cleanup:
; O3-NEXT: ret void
;
More information about the llvm-commits
mailing list