[llvm] 0d95b20 - [LICM] Reassociate & hoist add expressions
Max Kazantsev via llvm-commits
llvm-commits at lists.llvm.org
Sun May 21 23:22:30 PDT 2023
Author: Max Kazantsev
Date: 2023-05-22T13:22:22+07:00
New Revision: 0d95b20b63d7acc459dc0b2a7b2e4f9924c0adce
URL: https://github.com/llvm/llvm-project/commit/0d95b20b63d7acc459dc0b2a7b2e4f9924c0adce
DIFF: https://github.com/llvm/llvm-project/commit/0d95b20b63d7acc459dc0b2a7b2e4f9924c0adce.diff
LOG: [LICM] Reassociate & hoist add expressions
This patch allows LICM to reassociate and hoist following expressions:
```
loop:
%sum = add nsw %iv, %C1
%cmp = icmp <signed pred> %sum, C2
```
where `C1` and `C2` are loop invariants. The reassociated version looks like
```
preheader:
%inv_sum = C2 - C1
...
loop:
%cmp = icmp <signed pred> %iv, %inv_sum
```
In order to prove legality, we need both initial addition and the newly created subtraction
to happen without overflow.
Differential Revision: https://reviews.llvm.org/D149132
Reviewed By: skatkov
Added:
Modified:
llvm/lib/Transforms/Scalar/LICM.cpp
llvm/test/Transforms/LICM/hoist-add-sub.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 0f9fb864c9e12..eb732aecb2d69 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -106,6 +106,8 @@ STATISTIC(NumMinMaxHoisted,
"Number of min/max expressions hoisted out of the loop");
STATISTIC(NumGEPsHoisted,
"Number of geps reassociated and hoisted out of the loop");
+STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
+ "and hoisted out of the loop");
/// Memory promotion is enabled by default.
static cl::opt<bool>
@@ -2525,10 +2527,89 @@ static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
return true;
}
+/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
+/// C1 and C2 are loop invariants and LV is a loop-variant.
+static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
+ Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
+ AssumptionCache *AC, DominatorTree *DT) {
+ assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
+ assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
+ assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
+
+ // Try to represent VariantLHS as sum of invariant and variant operands.
+ using namespace PatternMatch;
+ Value *VariantOp, *InvariantOp;
+ if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
+ return false;
+
+ // LHS itself is a loop-variant, try to represent it in the form:
+ // "VariantOp + InvariantOp". If it is possible, then we can reassociate.
+ if (L.isLoopInvariant(VariantOp))
+ std::swap(VariantOp, InvariantOp);
+ if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
+ return false;
+
+ // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
+ // freely move values from left side of inequality to right side (just as in
+ // normal linear arithmetics). Overflows make things much more complicated, so
+ // we want to avoid this.
+ auto &DL = L.getHeader()->getModule()->getDataLayout();
+ bool ProvedNoOverflowAfterReassociate =
+ computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp,
+ DT) == llvm::OverflowResult::NeverOverflows;
+ if (!ProvedNoOverflowAfterReassociate)
+ return false;
+ auto *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplify form?");
+ IRBuilder<> Builder(Preheader->getTerminator());
+ Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
+ /*HasNUW*/ false, /*HasNSW*/ true);
+ ICmp.setPredicate(Pred);
+ ICmp.setOperand(0, VariantOp);
+ ICmp.setOperand(1, NewCmpOp);
+ eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
+ return true;
+}
+
+/// Reassociate and hoist add/sub expressions.
+static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT) {
+ using namespace PatternMatch;
+ ICmpInst::Predicate Pred;
+ Value *LHS, *RHS;
+ if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
+ return false;
+
+ // TODO: Support unsigned predicates?
+ if (!ICmpInst::isSigned(Pred))
+ return false;
+
+ // Put variant operand to LHS position.
+ if (L.isLoopInvariant(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ // We want to delete the initial operation after reassociation, so only do it
+ // if it has no other uses.
+ if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
+ return false;
+
+ // TODO: We could go with smarter context, taking common dominator of all I's
+ // users instead of I itself.
+ if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
+ return true;
+
+ // TODO: Support Sub.
+
+ return false;
+}
+
static bool hoistArithmetics(Instruction &I, Loop &L,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater &MSSAU,
- AssumptionCache *AC, DominatorTree *DT) {
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT) {
// Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
// into (x < min(INV1, INV2)), and hoisting the invariant part of this
// expression out of the loop.
@@ -2545,6 +2626,13 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
return true;
}
+ // Try to hoist add/sub's by reassociation.
+ if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
+ ++NumHoisted;
+ ++NumAddSubHoisted;
+ return true;
+ }
+
return false;
}
diff --git a/llvm/test/Transforms/LICM/hoist-add-sub.ll b/llvm/test/Transforms/LICM/hoist-add-sub.ll
index 7d02fd7509169..a80099db31b3f 100644
--- a/llvm/test/Transforms/LICM/hoist-add-sub.ll
+++ b/llvm/test/Transforms/LICM/hoist-add-sub.ll
@@ -165,18 +165,18 @@ out_of_bounds:
}
-; TODO: x + iv < 4 ==> iv < 4 - x
+; x + iv < 4 ==> iv < 4 - x
define i32 @test_02(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_02
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]]
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[X]], [[IV]]
-; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -391,18 +391,18 @@ failed:
ret i32 -2
}
-; TODO: iv + x < 4 ==> iv < 4 - x
+; iv + x < 4 ==> iv < 4 - x
define i32 @test_04(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_04
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]]
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[IV]], [[X]]
-; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
More information about the llvm-commits
mailing list