[llvm] dd0cf23 - [LICM] Reassociate & hoist sub expressions
Max Kazantsev via llvm-commits
llvm-commits at lists.llvm.org
Sun May 28 22:53:02 PDT 2023
Author: Max Kazantsev
Date: 2023-05-29T12:52:43+07:00
New Revision: dd0cf23e4a87765b03bb1003d4cc37445d8217b4
URL: https://github.com/llvm/llvm-project/commit/dd0cf23e4a87765b03bb1003d4cc37445d8217b4
DIFF: https://github.com/llvm/llvm-project/commit/dd0cf23e4a87765b03bb1003d4cc37445d8217b4.diff
LOG: [LICM] Reassociate & hoist sub expressions
LICM could reassociate mixed variant/invariant comparison/arithmetic operations
and hoist invariant parts out of loop if it can prove that they can be computed
without overflow. Motivating example here:
```
INV1 - VAR1 < INV2
```
can be turned into
```
VAR1 > INV1 - INV2
```
if we can prove no-signed-overflow here. Then `INV1 - INV2` can be computed
out of loop, so we save one arithmetic operation in-loop.
Reviewed By: skatkov
Differential Revision: https://reviews.llvm.org/D148001
Added:
Modified:
llvm/lib/Transforms/Scalar/LICM.cpp
llvm/test/Transforms/LICM/hoist-add-sub.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index eb732aecb2d69..fa964a8cea006 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2572,6 +2572,68 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
return true;
}
+/// Try to reassociate and hoist the following two patterns:
+/// LV - C1 < C2 --> LV < C1 + C2,
+/// C1 - LV < C2 --> LV > C1 - C2.
+static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
+ Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
+ AssumptionCache *AC, DominatorTree *DT) {
+ assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
+ assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
+ assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
+
+ // Try to represent VariantLHS as difference of variant/invariant operands.
+ using namespace PatternMatch;
+ Value *VariantOp, *InvariantOp;
+ if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
+ return false;
+
+ bool VariantSubtracted = false;
+ // LHS itself is a loop-variant, try to represent it in the form:
+ // "VariantOp - InvariantOp". If it is possible, then we can reassociate. If
+ // the variant operand goes with minus, we use a slightly different scheme.
+ if (L.isLoopInvariant(VariantOp)) {
+ std::swap(VariantOp, InvariantOp);
+ VariantSubtracted = true;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
+ return false;
+
+ // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
+ // freely move values from left side of inequality to right side (just as in
+ // normal linear arithmetic). Overflows make things much more complicated, so
+ // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
+ // "C1 - C2" does not overflow.
+ auto &DL = L.getHeader()->getModule()->getDataLayout();
+ if (VariantSubtracted) {
+ // C1 - LV < C2 --> LV > C1 - C2
+ if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, &ICmp,
+ DT) != llvm::OverflowResult::NeverOverflows)
+ return false;
+ } else {
+ // LV - C1 < C2 --> LV < C1 + C2
+ if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, &ICmp,
+ DT) != llvm::OverflowResult::NeverOverflows)
+ return false;
+ }
+ auto *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplify form?");
+ IRBuilder<> Builder(Preheader->getTerminator());
+ Value *NewCmpOp =
+ VariantSubtracted
+ ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
+ /*HasNUW*/ false, /*HasNSW*/ true)
+ : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
+ /*HasNUW*/ false, /*HasNSW*/ true);
+ ICmp.setPredicate(Pred);
+ ICmp.setOperand(0, VariantOp);
+ ICmp.setOperand(1, NewCmpOp);
+ eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
+ return true;
+}
+
/// Reassociate and hoist add/sub expressions.
static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2601,7 +2663,8 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
return true;
- // TODO: Support Sub.
+ if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
+ return true;
return false;
}
diff --git a/llvm/test/Transforms/LICM/hoist-add-sub.ll b/llvm/test/Transforms/LICM/hoist-add-sub.ll
index a80099db31b3f..5393cdb1d29c4 100644
--- a/llvm/test/Transforms/LICM/hoist-add-sub.ll
+++ b/llvm/test/Transforms/LICM/hoist-add-sub.ll
@@ -1,18 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -passes=licm -verify-memoryssa < %s | FileCheck %s
-; TODO: x - iv < 4 ==> iv > x - 4
+; x - iv < 4 ==> iv > x - 4
define i32 @test_01(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_01
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0:![0-9]+]]
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 [[X]], 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT: [[ARITH:%.*]] = sub nsw i32 [[X]], [[IV]]
-; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_OP]]
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -278,18 +278,18 @@ failed:
ret i32 -2
}
-; TODO: iv - x < 4 ==> iv < 4 + x
+; iv - x < 4 ==> iv < 4 + x
define i32 @test_03(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_03
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]]
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT: [[ARITH:%.*]] = sub nsw i32 [[IV]], [[X]]
-; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
More information about the llvm-commits
mailing list