[llvm] LICM: extend hoist BO assoc to mul case (PR #106991)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 2 06:45:36 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
Trivially extend hoistBOAssociation to also handle the BinaryOperator Mul.
Alive2 proofs: https://alive2.llvm.org/ce/z/zjtR5g
-- 8< --
Based on #<!-- -->106978.
---
Full diff: https://github.com/llvm/llvm-project/pull/106991.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/LICM.cpp (+20-17)
- (modified) llvm/test/Transforms/LICM/hoist-binop.ll (+126-10)
- (modified) llvm/test/Transforms/LICM/sink-foldable.ll (+1-2)
- (modified) llvm/test/Transforms/LICM/update-scev-after-hoist.ll (+27-7)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 86c7dceffc5245..837cda22a57ac6 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2816,40 +2816,43 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
DominatorTree *DT) {
- auto *BO = dyn_cast<BinaryOperator>(&I);
- if (!BO || !BO->isAssociative())
- return false;
+ using namespace PatternMatch;
- // Only fold ADDs for now.
+ // Transform "(LV op C1) op C2" ==> "LV op (C1 op C2)"
+ Value *LV, *C1, *C2;
+ if (!match(&I, m_BinOp(m_BinOp(m_Value(LV), m_Value(C1)), m_Value(C2))) ||
+ L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) || !L.isLoopInvariant(C2))
+ return false;
+ auto *BO = cast<BinaryOperator>(&I),
+ *BO0 = cast<BinaryOperator>(BO->getOperand(0));
Instruction::BinaryOps Opcode = BO->getOpcode();
- if (Opcode != Instruction::Add)
+ if (BO0->getOpcode() != Opcode || !BO->isAssociative() ||
+ !BO0->isAssociative())
return false;
- auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
- if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative() ||
- BO0->hasNUsesOrMore(3))
+ // TODO: Only hoist ADDs and MULs for now.
+ if (Opcode != Instruction::Add && Opcode != Instruction::Mul)
return false;
- // Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
- Value *LV = BO0->getOperand(0);
- Value *C1 = BO0->getOperand(1);
- Value *C2 = BO->getOperand(1);
-
- if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) || !L.isLoopInvariant(C2))
+ // This is a heuristic to ensure that code-size doesn't blow up.
+ if (BO0->hasNUsesOrMore(3))
return false;
auto *Preheader = L.getLoopPreheader();
assert(Preheader && "Loop is not in simplify form?");
- auto *Inv = BinaryOperator::Create(Opcode, C1, C2, "invariant.op",
- Preheader->getTerminator()->getIterator());
+ IRBuilder<> Builder(Preheader->getTerminator());
+ auto *Inv = Builder.CreateBinOp(Opcode, C1, C2, "invariant.op");
+
auto *NewBO = BinaryOperator::Create(
Opcode, LV, Inv, BO->getName() + ".reass", BO->getIterator());
// Copy NUW for ADDs if both instructions have it.
if (Opcode == Instruction::Add && BO->hasNoUnsignedWrap() &&
BO0->hasNoUnsignedWrap()) {
- Inv->setHasNoUnsignedWrap(true);
+ // If the constant-folder didn't kick in, and a new Instruction was created.
+ if (auto *I = dyn_cast<Instruction>(Inv))
+ I->setHasNoUnsignedWrap(true);
NewBO->setHasNoUnsignedWrap(true);
}
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index 7a309ea294fc85..991cab02b5d959 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=licm < %s | FileCheck %s
-; Fold ADD and remove old op if unused.
+; Hoist ADD and remove old op if unused.
define void @add_one_use(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_one_use(
; CHECK-NEXT: entry:
@@ -22,8 +22,28 @@ loop:
br label %loop
}
-; Fold ADD and copy NUW if both ops have it.
-; https://alive2.llvm.org/ce/z/bPAT7Z
+; Hoist MUL and remove old op if unused.
+define void @mul_one_use(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_one_use(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[FACTOR_OP_MUL:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[STEP_ADD_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD_REASS]] = mul i64 [[INDEX]], [[FACTOR_OP_MUL]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = mul i64 %index, %c1
+ %index.next = mul i64 %step.add, %c2
+ br label %loop
+}
+
+; Hoist ADD and copy NUW if both ops have it.
define void @add_nuw(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_nuw(
; CHECK-NEXT: entry:
@@ -47,7 +67,31 @@ loop:
br label %loop
}
-; Fold ADD but don't copy NUW if only one op has it.
+; Hoist MUL and drop NUW if both ops have it.
+define void @mul_nuw(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_nuw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = mul nuw i64 [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = mul i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = mul nuw i64 %index, %c1
+ call void @use(i64 %step.add)
+ %index.next = mul nuw i64 %step.add, %c2
+ br label %loop
+}
+
+; Hoist ADD but don't copy NUW if only one op has it.
define void @add_no_nuw(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_no_nuw(
; CHECK-NEXT: entry:
@@ -71,7 +115,31 @@ loop:
br label %loop
}
-; Fold ADD but don't copy NSW if one op has it.
+; Hoist MUL and drop NUW if only one op has it.
+define void @mul_no_nuw(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_no_nuw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = mul i64 [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = mul i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = mul i64 %index, %c1
+ call void @use(i64 %step.add)
+ %index.next = mul nuw i64 %step.add, %c2
+ br label %loop
+}
+
+; Hoist ADD but don't copy NSW if one op has it.
define void @add_no_nsw(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_no_nsw(
; CHECK-NEXT: entry:
@@ -95,7 +163,31 @@ loop:
br label %loop
}
-; Fold ADD but don't copy NSW even if both ops have it.
+; Hoist MUL and drop NSW if one op has it.
+define void @mul_no_nsw(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_no_nsw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = mul i64 [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = mul i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = mul i64 %index, %c1
+ call void @use(i64 %step.add)
+ %index.next = mul nsw i64 %step.add, %c2
+ br label %loop
+}
+
+; Hoist ADD but don't copy NSW even if both ops have it.
define void @add_no_nsw_2(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_no_nsw_2(
; CHECK-NEXT: entry:
@@ -119,7 +211,31 @@ loop:
br label %loop
}
-; Don't fold if the ops are different (even if they are both associative).
+; Hoist MUL and drop NSW if both ops have it.
+define void @mul_no_nsw_2(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_no_nsw_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = mul nsw i64 [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = mul i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = mul nsw i64 %index, %c1
+ call void @use(i64 %step.add)
+ %index.next = mul nsw i64 %step.add, %c2
+ br label %loop
+}
+
+; Don't hoist if the ops are different (even if they are both associative).
define void @diff_ops(i64 %c1, i64 %c2) {
; CHECK-LABEL: @diff_ops(
; CHECK-NEXT: entry:
@@ -142,7 +258,7 @@ loop:
br label %loop
}
-; Don't fold if the ops are not associative.
+; Don't hoist if the ops are not associative.
define void @noassoc_ops(i64 %c1, i64 %c2) {
; CHECK-LABEL: @noassoc_ops(
; CHECK-NEXT: entry:
@@ -165,7 +281,7 @@ loop:
br label %loop
}
-; Don't fold floating-point ops, even if they are associative. This would be
+; Don't hoist floating-point ops, even if they are associative. This would be
; valid, but is currently disabled.
define void @fadd(float %c1, float %c2) {
; CHECK-LABEL: @fadd(
@@ -189,7 +305,7 @@ loop:
br label %loop
}
-; Don't fold if the intermediate op has more than two uses. This is an
+; Don't hoist if the intermediate op has more than two uses. This is an
; heuristic that can be adjusted if warranted. Currently we are being
; conservative to minimise potential impact in code size.
define void @not_many_uses(i64 %c1, i64 %c2, i64 %c3) {
diff --git a/llvm/test/Transforms/LICM/sink-foldable.ll b/llvm/test/Transforms/LICM/sink-foldable.ll
index 36e2eab6313dcd..d1cf3de5301b29 100644
--- a/llvm/test/Transforms/LICM/sink-foldable.ll
+++ b/llvm/test/Transforms/LICM/sink-foldable.ll
@@ -77,7 +77,6 @@ return:
define ptr @test2(i32 %j, ptr readonly %P, ptr readnone %Q) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i32 1, 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I_ADDR_0:%.*]] = phi i32 [ [[ADD_REASS:%.*]], [[IF_END:%.*]] ]
@@ -98,7 +97,7 @@ define ptr @test2(i32 %j, ptr readonly %P, ptr readnone %Q) {
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[ADD_PTR]], i64 [[IDX2_EXT]]
; CHECK-NEXT: [[L1:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt ptr [[L1]], [[Q]]
-; CHECK-NEXT: [[ADD_REASS]] = add i32 [[I_ADDR]], [[INVARIANT_OP]]
+; CHECK-NEXT: [[ADD_REASS]] = add i32 [[I_ADDR]], 2
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOPEXIT2:%.*]], label [[FOR_COND]]
; CHECK: loopexit0:
; CHECK-NEXT: [[P0:%.*]] = phi ptr [ null, [[FOR_COND]] ]
diff --git a/llvm/test/Transforms/LICM/update-scev-after-hoist.ll b/llvm/test/Transforms/LICM/update-scev-after-hoist.ll
index fc45b8fce1766a..e303d04ce31913 100644
--- a/llvm/test/Transforms/LICM/update-scev-after-hoist.ll
+++ b/llvm/test/Transforms/LICM/update-scev-after-hoist.ll
@@ -1,13 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes='loop-unroll,loop-mssa(licm),print<scalar-evolution>' -unroll-count=4 -disable-output < %s 2>&1 | FileCheck %s --check-prefix=SCEV-EXPR
define i16 @main() {
-; SCEV-EXPR: Classifying expressions for: @main
-; SCEV-EXPR-NEXT: %mul = phi i16 [ 1, %entry ], [ %mul.n.3, %loop ]
-; SCEV-EXPR-NEXT: --> %mul U: [0,-15) S: [-32768,32753) Exits: 4096 LoopDispositions: { %loop: Variant }
-; SCEV-EXPR-NEXT: %div = phi i16 [ 32767, %entry ], [ %div.n.3, %loop ]
-; SCEV-EXPR-NEXT: --> %div U: [-2048,-32768) S: [-2048,-32768) Exits: 7 LoopDispositions: { %loop: Variant }
-; SCEV-EXPR-NEXT: %mul.n.reass.reass = mul i16 %mul, 8
-; SCEV-EXPR-NEXT: --> (8 * %mul) U: [0,-7) S: [-32768,32761) Exits: -32768 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-LABEL: 'main'
+; SCEV-EXPR-NEXT: Classifying expressions for: @main
+; SCEV-EXPR-NEXT: %mul = phi i16 [ 1, %entry ], [ %mul.n.3.reass, %loop ]
+; SCEV-EXPR-NEXT: --> %mul U: [0,-15) S: [-32768,32753) Exits: 4096 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT: %div = phi i16 [ 32767, %entry ], [ %div.n.3, %loop ]
+; SCEV-EXPR-NEXT: --> %div U: [-2048,-32768) S: [-2048,-32768) Exits: 7 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT: %mul.n.reass.reass = mul i16 %mul, 8
+; SCEV-EXPR-NEXT: --> (8 * %mul) U: [0,-7) S: [-32768,32761) Exits: -32768 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT: %div.n = sdiv i16 %div, 2
+; SCEV-EXPR-NEXT: --> %div.n U: [-16384,16384) S: [-16384,16384) Exits: 3 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT: %div.n.1 = sdiv i16 %div.n, 2
+; SCEV-EXPR-NEXT: --> %div.n.1 U: [-8192,8192) S: [-8192,8192) Exits: 1 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT: %div.n.2 = sdiv i16 %div.n.1, 2
+; SCEV-EXPR-NEXT: --> %div.n.2 U: [-4096,4096) S: [-4096,4096) Exits: 0 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT: %mul.n.3.reass = mul i16 %mul, 16
+; SCEV-EXPR-NEXT: --> (16 * %mul) U: [0,-15) S: [-32768,32753) Exits: 0 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT: %div.n.3 = sdiv i16 %div.n.2, 2
+; SCEV-EXPR-NEXT: --> %div.n.3 U: [-2048,2048) S: [-2048,2048) Exits: 0 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT: %mul.lcssa = phi i16 [ %mul.n.reass.reass, %loop ]
+; SCEV-EXPR-NEXT: --> (8 * %mul) U: [0,-7) S: [-32768,32761) --> -32768 U: [-32768,-32767) S: [-32768,-32767)
+; SCEV-EXPR-NEXT: Determining loop execution counts for: @main
+; SCEV-EXPR-NEXT: Loop %loop: backedge-taken count is i32 3
+; SCEV-EXPR-NEXT: Loop %loop: constant max backedge-taken count is i32 3
+; SCEV-EXPR-NEXT: Loop %loop: symbolic max backedge-taken count is i32 3
+; SCEV-EXPR-NEXT: Loop %loop: Trip multiple is 4
+;
entry:
br label %loop
``````````
</details>
https://github.com/llvm/llvm-project/pull/106991
More information about the llvm-commits
mailing list