[llvm] LICM: extend hoist BO assoc to mul case (PR #106991)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 2 06:45:36 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

<details>
<summary>Changes</summary>

Trivially extend hoistBOAssociation to also handle the BinaryOperator Mul.

Alive2 proofs: https://alive2.llvm.org/ce/z/zjtR5g

-- 8< --
Based on #<!-- -->106978.

---
Full diff: https://github.com/llvm/llvm-project/pull/106991.diff


4 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/LICM.cpp (+20-17) 
- (modified) llvm/test/Transforms/LICM/hoist-binop.ll (+126-10) 
- (modified) llvm/test/Transforms/LICM/sink-foldable.ll (+1-2) 
- (modified) llvm/test/Transforms/LICM/update-scev-after-hoist.ll (+27-7) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 86c7dceffc5245..837cda22a57ac6 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2816,40 +2816,43 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
                                ICFLoopSafetyInfo &SafetyInfo,
                                MemorySSAUpdater &MSSAU, AssumptionCache *AC,
                                DominatorTree *DT) {
-  auto *BO = dyn_cast<BinaryOperator>(&I);
-  if (!BO || !BO->isAssociative())
-    return false;
+  using namespace PatternMatch;
 
-  // Only fold ADDs for now.
+  // Transform "(LV op C1) op C2" ==> "LV op (C1 op C2)"
+  Value *LV, *C1, *C2;
+  if (!match(&I, m_BinOp(m_BinOp(m_Value(LV), m_Value(C1)), m_Value(C2))) ||
+      L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) || !L.isLoopInvariant(C2))
+    return false;
+  auto *BO = cast<BinaryOperator>(&I),
+       *BO0 = cast<BinaryOperator>(BO->getOperand(0));
   Instruction::BinaryOps Opcode = BO->getOpcode();
-  if (Opcode != Instruction::Add)
+  if (BO0->getOpcode() != Opcode || !BO->isAssociative() ||
+      !BO0->isAssociative())
     return false;
 
-  auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
-  if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative() ||
-      BO0->hasNUsesOrMore(3))
+  // TODO: Only hoist ADDs and MULs for now.
+  if (Opcode != Instruction::Add && Opcode != Instruction::Mul)
     return false;
 
-  // Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
-  Value *LV = BO0->getOperand(0);
-  Value *C1 = BO0->getOperand(1);
-  Value *C2 = BO->getOperand(1);
-
-  if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) || !L.isLoopInvariant(C2))
+  // This is a heuristic to ensure that code-size doesn't blow up.
+  if (BO0->hasNUsesOrMore(3))
     return false;
 
   auto *Preheader = L.getLoopPreheader();
   assert(Preheader && "Loop is not in simplify form?");
 
-  auto *Inv = BinaryOperator::Create(Opcode, C1, C2, "invariant.op",
-                                     Preheader->getTerminator()->getIterator());
+  IRBuilder<> Builder(Preheader->getTerminator());
+  auto *Inv = Builder.CreateBinOp(Opcode, C1, C2, "invariant.op");
+
   auto *NewBO = BinaryOperator::Create(
       Opcode, LV, Inv, BO->getName() + ".reass", BO->getIterator());
 
   // Copy NUW for ADDs if both instructions have it.
   if (Opcode == Instruction::Add && BO->hasNoUnsignedWrap() &&
       BO0->hasNoUnsignedWrap()) {
-    Inv->setHasNoUnsignedWrap(true);
+    // If the constant-folder didn't kick in, and a new Instruction was created.
+    if (auto *I = dyn_cast<Instruction>(Inv))
+      I->setHasNoUnsignedWrap(true);
     NewBO->setHasNoUnsignedWrap(true);
   }
 
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index 7a309ea294fc85..991cab02b5d959 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -passes=licm < %s | FileCheck %s
 
-; Fold ADD and remove old op if unused.
+; Hoist ADD and remove old op if unused.
 define void @add_one_use(i64 %c1, i64 %c2) {
 ; CHECK-LABEL: @add_one_use(
 ; CHECK-NEXT:  entry:
@@ -22,8 +22,28 @@ loop:
   br label %loop
 }
 
-; Fold ADD and copy NUW if both ops have it.
-; https://alive2.llvm.org/ce/z/bPAT7Z
+; Hoist MUL and remove old op if unused.
+define void @mul_one_use(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_one_use(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FACTOR_OP_MUL:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[STEP_ADD_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD_REASS]] = mul i64 [[INDEX]], [[FACTOR_OP_MUL]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+  %step.add = mul i64 %index, %c1
+  %index.next = mul i64 %step.add, %c2
+  br label %loop
+}
+
+; Hoist ADD and copy NUW if both ops have it.
 define void @add_nuw(i64 %c1, i64 %c2) {
 ; CHECK-LABEL: @add_nuw(
 ; CHECK-NEXT:  entry:
@@ -47,7 +67,31 @@ loop:
   br label %loop
 }
 
-; Fold ADD but don't copy NUW if only one op has it.
+; Hoist MUL and drop NUW if both ops have it.
+define void @mul_nuw(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = mul nuw i64 [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = mul i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+  %step.add = mul nuw i64 %index, %c1
+  call void @use(i64 %step.add)
+  %index.next = mul nuw i64 %step.add, %c2
+  br label %loop
+}
+
+; Hoist ADD but don't copy NUW if only one op has it.
 define void @add_no_nuw(i64 %c1, i64 %c2) {
 ; CHECK-LABEL: @add_no_nuw(
 ; CHECK-NEXT:  entry:
@@ -71,7 +115,31 @@ loop:
   br label %loop
 }
 
-; Fold ADD but don't copy NSW if one op has it.
+; Hoist MUL and drop NUW if only one op has it.
+define void @mul_no_nuw(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_no_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = mul i64 [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = mul i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+  %step.add = mul i64 %index, %c1
+  call void @use(i64 %step.add)
+  %index.next = mul nuw i64 %step.add, %c2
+  br label %loop
+}
+
+; Hoist ADD but don't copy NSW if one op has it.
 define void @add_no_nsw(i64 %c1, i64 %c2) {
 ; CHECK-LABEL: @add_no_nsw(
 ; CHECK-NEXT:  entry:
@@ -95,7 +163,31 @@ loop:
   br label %loop
 }
 
-; Fold ADD but don't copy NSW even if both ops have it.
+; Hoist MUL and drop NSW if one op has it.
+define void @mul_no_nsw(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_no_nsw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = mul i64 [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = mul i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+  %step.add = mul i64 %index, %c1
+  call void @use(i64 %step.add)
+  %index.next = mul nsw i64 %step.add, %c2
+  br label %loop
+}
+
+; Hoist ADD but don't copy NSW even if both ops have it.
 define void @add_no_nsw_2(i64 %c1, i64 %c2) {
 ; CHECK-LABEL: @add_no_nsw_2(
 ; CHECK-NEXT:  entry:
@@ -119,7 +211,31 @@ loop:
   br label %loop
 }
 
-; Don't fold if the ops are different (even if they are both associative).
+; Hoist MUL and drop NSW if both ops have it.
+define void @mul_no_nsw_2(i64 %c1, i64 %c2) {
+; CHECK-LABEL: @mul_no_nsw_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = mul i64 [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = mul nsw i64 [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = mul i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+  %step.add = mul nsw i64 %index, %c1
+  call void @use(i64 %step.add)
+  %index.next = mul nsw i64 %step.add, %c2
+  br label %loop
+}
+
+; Don't hoist if the ops are different (even if they are both associative).
 define void @diff_ops(i64 %c1, i64 %c2) {
 ; CHECK-LABEL: @diff_ops(
 ; CHECK-NEXT:  entry:
@@ -142,7 +258,7 @@ loop:
   br label %loop
 }
 
-; Don't fold if the ops are not associative.
+; Don't hoist if the ops are not associative.
 define void @noassoc_ops(i64 %c1, i64 %c2) {
 ; CHECK-LABEL: @noassoc_ops(
 ; CHECK-NEXT:  entry:
@@ -165,7 +281,7 @@ loop:
   br label %loop
 }
 
-; Don't fold floating-point ops, even if they are associative. This would be
+; Don't hoist floating-point ops, even if they are associative. This would be
 ; valid, but is currently disabled.
 define void @fadd(float %c1, float %c2) {
 ; CHECK-LABEL: @fadd(
@@ -189,7 +305,7 @@ loop:
   br label %loop
 }
 
-; Don't fold if the intermediate op has more than two uses. This is an
+; Don't hoist if the intermediate op has more than two uses. This is an
 ; heuristic that can be adjusted if warranted. Currently we are being
 ; conservative to minimise potential impact in code size.
 define void @not_many_uses(i64 %c1, i64 %c2, i64 %c3) {
diff --git a/llvm/test/Transforms/LICM/sink-foldable.ll b/llvm/test/Transforms/LICM/sink-foldable.ll
index 36e2eab6313dcd..d1cf3de5301b29 100644
--- a/llvm/test/Transforms/LICM/sink-foldable.ll
+++ b/llvm/test/Transforms/LICM/sink-foldable.ll
@@ -77,7 +77,6 @@ return:
 define ptr @test2(i32 %j, ptr readonly %P, ptr readnone %Q) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = add i32 1, 1
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[I_ADDR_0:%.*]] = phi i32 [ [[ADD_REASS:%.*]], [[IF_END:%.*]] ]
@@ -98,7 +97,7 @@ define ptr @test2(i32 %j, ptr readonly %P, ptr readnone %Q) {
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[ADD_PTR]], i64 [[IDX2_EXT]]
 ; CHECK-NEXT:    [[L1:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt ptr [[L1]], [[Q]]
-; CHECK-NEXT:    [[ADD_REASS]] = add i32 [[I_ADDR]], [[INVARIANT_OP]]
+; CHECK-NEXT:    [[ADD_REASS]] = add i32 [[I_ADDR]], 2
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[LOOPEXIT2:%.*]], label [[FOR_COND]]
 ; CHECK:       loopexit0:
 ; CHECK-NEXT:    [[P0:%.*]] = phi ptr [ null, [[FOR_COND]] ]
diff --git a/llvm/test/Transforms/LICM/update-scev-after-hoist.ll b/llvm/test/Transforms/LICM/update-scev-after-hoist.ll
index fc45b8fce1766a..e303d04ce31913 100644
--- a/llvm/test/Transforms/LICM/update-scev-after-hoist.ll
+++ b/llvm/test/Transforms/LICM/update-scev-after-hoist.ll
@@ -1,13 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -passes='loop-unroll,loop-mssa(licm),print<scalar-evolution>' -unroll-count=4 -disable-output < %s 2>&1 | FileCheck %s --check-prefix=SCEV-EXPR
 
 define i16 @main() {
-; SCEV-EXPR:      Classifying expressions for: @main
-; SCEV-EXPR-NEXT:  %mul = phi i16 [ 1, %entry ], [ %mul.n.3, %loop ]
-; SCEV-EXPR-NEXT:  -->  %mul U: [0,-15) S: [-32768,32753)		Exits: 4096		LoopDispositions: { %loop: Variant }
-; SCEV-EXPR-NEXT:  %div = phi i16 [ 32767, %entry ], [ %div.n.3, %loop ]
-; SCEV-EXPR-NEXT:  -->  %div U: [-2048,-32768) S: [-2048,-32768)		Exits: 7		LoopDispositions: { %loop: Variant }
-; SCEV-EXPR-NEXT:  %mul.n.reass.reass = mul i16 %mul, 8
-; SCEV-EXPR-NEXT:  -->  (8 * %mul) U: [0,-7) S: [-32768,32761)		Exits: -32768		LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-LABEL: 'main'
+; SCEV-EXPR-NEXT:  Classifying expressions for: @main
+; SCEV-EXPR-NEXT:    %mul = phi i16 [ 1, %entry ], [ %mul.n.3.reass, %loop ]
+; SCEV-EXPR-NEXT:    --> %mul U: [0,-15) S: [-32768,32753) Exits: 4096 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT:    %div = phi i16 [ 32767, %entry ], [ %div.n.3, %loop ]
+; SCEV-EXPR-NEXT:    --> %div U: [-2048,-32768) S: [-2048,-32768) Exits: 7 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT:    %mul.n.reass.reass = mul i16 %mul, 8
+; SCEV-EXPR-NEXT:    --> (8 * %mul) U: [0,-7) S: [-32768,32761) Exits: -32768 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT:    %div.n = sdiv i16 %div, 2
+; SCEV-EXPR-NEXT:    --> %div.n U: [-16384,16384) S: [-16384,16384) Exits: 3 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT:    %div.n.1 = sdiv i16 %div.n, 2
+; SCEV-EXPR-NEXT:    --> %div.n.1 U: [-8192,8192) S: [-8192,8192) Exits: 1 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT:    %div.n.2 = sdiv i16 %div.n.1, 2
+; SCEV-EXPR-NEXT:    --> %div.n.2 U: [-4096,4096) S: [-4096,4096) Exits: 0 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT:    %mul.n.3.reass = mul i16 %mul, 16
+; SCEV-EXPR-NEXT:    --> (16 * %mul) U: [0,-15) S: [-32768,32753) Exits: 0 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT:    %div.n.3 = sdiv i16 %div.n.2, 2
+; SCEV-EXPR-NEXT:    --> %div.n.3 U: [-2048,2048) S: [-2048,2048) Exits: 0 LoopDispositions: { %loop: Variant }
+; SCEV-EXPR-NEXT:    %mul.lcssa = phi i16 [ %mul.n.reass.reass, %loop ]
+; SCEV-EXPR-NEXT:    --> (8 * %mul) U: [0,-7) S: [-32768,32761) --> -32768 U: [-32768,-32767) S: [-32768,-32767)
+; SCEV-EXPR-NEXT:  Determining loop execution counts for: @main
+; SCEV-EXPR-NEXT:  Loop %loop: backedge-taken count is i32 3
+; SCEV-EXPR-NEXT:  Loop %loop: constant max backedge-taken count is i32 3
+; SCEV-EXPR-NEXT:  Loop %loop: symbolic max backedge-taken count is i32 3
+; SCEV-EXPR-NEXT:  Loop %loop: Trip multiple is 4
+;
 entry:
   br label %loop
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/106991


More information about the llvm-commits mailing list