[llvm] Reapply "[LICM] Fold associative binary ops to promote code hoisting (#81608)" (PR #100377)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 25 08:47:56 PDT 2024
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/100377
>From 44823a10554d4d3e68735133d4b63ad00c443173 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at ed.ac.uk>
Date: Tue, 13 Feb 2024 12:23:01 +0000
Subject: [PATCH 1/3] [LICM] Fold ADDs to promote code hoisting
Perform the transformation
"(LV op C1) op C2" ==> "LV op (C1 op C2)"
where op is an associative binary op, LV is a loop-variant value, and C1
and C2 are loop-invariant values, then hoist (C1 op C2) into the preheader.
For now this fold is restricted to ADDs.
---
llvm/lib/Transforms/Scalar/LICM.cpp | 74 +++++
llvm/test/CodeGen/PowerPC/common-chain.ll | 315 +++++++++++----------
llvm/test/Transforms/LICM/hoist-binop.ll | 229 +++++++++++++++
llvm/test/Transforms/LICM/sink-foldable.ll | 5 +-
4 files changed, 466 insertions(+), 157 deletions(-)
create mode 100644 llvm/test/Transforms/LICM/hoist-binop.ll
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 91ef2b4b7c183..fe29fc36e2bb2 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -113,6 +113,8 @@ STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
STATISTIC(NumIntAssociationsHoisted,
"Number of invariant int expressions "
"reassociated and hoisted out of the loop");
+STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
+ "reassociated and hoisted out of the loop");
/// Memory promotion is enabled by default.
static cl::opt<bool>
@@ -2779,6 +2781,72 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
return true;
}
+/// Reassociate associative binary expressions of the form
+///
+/// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
+///
+/// where op is an associative binary op, LV is a loop variant, and C1 and C2
+/// are loop invariants that we want to hoist.
+///
+/// TODO: This can be extended to more cases such as
+/// 2. "C1 op (C2 op LV)" ==> "(C1 op C2) op LV"
+/// 3. "(C1 op LV) op C2" ==> "LV op (C1 op C2)" if op is commutative
+/// 4. "C1 op (LV op C2)" ==> "(C1 op C2) op LV" if op is commutative
+static bool hoistBOAssociation(Instruction &I, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT) {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(&I);
+ if (!BO || !BO->isAssociative())
+ return false;
+
+ // Only fold ADDs for now.
+ Instruction::BinaryOps Opcode = BO->getOpcode();
+ if (Opcode != Instruction::Add)
+ return false;
+
+ BinaryOperator *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
+
+ // Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
+ if (BO0 && BO0->getOpcode() == Opcode && BO0->isAssociative()) {
+ Value *LV = BO0->getOperand(0);
+ Value *C1 = BO0->getOperand(1);
+ Value *C2 = BO->getOperand(1);
+
+ if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) ||
+ !L.isLoopInvariant(C2))
+ return false;
+
+ auto *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplify form?");
+
+ auto *Inv = BinaryOperator::Create(Opcode, C1, C2, "invariant.op",
+ Preheader->getTerminator());
+ auto *NewBO = BinaryOperator::Create(Opcode, LV, Inv,
+ BO->getName() + ".reass", BO);
+
+ // Copy NUW for ADDs if both instructions have it.
+ // https://alive2.llvm.org/ce/z/K9W3rk
+ if (Opcode == Instruction::Add && BO->hasNoUnsignedWrap() &&
+ BO0->hasNoUnsignedWrap()) {
+ Inv->setHasNoUnsignedWrap(true);
+ NewBO->setHasNoUnsignedWrap(true);
+ }
+
+ BO->replaceAllUsesWith(NewBO);
+ eraseInstruction(*BO, SafetyInfo, MSSAU);
+
+ // Note: (LV op C1) might not be erased if it has more uses than the one we
+ // just replaced.
+ if (BO0->use_empty())
+ eraseInstruction(*BO0, SafetyInfo, MSSAU);
+
+ return true;
+ }
+
+ return false;
+}
+
static bool hoistArithmetics(Instruction &I, Loop &L,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2816,6 +2884,12 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
return true;
}
+ if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
+ ++NumHoisted;
+ ++NumBOAssociationsHoisted;
+ return true;
+ }
+
return false;
}
diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index 5f8c21e30f8fd..ccf0e4520f468 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -642,8 +642,8 @@ define i64 @two_chain_two_bases_succ(ptr %p, i64 %offset, i64 %base1, i64 %base2
; CHECK-NEXT: cmpdi r7, 0
; CHECK-NEXT: ble cr0, .LBB6_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: add r6, r6, r4
; CHECK-NEXT: add r5, r5, r4
+; CHECK-NEXT: add r6, r6, r4
; CHECK-NEXT: mtctr r7
; CHECK-NEXT: sldi r4, r4, 1
; CHECK-NEXT: add r5, r3, r5
@@ -743,214 +743,219 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64
; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r4, -160(r1) # 8-byte Folded Spill
; CHECK-NEXT: ble cr0, .LBB7_7
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: sldi r6, r6, 2
-; CHECK-NEXT: li r7, 1
-; CHECK-NEXT: mr r30, r10
-; CHECK-NEXT: cmpdi r6, 1
-; CHECK-NEXT: iselgt r7, r6, r7
-; CHECK-NEXT: addi r8, r7, -1
-; CHECK-NEXT: clrldi r6, r7, 63
-; CHECK-NEXT: cmpldi r8, 3
+; CHECK-NEXT: sldi r4, r6, 2
+; CHECK-NEXT: li r6, 1
+; CHECK-NEXT: mr r0, r10
+; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill
+; CHECK-NEXT: cmpdi r4, 1
+; CHECK-NEXT: iselgt r4, r4, r6
+; CHECK-NEXT: addi r7, r4, -1
+; CHECK-NEXT: clrldi r6, r4, 63
+; CHECK-NEXT: cmpldi r7, 3
; CHECK-NEXT: blt cr0, .LBB7_4
; CHECK-NEXT: # %bb.2: # %for.body.preheader.new
-; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: mulli r24, r30, 24
-; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: rldicl r0, r7, 62, 2
-; CHECK-NEXT: sldi r11, r30, 5
-; CHECK-NEXT: sldi r19, r30, 4
-; CHECK-NEXT: sldi r7, r14, 3
-; CHECK-NEXT: add r14, r30, r14
-; CHECK-NEXT: sldi r10, r16, 3
-; CHECK-NEXT: sldi r12, r15, 3
-; CHECK-NEXT: add r16, r30, r16
-; CHECK-NEXT: add r15, r30, r15
-; CHECK-NEXT: add r27, r11, r7
-; CHECK-NEXT: add r22, r24, r7
-; CHECK-NEXT: add r17, r19, r7
-; CHECK-NEXT: sldi r2, r14, 3
-; CHECK-NEXT: add r26, r24, r10
-; CHECK-NEXT: add r25, r24, r12
-; CHECK-NEXT: add r21, r19, r10
-; CHECK-NEXT: add r20, r19, r12
-; CHECK-NEXT: add r8, r11, r10
-; CHECK-NEXT: sldi r16, r16, 3
-; CHECK-NEXT: add r29, r5, r27
-; CHECK-NEXT: add r28, r4, r27
-; CHECK-NEXT: add r27, r3, r27
-; CHECK-NEXT: add r24, r5, r22
-; CHECK-NEXT: add r23, r4, r22
-; CHECK-NEXT: add r22, r3, r22
-; CHECK-NEXT: add r19, r5, r17
-; CHECK-NEXT: add r18, r4, r17
-; CHECK-NEXT: add r17, r3, r17
-; CHECK-NEXT: add r14, r5, r2
-; CHECK-NEXT: add r31, r4, r2
-; CHECK-NEXT: add r2, r3, r2
-; CHECK-NEXT: add r9, r5, r8
-; CHECK-NEXT: add r8, r11, r12
+; CHECK-NEXT: ld r0, -192(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r8, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: rldicl r7, r4, 62, 2
+; CHECK-NEXT: ld r9, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r11, r0, r30
+; CHECK-NEXT: add r4, r0, r0
+; CHECK-NEXT: mulli r23, r0, 24
+; CHECK-NEXT: add r14, r0, r8
+; CHECK-NEXT: sldi r12, r0, 5
+; CHECK-NEXT: add r31, r0, r9
+; CHECK-NEXT: sldi r9, r9, 3
+; CHECK-NEXT: sldi r18, r0, 4
+; CHECK-NEXT: sldi r8, r8, 3
+; CHECK-NEXT: add r10, r4, r4
+; CHECK-NEXT: sldi r4, r30, 3
+; CHECK-NEXT: sldi r11, r11, 3
+; CHECK-NEXT: add r26, r12, r9
+; CHECK-NEXT: add r16, r18, r9
+; CHECK-NEXT: add r29, r12, r8
+; CHECK-NEXT: add r19, r18, r8
+; CHECK-NEXT: add r30, r12, r4
+; CHECK-NEXT: mr r20, r4
+; CHECK-NEXT: std r4, -200(r1) # 8-byte Folded Spill
+; CHECK-NEXT: ld r4, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r15, r5, r11
+; CHECK-NEXT: sldi r11, r14, 3
+; CHECK-NEXT: add r29, r5, r29
+; CHECK-NEXT: add r28, r3, r26
+; CHECK-NEXT: add r19, r5, r19
+; CHECK-NEXT: add r21, r23, r9
+; CHECK-NEXT: add r24, r23, r8
+; CHECK-NEXT: add r14, r5, r11
+; CHECK-NEXT: sldi r11, r31, 3
+; CHECK-NEXT: add r25, r23, r20
+; CHECK-NEXT: add r20, r18, r20
+; CHECK-NEXT: add r30, r5, r30
+; CHECK-NEXT: add r18, r3, r16
+; CHECK-NEXT: add r24, r5, r24
+; CHECK-NEXT: add r23, r3, r21
+; CHECK-NEXT: add r27, r4, r26
+; CHECK-NEXT: add r22, r4, r21
+; CHECK-NEXT: add r17, r4, r16
+; CHECK-NEXT: add r2, r4, r11
+; CHECK-NEXT: rldicl r4, r7, 2, 1
+; CHECK-NEXT: sub r7, r8, r9
+; CHECK-NEXT: ld r8, -200(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r26, r5, r26
; CHECK-NEXT: add r25, r5, r25
; CHECK-NEXT: add r21, r5, r21
; CHECK-NEXT: add r20, r5, r20
; CHECK-NEXT: add r16, r5, r16
-; CHECK-NEXT: add r8, r5, r8
-; CHECK-NEXT: rldicl r3, r0, 2, 1
-; CHECK-NEXT: addi r3, r3, -4
-; CHECK-NEXT: sub r0, r12, r7
-; CHECK-NEXT: sub r12, r10, r7
-; CHECK-NEXT: li r7, 0
-; CHECK-NEXT: mr r10, r30
-; CHECK-NEXT: sldi r15, r15, 3
-; CHECK-NEXT: add r15, r5, r15
-; CHECK-NEXT: rldicl r3, r3, 62, 2
-; CHECK-NEXT: addi r3, r3, 1
-; CHECK-NEXT: mtctr r3
+; CHECK-NEXT: add r31, r5, r11
+; CHECK-NEXT: add r11, r3, r11
+; CHECK-NEXT: addi r4, r4, -4
+; CHECK-NEXT: rldicl r4, r4, 62, 2
+; CHECK-NEXT: sub r8, r8, r9
+; CHECK-NEXT: li r9, 0
+; CHECK-NEXT: addi r4, r4, 1
+; CHECK-NEXT: mtctr r4
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_3: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lfd f0, 0(r2)
-; CHECK-NEXT: lfd f1, 0(r31)
-; CHECK-NEXT: add r3, r10, r30
-; CHECK-NEXT: add r3, r3, r30
+; CHECK-NEXT: lfd f0, 0(r11)
+; CHECK-NEXT: lfd f1, 0(r2)
+; CHECK-NEXT: add r0, r0, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfd f1, 0(r14)
-; CHECK-NEXT: add r3, r3, r30
-; CHECK-NEXT: add r10, r3, r30
+; CHECK-NEXT: lfd f1, 0(r31)
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfd f0, 0(r14)
-; CHECK-NEXT: add r14, r14, r11
-; CHECK-NEXT: lfdx f0, r2, r0
-; CHECK-NEXT: lfdx f1, r31, r0
+; CHECK-NEXT: stfd f0, 0(r31)
+; CHECK-NEXT: add r31, r31, r12
+; CHECK-NEXT: lfdx f0, r11, r7
+; CHECK-NEXT: lfdx f1, r2, r7
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r15, r7
+; CHECK-NEXT: lfdx f1, r14, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r15, r7
-; CHECK-NEXT: lfdx f0, r2, r12
-; CHECK-NEXT: lfdx f1, r31, r12
-; CHECK-NEXT: add r2, r2, r11
-; CHECK-NEXT: add r31, r31, r11
+; CHECK-NEXT: stfdx f0, r14, r9
+; CHECK-NEXT: lfdx f0, r11, r8
+; CHECK-NEXT: lfdx f1, r2, r8
+; CHECK-NEXT: add r11, r11, r12
+; CHECK-NEXT: add r2, r2, r12
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r16, r7
+; CHECK-NEXT: lfdx f1, r15, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r16, r7
-; CHECK-NEXT: lfd f0, 0(r17)
-; CHECK-NEXT: lfd f1, 0(r18)
+; CHECK-NEXT: stfdx f0, r15, r9
+; CHECK-NEXT: lfd f0, 0(r18)
+; CHECK-NEXT: lfd f1, 0(r17)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r19, r7
+; CHECK-NEXT: lfdx f1, r16, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r19, r7
-; CHECK-NEXT: lfdx f0, r17, r0
-; CHECK-NEXT: lfdx f1, r18, r0
+; CHECK-NEXT: stfdx f0, r16, r9
+; CHECK-NEXT: lfdx f0, r18, r7
+; CHECK-NEXT: lfdx f1, r17, r7
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r20, r7
+; CHECK-NEXT: lfdx f1, r19, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r20, r7
-; CHECK-NEXT: lfdx f0, r17, r12
-; CHECK-NEXT: lfdx f1, r18, r12
-; CHECK-NEXT: add r17, r17, r11
-; CHECK-NEXT: add r18, r18, r11
+; CHECK-NEXT: stfdx f0, r19, r9
+; CHECK-NEXT: lfdx f0, r18, r8
+; CHECK-NEXT: lfdx f1, r17, r8
+; CHECK-NEXT: add r18, r18, r12
+; CHECK-NEXT: add r17, r17, r12
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r21, r7
+; CHECK-NEXT: lfdx f1, r20, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r21, r7
-; CHECK-NEXT: lfd f0, 0(r22)
-; CHECK-NEXT: lfd f1, 0(r23)
+; CHECK-NEXT: stfdx f0, r20, r9
+; CHECK-NEXT: lfd f0, 0(r23)
+; CHECK-NEXT: lfd f1, 0(r22)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r24, r7
+; CHECK-NEXT: lfdx f1, r21, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r24, r7
-; CHECK-NEXT: lfdx f0, r22, r0
-; CHECK-NEXT: lfdx f1, r23, r0
+; CHECK-NEXT: stfdx f0, r21, r9
+; CHECK-NEXT: lfdx f0, r23, r7
+; CHECK-NEXT: lfdx f1, r22, r7
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r25, r7
+; CHECK-NEXT: lfdx f1, r24, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r25, r7
-; CHECK-NEXT: lfdx f0, r22, r12
-; CHECK-NEXT: lfdx f1, r23, r12
-; CHECK-NEXT: add r22, r22, r11
-; CHECK-NEXT: add r23, r23, r11
+; CHECK-NEXT: stfdx f0, r24, r9
+; CHECK-NEXT: lfdx f0, r23, r8
+; CHECK-NEXT: lfdx f1, r22, r8
+; CHECK-NEXT: add r23, r23, r12
+; CHECK-NEXT: add r22, r22, r12
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r26, r7
+; CHECK-NEXT: lfdx f1, r25, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r26, r7
-; CHECK-NEXT: lfd f0, 0(r27)
-; CHECK-NEXT: lfd f1, 0(r28)
+; CHECK-NEXT: stfdx f0, r25, r9
+; CHECK-NEXT: lfd f0, 0(r28)
+; CHECK-NEXT: lfd f1, 0(r27)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r29, r7
+; CHECK-NEXT: lfdx f1, r26, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r29, r7
-; CHECK-NEXT: lfdx f0, r27, r0
-; CHECK-NEXT: lfdx f1, r28, r0
+; CHECK-NEXT: stfdx f0, r26, r9
+; CHECK-NEXT: lfdx f0, r28, r7
+; CHECK-NEXT: lfdx f1, r27, r7
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r8, r7
+; CHECK-NEXT: lfdx f1, r29, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r8, r7
-; CHECK-NEXT: lfdx f0, r27, r12
-; CHECK-NEXT: lfdx f1, r28, r12
-; CHECK-NEXT: add r27, r27, r11
-; CHECK-NEXT: add r28, r28, r11
+; CHECK-NEXT: stfdx f0, r29, r9
+; CHECK-NEXT: lfdx f0, r28, r8
+; CHECK-NEXT: lfdx f1, r27, r8
+; CHECK-NEXT: add r28, r28, r12
+; CHECK-NEXT: add r27, r27, r12
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r9, r7
+; CHECK-NEXT: lfdx f1, r30, r9
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r9, r7
-; CHECK-NEXT: add r7, r7, r11
+; CHECK-NEXT: stfdx f0, r30, r9
+; CHECK-NEXT: add r9, r9, r12
; CHECK-NEXT: bdnz .LBB7_3
; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa
+; CHECK-NEXT: ld r7, -192(r1) # 8-byte Folded Reload
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: beq cr0, .LBB7_7
; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader
-; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: sldi r8, r30, 3
-; CHECK-NEXT: add r3, r10, r3
-; CHECK-NEXT: sldi r3, r3, 3
-; CHECK-NEXT: add r7, r5, r3
-; CHECK-NEXT: add r9, r4, r3
-; CHECK-NEXT: add r11, r0, r3
-; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r3, r10, r3
-; CHECK-NEXT: sldi r3, r3, 3
-; CHECK-NEXT: add r12, r5, r3
-; CHECK-NEXT: add r30, r4, r3
-; CHECK-NEXT: add r29, r0, r3
-; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r3, r10, r3
-; CHECK-NEXT: li r10, 0
-; CHECK-NEXT: sldi r3, r3, 3
-; CHECK-NEXT: add r5, r5, r3
-; CHECK-NEXT: add r4, r4, r3
-; CHECK-NEXT: add r3, r0, r3
+; CHECK-NEXT: ld r4, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: mr r30, r3
+; CHECK-NEXT: sldi r7, r7, 3
+; CHECK-NEXT: add r4, r0, r4
+; CHECK-NEXT: sldi r4, r4, 3
+; CHECK-NEXT: add r3, r5, r4
+; CHECK-NEXT: add r8, r29, r4
+; CHECK-NEXT: add r9, r30, r4
+; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r4, r0, r4
+; CHECK-NEXT: sldi r4, r4, 3
+; CHECK-NEXT: add r10, r5, r4
+; CHECK-NEXT: add r11, r29, r4
+; CHECK-NEXT: add r12, r30, r4
+; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r4, r0, r4
+; CHECK-NEXT: sldi r0, r4, 3
+; CHECK-NEXT: add r5, r5, r0
+; CHECK-NEXT: add r4, r29, r0
+; CHECK-NEXT: add r30, r30, r0
+; CHECK-NEXT: li r0, 0
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_6: # %for.body.epil
; CHECK-NEXT: #
-; CHECK-NEXT: lfdx f0, r3, r10
-; CHECK-NEXT: lfdx f1, r4, r10
+; CHECK-NEXT: lfdx f0, r30, r0
+; CHECK-NEXT: lfdx f1, r4, r0
; CHECK-NEXT: addi r6, r6, -1
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfd f1, 0(r5)
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfd f0, 0(r5)
-; CHECK-NEXT: add r5, r5, r8
-; CHECK-NEXT: lfdx f0, r29, r10
-; CHECK-NEXT: lfdx f1, r30, r10
+; CHECK-NEXT: add r5, r5, r7
+; CHECK-NEXT: lfdx f0, r12, r0
+; CHECK-NEXT: lfdx f1, r11, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r12, r10
+; CHECK-NEXT: lfdx f1, r10, r0
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r12, r10
-; CHECK-NEXT: lfdx f0, r11, r10
-; CHECK-NEXT: lfdx f1, r9, r10
+; CHECK-NEXT: stfdx f0, r10, r0
+; CHECK-NEXT: lfdx f0, r9, r0
+; CHECK-NEXT: lfdx f1, r8, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r7, r10
+; CHECK-NEXT: lfdx f1, r3, r0
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r7, r10
-; CHECK-NEXT: add r10, r10, r8
+; CHECK-NEXT: stfdx f0, r3, r0
+; CHECK-NEXT: add r0, r0, r7
; CHECK-NEXT: bne cr0, .LBB7_6
; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
new file mode 100644
index 0000000000000..9cdcc359c61ab
--- /dev/null
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -0,0 +1,229 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=licm < %s | FileCheck %s
+
+; Fold ADD and remove old op if unused.
+; https://alive2.llvm.org/ce/z/wAY-Nd
+define void @add_one_use(i64 %c) {
+; CHECK-LABEL: @add_one_use(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = add i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = add i64 %index, %c
+ %index.next = add i64 %step.add, %c
+ br label %loop
+}
+
+; Fold ADD and copy NUW if both ops have it.
+; https://alive2.llvm.org/ce/z/wAY-Nd
+define void @add_nuw(i64 %c) {
+; CHECK-LABEL: @add_nuw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nuw i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw i64 [[INDEX]], [[C]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = add nuw i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = add nuw i64 %index, %c
+ call void @use(i64 %step.add)
+ %index.next = add nuw i64 %step.add, %c
+ br label %loop
+}
+
+; Fold ADD but don't copy NUW if only one op has it.
+; https://alive2.llvm.org/ce/z/6n95Gf
+define void @add_no_nuw(i64 %c) {
+; CHECK-LABEL: @add_no_nuw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add i64 [[INDEX]], [[C]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = add i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = add i64 %index, %c
+ call void @use(i64 %step.add)
+ %index.next = add nuw i64 %step.add, %c
+ br label %loop
+}
+
+; Fold ADD but don't copy NSW if one op has it.
+; https://alive2.llvm.org/ce/z/iz3dfB
+define void @add_no_nsw(i64 %c) {
+; CHECK-LABEL: @add_no_nsw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add i64 [[INDEX]], [[C]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = add i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = add i64 %index, %c
+ call void @use(i64 %step.add)
+ %index.next = add nsw i64 %step.add, %c
+ br label %loop
+}
+
+; Fold ADD but don't copy NSW even if both ops have it.
+; https://alive2.llvm.org/ce/z/F9f43_
+define void @add_no_nsw_2(i64 %c) {
+; CHECK-LABEL: @add_no_nsw_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nsw i64 [[INDEX]], [[C]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = add i64 [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = add nsw i64 %index, %c
+ call void @use(i64 %step.add)
+ %index.next = add nsw i64 %step.add, %c
+ br label %loop
+}
+
+; Don't fold if the ops are different (even if they are both associative).
+define void @diff_ops(i64 %c) {
+; CHECK-LABEL: @diff_ops(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add i64 [[INDEX]], [[C:%.*]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = mul i64 [[STEP_ADD]], [[C]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = add i64 %index, %c
+ call void @use(i64 %step.add)
+ %index.next = mul i64 %step.add, %c
+ br label %loop
+}
+
+; Don't fold if the ops are not associative.
+define void @noassoc_ops(i64 %c) {
+; CHECK-LABEL: @noassoc_ops(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = sub i64 [[INDEX]], [[C:%.*]]
+; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = sub i64 [[STEP_ADD]], [[C]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+ %step.add = sub i64 %index, %c
+ call void @use(i64 %step.add)
+ %index.next = sub i64 %step.add, %c
+ br label %loop
+}
+
+; Don't fold floating-point ops, even if they are associative.
+define void @fadd(float %c) {
+; CHECK-LABEL: @fadd(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C:%.*]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = fadd fast float [[STEP_ADD]], [[C]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fadd fast float %index, %c
+ call void @use(float %step.add)
+ %index.next = fadd fast float %step.add, %c
+ br label %loop
+}
+
+; Original reproducer, adapted from:
+; for(long i = 0; i < n; ++i)
+; a[i] = (i*k) * v;
+define void @test(i64 %n, i64 %k) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[K_2:%.*]] = shl nuw nsw i64 [[K:%.*]], 1
+; CHECK-NEXT: [[VEC_INIT:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[K]], i64 1
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[K_2]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add <2 x i64> [[DOTSPLAT]], [[DOTSPLAT]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[VEC_INIT]], [[ENTRY:%.*]] ], [ [[VEC_IND_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: call void @use(<2 x i64> [[STEP_ADD]])
+; CHECK-NEXT: [[VEC_IND_NEXT_REASS]] = add <2 x i64> [[VEC_IND]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ %k.2 = shl nuw nsw i64 %k, 1
+ %vec.init = insertelement <2 x i64> zeroinitializer, i64 %k, i64 1
+ %.splatinsert = insertelement <2 x i64> poison, i64 %k.2, i64 0
+ %.splat = shufflevector <2 x i64> %.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
+ br label %loop
+
+loop:
+ %vec.ind = phi <2 x i64> [ %vec.init, %entry ], [ %vec.ind.next, %loop ]
+ %step.add = add <2 x i64> %vec.ind, %.splat
+ call void @use(<2 x i64> %step.add)
+ %vec.ind.next = add <2 x i64> %step.add, %.splat
+ br label %loop
+}
+
+declare void @use()
diff --git a/llvm/test/Transforms/LICM/sink-foldable.ll b/llvm/test/Transforms/LICM/sink-foldable.ll
index 38577a5a12563..36e2eab6313dc 100644
--- a/llvm/test/Transforms/LICM/sink-foldable.ll
+++ b/llvm/test/Transforms/LICM/sink-foldable.ll
@@ -77,9 +77,10 @@ return:
define ptr @test2(i32 %j, ptr readonly %P, ptr readnone %Q) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i32 1, 1
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond:
-; CHECK-NEXT: [[I_ADDR_0:%.*]] = phi i32 [ [[ADD:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT: [[I_ADDR_0:%.*]] = phi i32 [ [[ADD_REASS:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT: [[P_ADDR_0:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[IF_END]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_ADDR_0]], [[J:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[LOOPEXIT0:%.*]]
@@ -97,7 +98,7 @@ define ptr @test2(i32 %j, ptr readonly %P, ptr readnone %Q) {
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[ADD_PTR]], i64 [[IDX2_EXT]]
; CHECK-NEXT: [[L1:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt ptr [[L1]], [[Q]]
-; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD_I]], 1
+; CHECK-NEXT: [[ADD_REASS]] = add i32 [[I_ADDR]], [[INVARIANT_OP]]
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOPEXIT2:%.*]], label [[FOR_COND]]
; CHECK: loopexit0:
; CHECK-NEXT: [[P0:%.*]] = phi ptr [ null, [[FOR_COND]] ]
>From 7c0175c2c3d0ce95afb1184e8d54a7bffda4e9fa Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 25 Jul 2024 07:36:10 -0700
Subject: [PATCH 2/3] Address comments
---
llvm/lib/Transforms/Scalar/LICM.cpp | 61 ++++++++---------
llvm/test/Transforms/LICM/hoist-binop.ll | 87 ++++++++++++------------
2 files changed, 71 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index fe29fc36e2bb2..81a5041edcab0 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2796,7 +2796,7 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
DominatorTree *DT) {
- BinaryOperator *BO = dyn_cast<BinaryOperator>(&I);
+ auto *BO = dyn_cast<BinaryOperator>(&I);
if (!BO || !BO->isAssociative())
return false;
@@ -2805,46 +2805,43 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
if (Opcode != Instruction::Add)
return false;
- BinaryOperator *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
+ auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
+ if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative())
+ return false;
// Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
- if (BO0 && BO0->getOpcode() == Opcode && BO0->isAssociative()) {
- Value *LV = BO0->getOperand(0);
- Value *C1 = BO0->getOperand(1);
- Value *C2 = BO->getOperand(1);
-
- if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) ||
- !L.isLoopInvariant(C2))
- return false;
+ Value *LV = BO0->getOperand(0);
+ Value *C1 = BO0->getOperand(1);
+ Value *C2 = BO->getOperand(1);
- auto *Preheader = L.getLoopPreheader();
- assert(Preheader && "Loop is not in simplify form?");
+ if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) ||
+ !L.isLoopInvariant(C2))
+ return false;
- auto *Inv = BinaryOperator::Create(Opcode, C1, C2, "invariant.op",
- Preheader->getTerminator());
- auto *NewBO = BinaryOperator::Create(Opcode, LV, Inv,
- BO->getName() + ".reass", BO);
+ auto *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplify form?");
- // Copy NUW for ADDs if both instructions have it.
- // https://alive2.llvm.org/ce/z/K9W3rk
- if (Opcode == Instruction::Add && BO->hasNoUnsignedWrap() &&
- BO0->hasNoUnsignedWrap()) {
- Inv->setHasNoUnsignedWrap(true);
- NewBO->setHasNoUnsignedWrap(true);
- }
+ auto *Inv = BinaryOperator::Create(Opcode, C1, C2, "invariant.op",
+ Preheader->getTerminator());
+ auto *NewBO =
+ BinaryOperator::Create(Opcode, LV, Inv, BO->getName() + ".reass", BO);
- BO->replaceAllUsesWith(NewBO);
- eraseInstruction(*BO, SafetyInfo, MSSAU);
+ // Copy NUW for ADDs if both instructions have it.
+ if (Opcode == Instruction::Add && BO->hasNoUnsignedWrap() &&
+ BO0->hasNoUnsignedWrap()) {
+ Inv->setHasNoUnsignedWrap(true);
+ NewBO->setHasNoUnsignedWrap(true);
+ }
- // Note: (LV op C1) might not be erased if it has more uses than the one we
- // just replaced.
- if (BO0->use_empty())
- eraseInstruction(*BO0, SafetyInfo, MSSAU);
+ BO->replaceAllUsesWith(NewBO);
+ eraseInstruction(*BO, SafetyInfo, MSSAU);
- return true;
- }
+ // (LV op C1) might not be erased if it has more uses than the one we just
+ // replaced.
+ if (BO0->use_empty())
+ eraseInstruction(*BO0, SafetyInfo, MSSAU);
- return false;
+ return true;
}
static bool hoistArithmetics(Instruction &I, Loop &L,
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index 9cdcc359c61ab..8bda74eec34a0 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -2,11 +2,10 @@
; RUN: opt -S -passes=licm < %s | FileCheck %s
; Fold ADD and remove old op if unused.
-; https://alive2.llvm.org/ce/z/wAY-Nd
-define void @add_one_use(i64 %c) {
+define void @add_one_use(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_one_use(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C1:%.*]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
@@ -18,21 +17,21 @@ entry:
loop:
%index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
- %step.add = add i64 %index, %c
- %index.next = add i64 %step.add, %c
+ %step.add = add i64 %index, %c1
+ %index.next = add i64 %step.add, %c2
br label %loop
}
; Fold ADD and copy NUW if both ops have it.
-; https://alive2.llvm.org/ce/z/wAY-Nd
-define void @add_nuw(i64 %c) {
+; https://alive2.llvm.org/ce/z/bPAT7Z
+define void @add_nuw(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_nuw(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nuw i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nuw i64 [[C1:%.*]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw i64 [[INDEX]], [[C]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw i64 [[INDEX]], [[C1]]
; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = add nuw i64 [[INDEX]], [[INVARIANT_OP]]
; CHECK-NEXT: br label [[LOOP]]
@@ -42,22 +41,21 @@ entry:
loop:
%index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
- %step.add = add nuw i64 %index, %c
+ %step.add = add nuw i64 %index, %c1
call void @use(i64 %step.add)
- %index.next = add nuw i64 %step.add, %c
+ %index.next = add nuw i64 %step.add, %c2
br label %loop
}
; Fold ADD but don't copy NUW if only one op has it.
-; https://alive2.llvm.org/ce/z/6n95Gf
-define void @add_no_nuw(i64 %c) {
+define void @add_no_nuw(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_no_nuw(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C1:%.*]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add i64 [[INDEX]], [[C]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add i64 [[INDEX]], [[C1]]
; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = add i64 [[INDEX]], [[INVARIANT_OP]]
; CHECK-NEXT: br label [[LOOP]]
@@ -67,22 +65,21 @@ entry:
loop:
%index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
- %step.add = add i64 %index, %c
+ %step.add = add i64 %index, %c1
call void @use(i64 %step.add)
- %index.next = add nuw i64 %step.add, %c
+ %index.next = add nuw i64 %step.add, %c2
br label %loop
}
; Fold ADD but don't copy NSW if one op has it.
-; https://alive2.llvm.org/ce/z/iz3dfB
-define void @add_no_nsw(i64 %c) {
+define void @add_no_nsw(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_no_nsw(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C1:%.*]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add i64 [[INDEX]], [[C]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add i64 [[INDEX]], [[C1]]
; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = add i64 [[INDEX]], [[INVARIANT_OP]]
; CHECK-NEXT: br label [[LOOP]]
@@ -92,22 +89,21 @@ entry:
loop:
%index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
- %step.add = add i64 %index, %c
+ %step.add = add i64 %index, %c1
call void @use(i64 %step.add)
- %index.next = add nsw i64 %step.add, %c
+ %index.next = add nsw i64 %step.add, %c2
br label %loop
}
; Fold ADD but don't copy NSW even if both ops have it.
-; https://alive2.llvm.org/ce/z/F9f43_
-define void @add_no_nsw_2(i64 %c) {
+define void @add_no_nsw_2(i64 %c1, i64 %c2) {
; CHECK-LABEL: @add_no_nsw_2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C:%.*]], [[C]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add i64 [[C1:%.*]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nsw i64 [[INDEX]], [[C]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nsw i64 [[INDEX]], [[C1]]
; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = add i64 [[INDEX]], [[INVARIANT_OP]]
; CHECK-NEXT: br label [[LOOP]]
@@ -117,22 +113,22 @@ entry:
loop:
%index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
- %step.add = add nsw i64 %index, %c
+ %step.add = add nsw i64 %index, %c1
call void @use(i64 %step.add)
- %index.next = add nsw i64 %step.add, %c
+ %index.next = add nsw i64 %step.add, %c2
br label %loop
}
; Don't fold if the ops are different (even if they are both associative).
-define void @diff_ops(i64 %c) {
+define void @diff_ops(i64 %c1, i64 %c2) {
; CHECK-LABEL: @diff_ops(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add i64 [[INDEX]], [[C:%.*]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add i64 [[INDEX]], [[C1:%.*]]
; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
-; CHECK-NEXT: [[INDEX_NEXT]] = mul i64 [[STEP_ADD]], [[C]]
+; CHECK-NEXT: [[INDEX_NEXT]] = mul i64 [[STEP_ADD]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:
@@ -140,22 +136,22 @@ entry:
loop:
%index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
- %step.add = add i64 %index, %c
+ %step.add = add i64 %index, %c1
call void @use(i64 %step.add)
- %index.next = mul i64 %step.add, %c
+ %index.next = mul i64 %step.add, %c2
br label %loop
}
; Don't fold if the ops are not associative.
-define void @noassoc_ops(i64 %c) {
+define void @noassoc_ops(i64 %c1, i64 %c2) {
; CHECK-LABEL: @noassoc_ops(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = sub i64 [[INDEX]], [[C:%.*]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = sub i64 [[INDEX]], [[C1:%.*]]
; CHECK-NEXT: call void @use(i64 [[STEP_ADD]])
-; CHECK-NEXT: [[INDEX_NEXT]] = sub i64 [[STEP_ADD]], [[C]]
+; CHECK-NEXT: [[INDEX_NEXT]] = sub i64 [[STEP_ADD]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:
@@ -163,22 +159,23 @@ entry:
loop:
%index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
- %step.add = sub i64 %index, %c
+ %step.add = sub i64 %index, %c1
call void @use(i64 %step.add)
- %index.next = sub i64 %step.add, %c
+ %index.next = sub i64 %step.add, %c2
br label %loop
}
-; Don't fold floating-point ops, even if they are associative.
-define void @fadd(float %c) {
+; Don't fold floating-point ops, even if they are associative. This would be
+; valid, but is currently disabled.
+define void @fadd(float %c1, float %c2) {
; CHECK-LABEL: @fadd(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C:%.*]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C1:%.*]]
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
-; CHECK-NEXT: [[INDEX_NEXT]] = fadd fast float [[STEP_ADD]], [[C]]
+; CHECK-NEXT: [[INDEX_NEXT]] = fadd fast float [[STEP_ADD]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:
@@ -186,9 +183,9 @@ entry:
loop:
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
- %step.add = fadd fast float %index, %c
+ %step.add = fadd fast float %index, %c1
call void @use(float %step.add)
- %index.next = fadd fast float %step.add, %c
+ %index.next = fadd fast float %step.add, %c2
br label %loop
}
>From b190814adfd0d2b7c99b96e394d29f13132fd566 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 25 Jul 2024 08:31:15 -0700
Subject: [PATCH 3/3] Fix formatting
---
llvm/lib/Transforms/Scalar/LICM.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 81a5041edcab0..329b3ef0c8e4b 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2814,8 +2814,7 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
Value *C1 = BO0->getOperand(1);
Value *C2 = BO->getOperand(1);
- if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) ||
- !L.isLoopInvariant(C2))
+ if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) || !L.isLoopInvariant(C2))
return false;
auto *Preheader = L.getLoopPreheader();
More information about the llvm-commits
mailing list