[llvm-branch-commits] [llvm] release/19.x: Revert " [LICM] Fold associative binary ops to promote code hoisting (#81608)" (PR #100094)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jul 23 03:23:52 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: None (llvmbot)

<details>
<summary>Changes</summary>

Backport b48819dbcdb48fc737dc22304ac343e4fdbae9ff

Requested by: @<!-- -->nikic

---

Patch is 26.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100094.diff


6 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/LICM.cpp (-62) 
- (modified) llvm/test/CodeGen/PowerPC/common-chain.ll (+155-160) 
- (modified) llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll (+5-11) 
- (removed) llvm/test/Transforms/LICM/hoist-binop.ll (-99) 
- (modified) llvm/test/Transforms/LICM/sink-foldable.ll (+2-2) 
- (modified) llvm/test/Transforms/LICM/update-scev-after-hoist.ll (+1-1) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index fe264503dee9e..91ef2b4b7c183 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -113,8 +113,6 @@ STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
 STATISTIC(NumIntAssociationsHoisted,
           "Number of invariant int expressions "
           "reassociated and hoisted out of the loop");
-STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
-                                    "reassociated and hoisted out of the loop");
 
 /// Memory promotion is enabled by default.
 static cl::opt<bool>
@@ -2781,60 +2779,6 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
   return true;
 }
 
-/// Reassociate general associative binary expressions of the form
-///
-/// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
-///
-/// where op is an associative binary op, LV is a loop variant, and C1 and C2
-/// are loop invariants that we want to hoist.
-///
-/// TODO: This can be extended to more cases such as
-/// 2. "C1 op (C2 op LV)" ==> "(C1 op C2) op LV"
-/// 3. "(C1 op LV) op C2" ==> "LV op (C1 op C2)" if op is commutative
-/// 4. "C1 op (LV op C2)" ==> "(C1 op C2) op LV" if op is commutative
-static bool hoistBOAssociation(Instruction &I, Loop &L,
-                               ICFLoopSafetyInfo &SafetyInfo,
-                               MemorySSAUpdater &MSSAU, AssumptionCache *AC,
-                               DominatorTree *DT) {
-  BinaryOperator *BO = dyn_cast<BinaryOperator>(&I);
-  if (!BO || !BO->isAssociative())
-    return false;
-
-  Instruction::BinaryOps Opcode = BO->getOpcode();
-  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
-
-  // Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
-  if (Op0 && Op0->getOpcode() == Opcode) {
-    Value *LV = Op0->getOperand(0);
-    Value *C1 = Op0->getOperand(1);
-    Value *C2 = BO->getOperand(1);
-
-    if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) ||
-        !L.isLoopInvariant(C2))
-      return false;
-
-    auto *Preheader = L.getLoopPreheader();
-    assert(Preheader && "Loop is not in simplify form?");
-    IRBuilder<> Builder(Preheader->getTerminator());
-    Value *Inv = Builder.CreateBinOp(Opcode, C1, C2, "invariant.op");
-
-    auto *NewBO =
-        BinaryOperator::Create(Opcode, LV, Inv, BO->getName() + ".reass", BO);
-    NewBO->copyIRFlags(BO);
-    BO->replaceAllUsesWith(NewBO);
-    eraseInstruction(*BO, SafetyInfo, MSSAU);
-
-    // Note: (LV op C1) might not be erased if it has more uses than the one we
-    //       just replaced.
-    if (Op0->use_empty())
-      eraseInstruction(*Op0, SafetyInfo, MSSAU);
-
-    return true;
-  }
-
-  return false;
-}
-
 static bool hoistArithmetics(Instruction &I, Loop &L,
                              ICFLoopSafetyInfo &SafetyInfo,
                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2872,12 +2816,6 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
     return true;
   }
 
-  if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
-    ++NumHoisted;
-    ++NumBOAssociationsHoisted;
-    return true;
-  }
-
   return false;
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index ccf0e4520f468..5f8c21e30f8fd 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -642,8 +642,8 @@ define i64 @two_chain_two_bases_succ(ptr %p, i64 %offset, i64 %base1, i64 %base2
 ; CHECK-NEXT:    cmpdi r7, 0
 ; CHECK-NEXT:    ble cr0, .LBB6_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    add r5, r5, r4
 ; CHECK-NEXT:    add r6, r6, r4
+; CHECK-NEXT:    add r5, r5, r4
 ; CHECK-NEXT:    mtctr r7
 ; CHECK-NEXT:    sldi r4, r4, 1
 ; CHECK-NEXT:    add r5, r3, r5
@@ -743,219 +743,214 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64
 ; CHECK-NEXT:    std r9, -184(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r8, -176(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r7, -168(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r4, -160(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r3, -160(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ble cr0, .LBB7_7
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    sldi r4, r6, 2
-; CHECK-NEXT:    li r6, 1
-; CHECK-NEXT:    mr r0, r10
-; CHECK-NEXT:    std r10, -192(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    cmpdi r4, 1
-; CHECK-NEXT:    iselgt r4, r4, r6
-; CHECK-NEXT:    addi r7, r4, -1
-; CHECK-NEXT:    clrldi r6, r4, 63
-; CHECK-NEXT:    cmpldi r7, 3
+; CHECK-NEXT:    sldi r6, r6, 2
+; CHECK-NEXT:    li r7, 1
+; CHECK-NEXT:    mr r30, r10
+; CHECK-NEXT:    cmpdi r6, 1
+; CHECK-NEXT:    iselgt r7, r6, r7
+; CHECK-NEXT:    addi r8, r7, -1
+; CHECK-NEXT:    clrldi r6, r7, 63
+; CHECK-NEXT:    cmpldi r8, 3
 ; CHECK-NEXT:    blt cr0, .LBB7_4
 ; CHECK-NEXT:  # %bb.2: # %for.body.preheader.new
-; CHECK-NEXT:    ld r0, -192(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r8, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    rldicl r7, r4, 62, 2
-; CHECK-NEXT:    ld r9, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    add r11, r0, r30
-; CHECK-NEXT:    add r4, r0, r0
-; CHECK-NEXT:    mulli r23, r0, 24
-; CHECK-NEXT:    add r14, r0, r8
-; CHECK-NEXT:    sldi r12, r0, 5
-; CHECK-NEXT:    add r31, r0, r9
-; CHECK-NEXT:    sldi r9, r9, 3
-; CHECK-NEXT:    sldi r18, r0, 4
-; CHECK-NEXT:    sldi r8, r8, 3
-; CHECK-NEXT:    add r10, r4, r4
-; CHECK-NEXT:    sldi r4, r30, 3
-; CHECK-NEXT:    sldi r11, r11, 3
-; CHECK-NEXT:    add r26, r12, r9
-; CHECK-NEXT:    add r16, r18, r9
-; CHECK-NEXT:    add r29, r12, r8
-; CHECK-NEXT:    add r19, r18, r8
-; CHECK-NEXT:    add r30, r12, r4
-; CHECK-NEXT:    mr r20, r4
-; CHECK-NEXT:    std r4, -200(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    ld r4, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    add r15, r5, r11
-; CHECK-NEXT:    sldi r11, r14, 3
-; CHECK-NEXT:    add r29, r5, r29
-; CHECK-NEXT:    add r28, r3, r26
-; CHECK-NEXT:    add r19, r5, r19
-; CHECK-NEXT:    add r21, r23, r9
-; CHECK-NEXT:    add r24, r23, r8
-; CHECK-NEXT:    add r14, r5, r11
-; CHECK-NEXT:    sldi r11, r31, 3
-; CHECK-NEXT:    add r25, r23, r20
-; CHECK-NEXT:    add r20, r18, r20
-; CHECK-NEXT:    add r30, r5, r30
-; CHECK-NEXT:    add r18, r3, r16
-; CHECK-NEXT:    add r24, r5, r24
-; CHECK-NEXT:    add r23, r3, r21
-; CHECK-NEXT:    add r27, r4, r26
-; CHECK-NEXT:    add r22, r4, r21
-; CHECK-NEXT:    add r17, r4, r16
-; CHECK-NEXT:    add r2, r4, r11
-; CHECK-NEXT:    rldicl r4, r7, 2, 1
-; CHECK-NEXT:    sub r7, r8, r9
-; CHECK-NEXT:    ld r8, -200(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r14, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mulli r24, r30, 24
+; CHECK-NEXT:    ld r16, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r15, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r3, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    rldicl r0, r7, 62, 2
+; CHECK-NEXT:    sldi r11, r30, 5
+; CHECK-NEXT:    sldi r19, r30, 4
+; CHECK-NEXT:    sldi r7, r14, 3
+; CHECK-NEXT:    add r14, r30, r14
+; CHECK-NEXT:    sldi r10, r16, 3
+; CHECK-NEXT:    sldi r12, r15, 3
+; CHECK-NEXT:    add r16, r30, r16
+; CHECK-NEXT:    add r15, r30, r15
+; CHECK-NEXT:    add r27, r11, r7
+; CHECK-NEXT:    add r22, r24, r7
+; CHECK-NEXT:    add r17, r19, r7
+; CHECK-NEXT:    sldi r2, r14, 3
+; CHECK-NEXT:    add r26, r24, r10
+; CHECK-NEXT:    add r25, r24, r12
+; CHECK-NEXT:    add r21, r19, r10
+; CHECK-NEXT:    add r20, r19, r12
+; CHECK-NEXT:    add r8, r11, r10
+; CHECK-NEXT:    sldi r16, r16, 3
+; CHECK-NEXT:    add r29, r5, r27
+; CHECK-NEXT:    add r28, r4, r27
+; CHECK-NEXT:    add r27, r3, r27
+; CHECK-NEXT:    add r24, r5, r22
+; CHECK-NEXT:    add r23, r4, r22
+; CHECK-NEXT:    add r22, r3, r22
+; CHECK-NEXT:    add r19, r5, r17
+; CHECK-NEXT:    add r18, r4, r17
+; CHECK-NEXT:    add r17, r3, r17
+; CHECK-NEXT:    add r14, r5, r2
+; CHECK-NEXT:    add r31, r4, r2
+; CHECK-NEXT:    add r2, r3, r2
+; CHECK-NEXT:    add r9, r5, r8
+; CHECK-NEXT:    add r8, r11, r12
 ; CHECK-NEXT:    add r26, r5, r26
 ; CHECK-NEXT:    add r25, r5, r25
 ; CHECK-NEXT:    add r21, r5, r21
 ; CHECK-NEXT:    add r20, r5, r20
 ; CHECK-NEXT:    add r16, r5, r16
-; CHECK-NEXT:    add r31, r5, r11
-; CHECK-NEXT:    add r11, r3, r11
-; CHECK-NEXT:    addi r4, r4, -4
-; CHECK-NEXT:    rldicl r4, r4, 62, 2
-; CHECK-NEXT:    sub r8, r8, r9
-; CHECK-NEXT:    li r9, 0
-; CHECK-NEXT:    addi r4, r4, 1
-; CHECK-NEXT:    mtctr r4
+; CHECK-NEXT:    add r8, r5, r8
+; CHECK-NEXT:    rldicl r3, r0, 2, 1
+; CHECK-NEXT:    addi r3, r3, -4
+; CHECK-NEXT:    sub r0, r12, r7
+; CHECK-NEXT:    sub r12, r10, r7
+; CHECK-NEXT:    li r7, 0
+; CHECK-NEXT:    mr r10, r30
+; CHECK-NEXT:    sldi r15, r15, 3
+; CHECK-NEXT:    add r15, r5, r15
+; CHECK-NEXT:    rldicl r3, r3, 62, 2
+; CHECK-NEXT:    addi r3, r3, 1
+; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB7_3: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lfd f0, 0(r11)
-; CHECK-NEXT:    lfd f1, 0(r2)
-; CHECK-NEXT:    add r0, r0, r10
-; CHECK-NEXT:    xsmuldp f0, f0, f1
+; CHECK-NEXT:    lfd f0, 0(r2)
 ; CHECK-NEXT:    lfd f1, 0(r31)
+; CHECK-NEXT:    add r3, r10, r30
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    xsmuldp f0, f0, f1
+; CHECK-NEXT:    lfd f1, 0(r14)
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    add r10, r3, r30
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfd f0, 0(r31)
-; CHECK-NEXT:    add r31, r31, r12
-; CHECK-NEXT:    lfdx f0, r11, r7
-; CHECK-NEXT:    lfdx f1, r2, r7
+; CHECK-NEXT:    stfd f0, 0(r14)
+; CHECK-NEXT:    add r14, r14, r11
+; CHECK-NEXT:    lfdx f0, r2, r0
+; CHECK-NEXT:    lfdx f1, r31, r0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r14, r9
+; CHECK-NEXT:    lfdx f1, r15, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r14, r9
-; CHECK-NEXT:    lfdx f0, r11, r8
-; CHECK-NEXT:    lfdx f1, r2, r8
-; CHECK-NEXT:    add r11, r11, r12
-; CHECK-NEXT:    add r2, r2, r12
+; CHECK-NEXT:    stfdx f0, r15, r7
+; CHECK-NEXT:    lfdx f0, r2, r12
+; CHECK-NEXT:    lfdx f1, r31, r12
+; CHECK-NEXT:    add r2, r2, r11
+; CHECK-NEXT:    add r31, r31, r11
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r15, r9
+; CHECK-NEXT:    lfdx f1, r16, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r15, r9
-; CHECK-NEXT:    lfd f0, 0(r18)
-; CHECK-NEXT:    lfd f1, 0(r17)
+; CHECK-NEXT:    stfdx f0, r16, r7
+; CHECK-NEXT:    lfd f0, 0(r17)
+; CHECK-NEXT:    lfd f1, 0(r18)
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r16, r9
+; CHECK-NEXT:    lfdx f1, r19, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r16, r9
-; CHECK-NEXT:    lfdx f0, r18, r7
-; CHECK-NEXT:    lfdx f1, r17, r7
+; CHECK-NEXT:    stfdx f0, r19, r7
+; CHECK-NEXT:    lfdx f0, r17, r0
+; CHECK-NEXT:    lfdx f1, r18, r0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r19, r9
+; CHECK-NEXT:    lfdx f1, r20, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r19, r9
-; CHECK-NEXT:    lfdx f0, r18, r8
-; CHECK-NEXT:    lfdx f1, r17, r8
-; CHECK-NEXT:    add r18, r18, r12
-; CHECK-NEXT:    add r17, r17, r12
+; CHECK-NEXT:    stfdx f0, r20, r7
+; CHECK-NEXT:    lfdx f0, r17, r12
+; CHECK-NEXT:    lfdx f1, r18, r12
+; CHECK-NEXT:    add r17, r17, r11
+; CHECK-NEXT:    add r18, r18, r11
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r20, r9
+; CHECK-NEXT:    lfdx f1, r21, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r20, r9
-; CHECK-NEXT:    lfd f0, 0(r23)
-; CHECK-NEXT:    lfd f1, 0(r22)
+; CHECK-NEXT:    stfdx f0, r21, r7
+; CHECK-NEXT:    lfd f0, 0(r22)
+; CHECK-NEXT:    lfd f1, 0(r23)
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r21, r9
+; CHECK-NEXT:    lfdx f1, r24, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r21, r9
-; CHECK-NEXT:    lfdx f0, r23, r7
-; CHECK-NEXT:    lfdx f1, r22, r7
+; CHECK-NEXT:    stfdx f0, r24, r7
+; CHECK-NEXT:    lfdx f0, r22, r0
+; CHECK-NEXT:    lfdx f1, r23, r0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r24, r9
+; CHECK-NEXT:    lfdx f1, r25, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r24, r9
-; CHECK-NEXT:    lfdx f0, r23, r8
-; CHECK-NEXT:    lfdx f1, r22, r8
-; CHECK-NEXT:    add r23, r23, r12
-; CHECK-NEXT:    add r22, r22, r12
+; CHECK-NEXT:    stfdx f0, r25, r7
+; CHECK-NEXT:    lfdx f0, r22, r12
+; CHECK-NEXT:    lfdx f1, r23, r12
+; CHECK-NEXT:    add r22, r22, r11
+; CHECK-NEXT:    add r23, r23, r11
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r25, r9
+; CHECK-NEXT:    lfdx f1, r26, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r25, r9
-; CHECK-NEXT:    lfd f0, 0(r28)
-; CHECK-NEXT:    lfd f1, 0(r27)
+; CHECK-NEXT:    stfdx f0, r26, r7
+; CHECK-NEXT:    lfd f0, 0(r27)
+; CHECK-NEXT:    lfd f1, 0(r28)
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r26, r9
+; CHECK-NEXT:    lfdx f1, r29, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r26, r9
-; CHECK-NEXT:    lfdx f0, r28, r7
-; CHECK-NEXT:    lfdx f1, r27, r7
+; CHECK-NEXT:    stfdx f0, r29, r7
+; CHECK-NEXT:    lfdx f0, r27, r0
+; CHECK-NEXT:    lfdx f1, r28, r0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r29, r9
+; CHECK-NEXT:    lfdx f1, r8, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r29, r9
-; CHECK-NEXT:    lfdx f0, r28, r8
-; CHECK-NEXT:    lfdx f1, r27, r8
-; CHECK-NEXT:    add r28, r28, r12
-; CHECK-NEXT:    add r27, r27, r12
+; CHECK-NEXT:    stfdx f0, r8, r7
+; CHECK-NEXT:    lfdx f0, r27, r12
+; CHECK-NEXT:    lfdx f1, r28, r12
+; CHECK-NEXT:    add r27, r27, r11
+; CHECK-NEXT:    add r28, r28, r11
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r30, r9
+; CHECK-NEXT:    lfdx f1, r9, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r30, r9
-; CHECK-NEXT:    add r9, r9, r12
+; CHECK-NEXT:    stfdx f0, r9, r7
+; CHECK-NEXT:    add r7, r7, r11
 ; CHECK-NEXT:    bdnz .LBB7_3
 ; CHECK-NEXT:  .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT:    ld r7, -192(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    cmpldi r6, 0
 ; CHECK-NEXT:    beq cr0, .LBB7_7
 ; CHECK-NEXT:  # %bb.5: # %for.body.epil.preheader
-; CHECK-NEXT:    ld r4, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    sldi r7, r7, 3
-; CHECK-NEXT:    add r4, r0, r4
-; CHECK-NEXT:    sldi r4, r4, 3
-; CHECK-NEXT:    add r3, r5, r4
-; CHECK-NEXT:    add r8, r29, r4
-; CHECK-NEXT:    add r9, r30, r4
-; CHECK-NEXT:    ld r4, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    add r4, r0, r4
-; CHECK-NEXT:    sldi r4, r4, 3
-; CHECK-NEXT:    add r10, r5, r4
-; CHECK-NEXT:    add r11, r29, r4
-; CHECK-NEXT:    add r12, r30, r4
-; CHECK-NEXT:    ld r4, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    add r4, r0, r4
-; CHECK-NEXT:    sldi r0, r4, 3
-; CHECK-NEXT:    add r5, r5, r0
-; CHECK-NEXT:    add r4, r29, r0
-; CHECK-NEXT:    add r30, r30, r0
-; CHECK-NEXT:    li r0, 0
+; CHECK-NEXT:    ld r3, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r0, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    sldi r8, r30, 3
+; CHECK-NEXT:    add r3, r10, r3
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    add r7, r5, r3
+; CHECK-NEXT:    add r9, r4, r3
+; CHECK-NEXT:    add r11, r0, r3
+; CHECK-NEXT:    ld r3, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    add r3, r10, r3
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    add r12, r5, r3
+; CHECK-NEXT:    add r30, r4, r3
+; CHECK-NEXT:    add r29, r0, r3
+; CHECK-NEXT:    ld r3, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    add r3, r10, r3
+; CHECK-NEXT:    li r10, 0
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    add r5, r5, r3
+; CHECK-NEXT:    add r4, r4, r3
+; CHECK-NEXT:    add r3, r0, r3
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB7_6: # %for.body.epil
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lfdx f0, r30, r0
-; CHECK-NEXT:    lfdx f1, r4, r0
+; CHECK-NEXT:    lfdx f0, r3, r10
+; CHECK-NEXT:    lfdx f1, r4, r10
 ; CHECK-NEXT:    addi r6, r6, -1
 ; CHECK-NEXT:    cmpldi r6, 0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
 ; CHECK-NEXT:    lfd f1, 0(r5)
 ; CHECK-NEXT:    xsadddp f0, f1, f0
 ; CHECK-NEXT:    stfd f0, 0(r5)
-; CHECK-NEXT:    add r5, r5, r7
-; CHECK-NEXT:    lfdx f0, r12, r0
-; CHECK-NEXT:    lfdx f1, r11, r0
+; CHECK-NEXT:    add r5, r5, r8
+; CHECK-NEXT:    lfdx f0, r29, r10
+; CHECK-NEXT:    lfdx f1, r30, r10
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r10, r0
+; CHECK-NEXT:    lfdx f1, r12, r10
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r10, r0
-; CHECK-NEXT:    lfdx f0, r9, r0
-; CHECK-NEXT:    lfdx f1, r8, r0
+; CHECK-NEXT:    stfdx f0, r12, r10
+; CHECK-NEXT:    lfdx f0, r11, r10
+; CHECK-NEXT:    lfdx f1, r9, r10
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r3, r0
+; CHECK-NEXT:    lfdx f1, r7, r10
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r3, r0
-; CHECK-NEXT:    add r0, r0, r7
+; CHECK-NEXT:    stfdx f0, r7, r10
+; CHECK-NEXT:    add r10, r10, r8
 ; CHECK-NEXT:    bne cr0, .LBB7_6
 ; CHECK-NEXT:  .LBB7_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ld r2, -152(r1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
index c733a01950603..4b032781c3764 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
@@ -30,16 +30,14 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stw r12, 8(r1)
-; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r29, -24
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    .cfi_offset cr2, 8
 ; CHECK-NEXT:    .cfi_offset cr3, 8
 ; CHECK-NEXT:    .cfi_offset cr4, 8
-; CHECK-NEXT:    std r29, 40(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 48(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, 32(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    bl call_2 at notoc
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_13
 ; CHECK-NEXT:  # %bb.1: # %bb
@@ -67,11 +65,10 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-NEXT:    bc 12, 4*cr3+eq, .LBB0_11
 ; CHECK-NEXT:  # %bb.6: # %bb32
 ; CHECK-NEXT:    #
+; CHECK-NEXT:    rlwinm r30, r30, 0, 24, 22
 ; CHECK-NEXT:    andi. r3, r30, 2
-; CHECK-NEXT:    rlwinm r29, r30, 0, 24, 22
 ; CHECK-NEXT:    mcrf cr2, cr0
 ; CHECK-NEXT:    bl call_4 at notoc
-; CHECK-NEXT:    mr r30, r29
 ; CHECK-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-NEXT:  # %bb.7: # %bb37
 ; CHECK-NEXT:  .LBB0_8: # %bb22
@@ -92,13 +89,11 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-BE-NEXT:    stdu r1, -144(r1)
 ; CHECK-BE-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-BE-NEXT:    .cfi_offset lr, 16
-; CHECK-BE-NEXT:    .cfi_offset r28, -32
 ; CHECK-BE-NEXT:    .cfi_offset r29, -24
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
-; CHECK-BE-NEXT:    std r28, 112(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    std r29, 120(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    bl call_2
@@ -131,12 +126,11 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-BE-NEXT:    bc 12, 4*cr3+eq, .LBB0_11
 ; CHECK-BE-NEXT:  # %bb.6: # %bb32
 ; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 24, 22
 ; CHECK-BE-NEXT:    andi. r3, r29, 2
-; CHECK-BE-NEXT:    rlwinm r28, r29, 0, 24, 22
 ; CHECK-BE-NEXT:    mcrf cr2, cr0
 ; CHECK-BE-NEXT:    bl call_4
 ; CHECK-BE-NEXT:    nop
-; CHECK-BE-NEXT:    mr r29, r28
 ; CHECK-BE-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-BE-NEXT:  # %bb.7: # %bb37
 ; CHECK-BE-NEXT:  .LBB0_8: # %bb22
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
deleted f...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/100094


More information about the llvm-branch-commits mailing list