[llvm] 946e69d - [PowerPC] prepare more loop load/store instructions

Chen Zheng via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 13 22:01:19 PDT 2021


Author: Chen Zheng
Date: 2021-09-14T05:00:48Z
New Revision: 946e69d253f23d62ed56f8f969414bb07a655e4e

URL: https://github.com/llvm/llvm-project/commit/946e69d253f23d62ed56f8f969414bb07a655e4e
DIFF: https://github.com/llvm/llvm-project/commit/946e69d253f23d62ed56f8f969414bb07a655e4e.diff

LOG: [PowerPC] prepare more loop load/store instructions

PPCLoopInstrFormPrep pass now can prepare for load store instructions
in a loop whose increment is not a constant integer.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D105872

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
    llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
    llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 016aa4cbba8b5..5cdd78c732a91 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -186,8 +186,11 @@ namespace {
     /// Check if required PHI node is already exist in Loop \p L.
     bool alreadyPrepared(Loop *L, Instruction *MemI,
                          const SCEV *BasePtrStartSCEV,
-                         const SCEVConstant *BasePtrIncSCEV,
-                         InstrForm Form);
+                         const SCEV *BasePtrIncSCEV, InstrForm Form);
+
+    /// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
+    Value *getNodeForInc(Loop *L, Instruction *MemI,
+                         const SCEV *BasePtrIncSCEV);
 
     /// Collect condition matched(\p isValidCandidate() returns true)
     /// candidates in Loop \p L.
@@ -514,28 +517,49 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
   if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L))
     return MadeChange;
 
-  const SCEVConstant *BasePtrIncSCEV =
-    dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
-  if (!BasePtrIncSCEV)
+  bool IsConstantInc = false;
+  const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);
+  Value *IncNode = getNodeForInc(L, MemI, BasePtrIncSCEV);
+
+  const SCEVConstant *BasePtrIncConstantSCEV =
+      dyn_cast<SCEVConstant>(BasePtrIncSCEV);
+  if (BasePtrIncConstantSCEV)
+    IsConstantInc = true;
+
+  // No valid representation for the increment.
+  if (!IncNode) {
+    LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n");
     return MadeChange;
+  }
+
+  // Now we only handle update form for constant increment.
+  // FIXME: add support for non-constant increment UpdateForm.
+  if (!IsConstantInc && Form == UpdateForm) {
+    LLVM_DEBUG(dbgs() << "not a constant increment for update form!\n");
+    return MadeChange;
+  }
 
   // For some DS form load/store instructions, it can also be an update form,
-  // if the stride is a multipler of 4. Use update form if prefer it.
-  bool CanPreInc = (Form == UpdateForm ||
-                    ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) &&
-                     PreferUpdateForm));
+  // if the stride is constant and is a multipler of 4. Use update form if
+  // prefer it.
+  bool CanPreInc =
+      (Form == UpdateForm ||
+       ((Form == DSForm) && IsConstantInc &&
+        !BasePtrIncConstantSCEV->getAPInt().urem(4) && PreferUpdateForm));
   const SCEV *BasePtrStartSCEV = nullptr;
   if (CanPreInc)
     BasePtrStartSCEV =
-        SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV);
+        SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncConstantSCEV);
   else
     BasePtrStartSCEV = BasePtrSCEV->getStart();
 
   if (!isSafeToExpand(BasePtrStartSCEV, *SE))
     return MadeChange;
 
-  if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form))
+  if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) {
+    LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n");
     return MadeChange;
+  }
 
   LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
 
@@ -564,9 +588,11 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
   Instruction *PtrInc = nullptr;
   Instruction *NewBasePtr = nullptr;
   if (CanPreInc) {
+    assert(BasePtrIncConstantSCEV &&
+           "update form now only supports constant increment.");
     Instruction *InsPoint = &*Header->getFirstInsertionPt();
     PtrInc = GetElementPtrInst::Create(
-        I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+        I8Ty, NewPHI, BasePtrIncConstantSCEV->getValue(),
         getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
     cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
     for (auto PI : predecessors(Header)) {
@@ -593,9 +619,8 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
       BasicBlock *BB = PI;
       Instruction *InsPoint = BB->getTerminator();
       PtrInc = GetElementPtrInst::Create(
-          I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
-          getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
-
+          I8Ty, NewPHI, IncNode, getInstrName(MemI, GEPNodeIncNameSuffix),
+          InsPoint);
       cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
 
       NewPHI->addIncoming(PtrInc, PI);
@@ -725,14 +750,90 @@ bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Bucket
   return MadeChange;
 }
 
+// Find the loop invariant increment node for SCEV BasePtrIncSCEV.
+// bb.loop.preheader:
+//   %start = ...
+// bb.loop.body:
+//   %phinode = phi [ %start, %bb.loop.preheader ], [ %add, %bb.loop.body ]
+//   ...
+//   %add = add %phinode, %inc  ; %inc is what we want to get.
+//
+Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,
+                                           const SCEV *BasePtrIncSCEV) {
+  // If the increment is a constant, no definition is needed.
+  // Return the value directly.
+  if (isa<SCEVConstant>(BasePtrIncSCEV))
+    return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
+
+  if (!SE->isLoopInvariant(BasePtrIncSCEV, L))
+    return nullptr;
+
+  BasicBlock *BB = MemI->getParent();
+  if (!BB)
+    return nullptr;
+
+  BasicBlock *LatchBB = L->getLoopLatch();
+
+  if (!LatchBB)
+    return nullptr;
+
+  // Run through the PHIs and check their operands to find valid representation
+  // for the increment SCEV.
+  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
+  for (auto &CurrentPHI : PHIIter) {
+    PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
+    if (!CurrentPHINode)
+      continue;
+
+    if (!SE->isSCEVable(CurrentPHINode->getType()))
+      continue;
+
+    const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
+
+    const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
+    if (!PHIBasePtrSCEV)
+      continue;
+
+    const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE);
+
+    if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV))
+      continue;
+
+    // Get the incoming value from the loop latch and check if the value has
+    // the add form with the required increment.
+    if (Instruction *I = dyn_cast<Instruction>(
+            CurrentPHINode->getIncomingValueForBlock(LatchBB))) {
+      Value *StrippedBaseI = I;
+      while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI))
+        StrippedBaseI = BC->getOperand(0);
+
+      Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI);
+      if (!StrippedI)
+        continue;
+
+      // LSR pass may add a getelementptr instruction to do the loop increment,
+      // also search in that getelementptr instruction.
+      if (StrippedI->getOpcode() == Instruction::Add ||
+          (StrippedI->getOpcode() == Instruction::GetElementPtr &&
+           StrippedI->getNumOperands() == 2)) {
+        if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV)
+          return StrippedI->getOperand(0);
+        if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV)
+          return StrippedI->getOperand(1);
+      }
+    }
+  }
+  return nullptr;
+}
+
 // In order to prepare for the preferred instruction form, a PHI is added.
 // This function will check to see if that PHI already exists and will return
 // true if it found an existing PHI with the matched start and increment as the
 // one we wanted to create.
 bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
-                                        const SCEV *BasePtrStartSCEV,
-                                        const SCEVConstant *BasePtrIncSCEV,
-                                        InstrForm Form) {
+                                           const SCEV *BasePtrStartSCEV,
+                                           const SCEV *BasePtrIncSCEV,
+                                           InstrForm Form) {
   BasicBlock *BB = MemI->getParent();
   if (!BB)
     return false;

diff  --git a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
index be15f456408f5..2db66b68d8d62 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -disable-lsr -ppc-asm-full-reg-names -verify-machineinstrs \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
 
-; FIXME: PPCLoopInstrFormPrep should be able to common base for "(unsigned long long *)(p + j + 5)"
-; and "(unsigned long long *)(p + j + 9)", thus we only have two DS form load inside the loop.
-
 ; long long foo(char *p, int n, int count) {
 ;   int j = 0;
 ;   long long sum = 0;
@@ -22,31 +19,26 @@ define i64 @foo(i8* %p, i32 signext %n, i32 signext %count) {
 ; CHECK-NEXT:    cmpwi r4, 1
 ; CHECK-NEXT:    blt cr0, .LBB0_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    addi r4, r4, -1
+; CHECK-NEXT:    addi r6, r3, 5
+; CHECK-NEXT:    addi r3, r4, -1
 ; CHECK-NEXT:    extsw r5, r5
-; CHECK-NEXT:    li r6, 0
-; CHECK-NEXT:    li r7, 5
-; CHECK-NEXT:    li r8, 9
-; CHECK-NEXT:    clrldi r4, r4, 32
-; CHECK-NEXT:    addi r4, r4, 1
-; CHECK-NEXT:    mtctr r4
-; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    addi r3, r3, 1
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_2: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    add r9, r3, r6
+; CHECK-NEXT:    ld r4, 0(r6)
+; CHECK-NEXT:    add r3, r4, r3
+; CHECK-NEXT:    ld r4, 4(r6)
 ; CHECK-NEXT:    add r6, r6, r5
-; CHECK-NEXT:    ldx r10, r9, r7
-; CHECK-NEXT:    ldx r9, r9, r8
-; CHECK-NEXT:    add r4, r10, r4
-; CHECK-NEXT:    add r4, r4, r9
+; CHECK-NEXT:    add r3, r3, r4
 ; CHECK-NEXT:    bdnz .LBB0_2
 ; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
-; CHECK-NEXT:    mr r3, r4
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    li r4, 0
-; CHECK-NEXT:    mr r3, r4
+; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    blr
 entry:
   %cmp16 = icmp sgt i32 %n, 0

diff  --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
index 0c030bf6601b5..346353bc12d0a 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
@@ -6,6 +6,10 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpd 5, 7
+; CHECK-NEXT:    std 19, -104(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 20, -96(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 21, -88(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 22, -80(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 25, -56(1) # 8-byte Folded Spill
@@ -17,89 +21,95 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
 ; CHECK-NEXT:    bge 0, .LBB0_6
 ; CHECK-NEXT:  # %bb.1: # %.preheader
 ; CHECK-NEXT:    addi 30, 5, 1
-; CHECK-NEXT:    addi 29, 5, 3
-; CHECK-NEXT:    addi 28, 5, 2
+; CHECK-NEXT:    addi 28, 5, 3
+; CHECK-NEXT:    addi 27, 5, 2
 ; CHECK-NEXT:    mulld 12, 8, 5
-; CHECK-NEXT:    addi 3, 3, 16
+; CHECK-NEXT:    addi 29, 3, 16
 ; CHECK-NEXT:    mulld 0, 9, 8
-; CHECK-NEXT:    sldi 11, 10, 3
+; CHECK-NEXT:    mr 25, 12
 ; CHECK-NEXT:    mulld 30, 8, 30
-; CHECK-NEXT:    mulld 29, 8, 29
-; CHECK-NEXT:    mulld 8, 8, 28
+; CHECK-NEXT:    mulld 28, 8, 28
+; CHECK-NEXT:    mulld 8, 8, 27
+; CHECK-NEXT:    sldi 11, 10, 3
+; CHECK-NEXT:    li 27, 0
+; CHECK-NEXT:    mr 26, 30
 ; CHECK-NEXT:    b .LBB0_3
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    add 5, 5, 9
-; CHECK-NEXT:    add 12, 12, 0
-; CHECK-NEXT:    add 30, 30, 0
-; CHECK-NEXT:    add 29, 29, 0
+; CHECK-NEXT:    add 25, 25, 0
+; CHECK-NEXT:    add 26, 26, 0
+; CHECK-NEXT:    add 28, 28, 0
 ; CHECK-NEXT:    add 8, 8, 0
+; CHECK-NEXT:    addi 27, 27, 1
 ; CHECK-NEXT:    cmpd 5, 7
 ; CHECK-NEXT:    bge 0, .LBB0_6
 ; CHECK-NEXT:  .LBB0_3: # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB0_5 Depth 2
-; CHECK-NEXT:    sub 28, 5, 10
-; CHECK-NEXT:    cmpd 6, 28
+; CHECK-NEXT:    sub 24, 5, 10
+; CHECK-NEXT:    cmpd 6, 24
 ; CHECK-NEXT:    bge 0, .LBB0_2
 ; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    add 26, 6, 12
-; CHECK-NEXT:    add 25, 6, 30
-; CHECK-NEXT:    add 24, 6, 29
-; CHECK-NEXT:    add 23, 6, 8
-; CHECK-NEXT:    sldi 27, 6, 3
-; CHECK-NEXT:    sldi 26, 26, 3
-; CHECK-NEXT:    sldi 25, 25, 3
-; CHECK-NEXT:    sldi 24, 24, 3
-; CHECK-NEXT:    sldi 23, 23, 3
-; CHECK-NEXT:    add 27, 4, 27
-; CHECK-NEXT:    add 26, 3, 26
-; CHECK-NEXT:    add 25, 3, 25
-; CHECK-NEXT:    add 24, 3, 24
-; CHECK-NEXT:    add 23, 3, 23
+; CHECK-NEXT:    maddld 19, 0, 27, 30
+; CHECK-NEXT:    maddld 20, 0, 27, 12
+; CHECK-NEXT:    add 22, 6, 28
+; CHECK-NEXT:    add 21, 6, 8
+; CHECK-NEXT:    add 20, 6, 20
+; CHECK-NEXT:    add 19, 6, 19
+; CHECK-NEXT:    sldi 23, 6, 3
+; CHECK-NEXT:    sldi 22, 22, 3
+; CHECK-NEXT:    sldi 21, 21, 3
+; CHECK-NEXT:    add 23, 4, 23
+; CHECK-NEXT:    add 22, 29, 22
+; CHECK-NEXT:    add 21, 29, 21
+; CHECK-NEXT:    sldi 20, 20, 3
+; CHECK-NEXT:    sldi 19, 19, 3
+; CHECK-NEXT:    add 20, 3, 20
+; CHECK-NEXT:    add 19, 3, 19
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_5: # Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    lfd 0, 0(27)
-; CHECK-NEXT:    lfd 1, -16(26)
+; CHECK-NEXT:    lfd 0, 0(23)
+; CHECK-NEXT:    lfd 1, 0(20)
 ; CHECK-NEXT:    add 6, 6, 10
-; CHECK-NEXT:    cmpd 6, 28
+; CHECK-NEXT:    cmpd 6, 24
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -8(26)
+; CHECK-NEXT:    lfd 1, 8(20)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 0(26)
+; CHECK-NEXT:    lfd 1, 16(20)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 8(26)
-; CHECK-NEXT:    add 26, 26, 11
+; CHECK-NEXT:    lfd 1, 24(20)
+; CHECK-NEXT:    add 20, 20, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -16(25)
+; CHECK-NEXT:    lfd 1, 0(19)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -8(25)
+; CHECK-NEXT:    lfd 1, 8(19)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 0(25)
+; CHECK-NEXT:    lfd 1, 16(19)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 8(25)
-; CHECK-NEXT:    add 25, 25, 11
+; CHECK-NEXT:    lfd 1, 24(19)
+; CHECK-NEXT:    add 19, 19, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -16(23)
+; CHECK-NEXT:    lfd 1, -16(21)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -8(23)
+; CHECK-NEXT:    lfd 1, -8(21)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 0(23)
+; CHECK-NEXT:    lfd 1, 0(21)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 8(23)
-; CHECK-NEXT:    add 23, 23, 11
+; CHECK-NEXT:    lfd 1, 8(21)
+; CHECK-NEXT:    add 21, 21, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -16(24)
+; CHECK-NEXT:    lfd 1, -16(22)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -8(24)
+; CHECK-NEXT:    lfd 1, -8(22)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 0(24)
+; CHECK-NEXT:    lfd 1, 0(22)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 8(24)
-; CHECK-NEXT:    add 24, 24, 11
+; CHECK-NEXT:    lfd 1, 8(22)
+; CHECK-NEXT:    add 22, 22, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    stfd 0, 0(27)
-; CHECK-NEXT:    add 27, 27, 11
+; CHECK-NEXT:    stfd 0, 0(23)
+; CHECK-NEXT:    add 23, 23, 11
 ; CHECK-NEXT:    blt 0, .LBB0_5
 ; CHECK-NEXT:    b .LBB0_2
 ; CHECK-NEXT:  .LBB0_6:
@@ -111,6 +121,10 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
 ; CHECK-NEXT:    ld 25, -56(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld 24, -64(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld 23, -72(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 22, -80(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 21, -88(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 20, -96(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 19, -104(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
   %9 = icmp slt i64 %2, %4
   br i1 %9, label %10, label %97


        


More information about the llvm-commits mailing list