[llvm-branch-commits] [llvm] 5a87328 - more ds/dq preparation

Chen Zheng via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Aug 17 22:15:11 PDT 2021


Author: Chen Zheng
Date: 2021-07-15T07:54:49Z
New Revision: 5a8732852b4d7225acaa347f705798fe7d61e92c

URL: https://github.com/llvm/llvm-project/commit/5a8732852b4d7225acaa347f705798fe7d61e92c
DIFF: https://github.com/llvm/llvm-project/commit/5a8732852b4d7225acaa347f705798fe7d61e92c.diff

LOG: more ds/dq preparation

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
    llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
    llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 1d2b1ed3f6269..5f08268277a0e 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -169,7 +169,7 @@ namespace {
 
   private:
     PPCTargetMachine *TM = nullptr;
-    const PPCSubtarget *ST; 
+    const PPCSubtarget *ST;
     DominatorTree *DT;
     LoopInfo *LI;
     ScalarEvolution *SE;
@@ -184,10 +184,13 @@ namespace {
     bool runOnLoop(Loop *L);
 
     /// Check if required PHI node is already exist in Loop \p L.
-    bool alreadyPrepared(Loop *L, Instruction* MemI,
+    bool alreadyPrepared(Loop *L, Instruction *MemI,
                          const SCEV *BasePtrStartSCEV,
-                         const SCEVConstant *BasePtrIncSCEV,
-                         InstrForm Form);
+                         const SCEV *BasePtrIncSCEV, InstrForm Form);
+
+    /// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
+    Value *getPreparedIncNode(Loop *L, Instruction *MemI,
+                              const SCEV *BasePtrIncSCEV);
 
     /// Collect condition matched(\p isValidCandidate() returns true)
     /// candidates in Loop \p L.
@@ -266,7 +269,7 @@ static std::string getInstrName(const Value *I, StringRef Suffix) {
   if (I->hasName())
     return (I->getName() + Suffix).str();
   else
-    return ""; 
+    return "";
 }
 
 static Value *GetPointerOperand(Value *MemI) {
@@ -404,13 +407,13 @@ bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
   // contains following load/stores with 
diff erent remainders:
   // 1: 10 load/store whose remainder is 1;
   // 2: 9 load/store whose remainder is 2;
-  // 3: 1 for remainder 3 and 0 for remainder 0; 
+  // 3: 1 for remainder 3 and 0 for remainder 0;
   // Now we will choose the first load/store whose remainder is 1 as base and
   // adjust all other load/stores according to new base, so we will get 10 DS
   // form and 10 X form.
   // But we should be more clever, for this case we could use two bases, one for
-  // remainder 1 and the other for remainder 2, thus we could get 19 DS form and 1
-  // X form.
+  // remainder 1 and the other for remainder 2, thus we could get 19 DS form and
+  // 1 X form.
   unsigned MaxCountRemainder = 0;
   for (unsigned j = 0; j < (unsigned)Form; j++)
     if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
@@ -515,28 +518,48 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
   if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L))
     return MadeChange;
 
-  const SCEVConstant *BasePtrIncSCEV =
-    dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
-  if (!BasePtrIncSCEV)
+  bool IsConstantInc = false;
+  const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);
+  Value *IncNode = getPreparedIncNode(L, MemI, BasePtrIncSCEV);
+
+  const SCEVConstant *BasePtrIncConstantSCEV =
+      dyn_cast<SCEVConstant>(BasePtrIncSCEV);
+  if (BasePtrIncConstantSCEV)
+    IsConstantInc = true;
+
+  // No valid representation for the increment.
+  if (!IncNode) {
+    LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n");
     return MadeChange;
+  }
+
+  // Now we only handle update form for constant increment.
+  // FIXME: add support for non-constant increment UpdateForm.
+  if (!IsConstantInc && Form == UpdateForm) {
+    LLVM_DEBUG(dbgs() << "not a constant incresement for update form!\n");
+    return MadeChange;
+  }
 
   // For some DS form load/store instructions, it can also be an update form,
   // if the stride is a multipler of 4. Use update form if prefer it.
-  bool CanPreInc = (Form == UpdateForm ||
-                    ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) &&
-                     PreferUpdateForm));
+  bool CanPreInc =
+      (Form == UpdateForm ||
+       ((Form == DSForm) && IsConstantInc &&
+        !BasePtrIncConstantSCEV->getAPInt().urem(4) && PreferUpdateForm));
   const SCEV *BasePtrStartSCEV = nullptr;
   if (CanPreInc)
     BasePtrStartSCEV =
-        SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV);
+        SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncConstantSCEV);
   else
     BasePtrStartSCEV = BasePtrSCEV->getStart();
 
   if (!isSafeToExpand(BasePtrStartSCEV, *SE))
     return MadeChange;
 
-  if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form))
+  if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) {
+    LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n");
     return MadeChange;
+  }
 
   LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
 
@@ -565,9 +588,11 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
   Instruction *PtrInc = nullptr;
   Instruction *NewBasePtr = nullptr;
   if (CanPreInc) {
+    assert(BasePtrIncConstantSCEV &&
+           "update form now only supports constant increment.");
     Instruction *InsPoint = &*Header->getFirstInsertionPt();
     PtrInc = GetElementPtrInst::Create(
-        I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+        I8Ty, NewPHI, BasePtrIncConstantSCEV->getValue(),
         getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
     cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
     for (auto PI : predecessors(Header)) {
@@ -594,9 +619,8 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
       BasicBlock *BB = PI;
       Instruction *InsPoint = BB->getTerminator();
       PtrInc = GetElementPtrInst::Create(
-          I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
-          getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
-
+          I8Ty, NewPHI, IncNode, getInstrName(MemI, GEPNodeIncNameSuffix),
+          InsPoint);
       cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
 
       NewPHI->addIncoming(PtrInc, PI);
@@ -673,7 +697,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
 
   MadeChange = true;
 
-  SuccPrepCount++;  
+  SuccPrepCount++;
 
   if (Form == DSForm && !CanPreInc)
     DSFormChainRewritten++;
@@ -726,14 +750,98 @@ bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Bucket
   return MadeChange;
 }
 
+Value *PPCLoopInstrFormPrep::getPreparedIncNode(Loop *L, Instruction *MemI,
+                                                const SCEV *BasePtrIncSCEV) {
+  if (isa<SCEVConstant>(BasePtrIncSCEV))
+    return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
+
+  if (!SE->isLoopInvariant(BasePtrIncSCEV, L))
+    return nullptr;
+
+  BasicBlock *BB = MemI->getParent();
+  if (!BB)
+    return nullptr;
+
+  BasicBlock *PredBB = L->getLoopPredecessor();
+  BasicBlock *LatchBB = L->getLoopLatch();
+
+  if (!PredBB || !LatchBB)
+    return nullptr;
+
+  auto getExistingNode = [&](Instruction *I) -> Value * {
+    Value *StrippedBasePtr = I;
+    while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr)) {
+      // We only check bitcast instruction with only 1 user here for compiling
+      // time considering.
+      if (BC->hasOneUser())
+        StrippedBasePtr = *BC->users().begin();
+      else
+        break;
+    }
+
+    Instruction *StrippedI = dyn_cast<Instruction>(StrippedBasePtr);
+    if (!StrippedI)
+      return nullptr;
+
+    // LSR pass may add a getelementptr instruction to do the loop increment,
+    // also search in that getelementptr instruction.
+    if (StrippedI->getOpcode() == Instruction::Add ||
+        (StrippedI->getOpcode() == Instruction::GetElementPtr &&
+         StrippedI->getNumOperands() == 2)) {
+      if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV)
+        return StrippedI->getOperand(0);
+      if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV)
+        return StrippedI->getOperand(1);
+      return nullptr;
+    }
+    return nullptr;
+  };
+
+  // Run through the PHIs and check their add users to find valid representation
+  // for the increment SCEV.
+  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
+  for (auto &CurrentPHI : PHIIter) {
+    PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
+    if (!CurrentPHINode)
+      continue;
+
+    if (!SE->isSCEVable(CurrentPHINode->getType()))
+      continue;
+
+    const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
+
+    const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
+    if (!PHIBasePtrSCEV)
+      continue;
+
+    const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE);
+    if (!PHIBasePtrIncSCEV)
+      continue;
+
+    if (CurrentPHINode->getNumIncomingValues() == 2) {
+      if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
+           CurrentPHINode->getIncomingBlock(1) == PredBB) ||
+          (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
+           CurrentPHINode->getIncomingBlock(0) == PredBB)) {
+        if (PHIBasePtrIncSCEV == BasePtrIncSCEV)
+          for (User *User : CurrentPHINode->users())
+            if (Instruction *I = dyn_cast<Instruction>(User))
+              if (Value *IncNode = getExistingNode(I))
+                return IncNode;
+      }
+    }
+  }
+  return nullptr;
+}
+
 // In order to prepare for the preferred instruction form, a PHI is added.
 // This function will check to see if that PHI already exists and will return
 // true if it found an existing PHI with the matched start and increment as the
 // one we wanted to create.
-bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
-                                        const SCEV *BasePtrStartSCEV,
-                                        const SCEVConstant *BasePtrIncSCEV,
-                                        InstrForm Form) {
+bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
+                                           const SCEV *BasePtrStartSCEV,
+                                           const SCEV *BasePtrIncSCEV,
+                                           InstrForm Form) {
   BasicBlock *BB = MemI->getParent();
   if (!BB)
     return false;
@@ -777,7 +885,7 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
               PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
             ++PHINodeAlreadyExistsUpdate;
             return true;
-          } 
+          }
           if (Form == DSForm || Form == DQForm) {
             const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
                 SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
@@ -788,7 +896,7 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
                 ++PHINodeAlreadyExistsDQ;
               return true;
             }
-          } 
+          }
         }
       }
     }

diff  --git a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
index 8f6717420eb68..6132074004305 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -disable-lsr -ppc-asm-full-reg-names -verify-machineinstrs \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
 
-; FIXME: PPCLoopInstrFormPrep should be able to common base for "(unsigned long long *)(p + j + 5)"
-; and "(unsigned long long *)(p + j + 9)", thus we only have two DS form load inside the loop.
-
 ; long long foo(char *p, int n, int count) {
 ;   int j = 0;
 ;   long long sum = 0;
@@ -22,29 +19,24 @@ define i64 @foo(i8* %p, i32 signext %n, i32 signext %count) {
 ; CHECK-NEXT:    cmpwi r4, 1
 ; CHECK-NEXT:    blt cr0, .LBB0_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    clrldi r4, r4, 32
+; CHECK-NEXT:    addi r6, r3, 5
+; CHECK-NEXT:    clrldi r3, r4, 32
 ; CHECK-NEXT:    extsw r5, r5
-; CHECK-NEXT:    li r6, 0
-; CHECK-NEXT:    li r7, 5
-; CHECK-NEXT:    mtctr r4
-; CHECK-NEXT:    li r8, 9
-; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_2: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    add r9, r3, r6
+; CHECK-NEXT:    ld r4, 0(r6)
+; CHECK-NEXT:    add r3, r4, r3
+; CHECK-NEXT:    ld r4, 4(r6)
 ; CHECK-NEXT:    add r6, r6, r5
-; CHECK-NEXT:    ldx r10, r9, r7
-; CHECK-NEXT:    ldx r9, r9, r8
-; CHECK-NEXT:    add r4, r10, r4
-; CHECK-NEXT:    add r4, r4, r9
+; CHECK-NEXT:    add r3, r3, r4
 ; CHECK-NEXT:    bdnz .LBB0_2
 ; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
-; CHECK-NEXT:    mr r3, r4
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    li r4, 0
-; CHECK-NEXT:    mr r3, r4
+; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    blr
 entry:
   %cmp16 = icmp sgt i32 %n, 0

diff  --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
index 0c030bf6601b5..346353bc12d0a 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
@@ -6,6 +6,10 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpd 5, 7
+; CHECK-NEXT:    std 19, -104(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 20, -96(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 21, -88(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 22, -80(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 25, -56(1) # 8-byte Folded Spill
@@ -17,89 +21,95 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
 ; CHECK-NEXT:    bge 0, .LBB0_6
 ; CHECK-NEXT:  # %bb.1: # %.preheader
 ; CHECK-NEXT:    addi 30, 5, 1
-; CHECK-NEXT:    addi 29, 5, 3
-; CHECK-NEXT:    addi 28, 5, 2
+; CHECK-NEXT:    addi 28, 5, 3
+; CHECK-NEXT:    addi 27, 5, 2
 ; CHECK-NEXT:    mulld 12, 8, 5
-; CHECK-NEXT:    addi 3, 3, 16
+; CHECK-NEXT:    addi 29, 3, 16
 ; CHECK-NEXT:    mulld 0, 9, 8
-; CHECK-NEXT:    sldi 11, 10, 3
+; CHECK-NEXT:    mr 25, 12
 ; CHECK-NEXT:    mulld 30, 8, 30
-; CHECK-NEXT:    mulld 29, 8, 29
-; CHECK-NEXT:    mulld 8, 8, 28
+; CHECK-NEXT:    mulld 28, 8, 28
+; CHECK-NEXT:    mulld 8, 8, 27
+; CHECK-NEXT:    sldi 11, 10, 3
+; CHECK-NEXT:    li 27, 0
+; CHECK-NEXT:    mr 26, 30
 ; CHECK-NEXT:    b .LBB0_3
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    add 5, 5, 9
-; CHECK-NEXT:    add 12, 12, 0
-; CHECK-NEXT:    add 30, 30, 0
-; CHECK-NEXT:    add 29, 29, 0
+; CHECK-NEXT:    add 25, 25, 0
+; CHECK-NEXT:    add 26, 26, 0
+; CHECK-NEXT:    add 28, 28, 0
 ; CHECK-NEXT:    add 8, 8, 0
+; CHECK-NEXT:    addi 27, 27, 1
 ; CHECK-NEXT:    cmpd 5, 7
 ; CHECK-NEXT:    bge 0, .LBB0_6
 ; CHECK-NEXT:  .LBB0_3: # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB0_5 Depth 2
-; CHECK-NEXT:    sub 28, 5, 10
-; CHECK-NEXT:    cmpd 6, 28
+; CHECK-NEXT:    sub 24, 5, 10
+; CHECK-NEXT:    cmpd 6, 24
 ; CHECK-NEXT:    bge 0, .LBB0_2
 ; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    add 26, 6, 12
-; CHECK-NEXT:    add 25, 6, 30
-; CHECK-NEXT:    add 24, 6, 29
-; CHECK-NEXT:    add 23, 6, 8
-; CHECK-NEXT:    sldi 27, 6, 3
-; CHECK-NEXT:    sldi 26, 26, 3
-; CHECK-NEXT:    sldi 25, 25, 3
-; CHECK-NEXT:    sldi 24, 24, 3
-; CHECK-NEXT:    sldi 23, 23, 3
-; CHECK-NEXT:    add 27, 4, 27
-; CHECK-NEXT:    add 26, 3, 26
-; CHECK-NEXT:    add 25, 3, 25
-; CHECK-NEXT:    add 24, 3, 24
-; CHECK-NEXT:    add 23, 3, 23
+; CHECK-NEXT:    maddld 19, 0, 27, 30
+; CHECK-NEXT:    maddld 20, 0, 27, 12
+; CHECK-NEXT:    add 22, 6, 28
+; CHECK-NEXT:    add 21, 6, 8
+; CHECK-NEXT:    add 20, 6, 20
+; CHECK-NEXT:    add 19, 6, 19
+; CHECK-NEXT:    sldi 23, 6, 3
+; CHECK-NEXT:    sldi 22, 22, 3
+; CHECK-NEXT:    sldi 21, 21, 3
+; CHECK-NEXT:    add 23, 4, 23
+; CHECK-NEXT:    add 22, 29, 22
+; CHECK-NEXT:    add 21, 29, 21
+; CHECK-NEXT:    sldi 20, 20, 3
+; CHECK-NEXT:    sldi 19, 19, 3
+; CHECK-NEXT:    add 20, 3, 20
+; CHECK-NEXT:    add 19, 3, 19
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_5: # Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    lfd 0, 0(27)
-; CHECK-NEXT:    lfd 1, -16(26)
+; CHECK-NEXT:    lfd 0, 0(23)
+; CHECK-NEXT:    lfd 1, 0(20)
 ; CHECK-NEXT:    add 6, 6, 10
-; CHECK-NEXT:    cmpd 6, 28
+; CHECK-NEXT:    cmpd 6, 24
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -8(26)
+; CHECK-NEXT:    lfd 1, 8(20)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 0(26)
+; CHECK-NEXT:    lfd 1, 16(20)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 8(26)
-; CHECK-NEXT:    add 26, 26, 11
+; CHECK-NEXT:    lfd 1, 24(20)
+; CHECK-NEXT:    add 20, 20, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -16(25)
+; CHECK-NEXT:    lfd 1, 0(19)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -8(25)
+; CHECK-NEXT:    lfd 1, 8(19)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 0(25)
+; CHECK-NEXT:    lfd 1, 16(19)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 8(25)
-; CHECK-NEXT:    add 25, 25, 11
+; CHECK-NEXT:    lfd 1, 24(19)
+; CHECK-NEXT:    add 19, 19, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -16(23)
+; CHECK-NEXT:    lfd 1, -16(21)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -8(23)
+; CHECK-NEXT:    lfd 1, -8(21)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 0(23)
+; CHECK-NEXT:    lfd 1, 0(21)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 8(23)
-; CHECK-NEXT:    add 23, 23, 11
+; CHECK-NEXT:    lfd 1, 8(21)
+; CHECK-NEXT:    add 21, 21, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -16(24)
+; CHECK-NEXT:    lfd 1, -16(22)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -8(24)
+; CHECK-NEXT:    lfd 1, -8(22)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 0(24)
+; CHECK-NEXT:    lfd 1, 0(22)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 8(24)
-; CHECK-NEXT:    add 24, 24, 11
+; CHECK-NEXT:    lfd 1, 8(22)
+; CHECK-NEXT:    add 22, 22, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    stfd 0, 0(27)
-; CHECK-NEXT:    add 27, 27, 11
+; CHECK-NEXT:    stfd 0, 0(23)
+; CHECK-NEXT:    add 23, 23, 11
 ; CHECK-NEXT:    blt 0, .LBB0_5
 ; CHECK-NEXT:    b .LBB0_2
 ; CHECK-NEXT:  .LBB0_6:
@@ -111,6 +121,10 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
 ; CHECK-NEXT:    ld 25, -56(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld 24, -64(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld 23, -72(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 22, -80(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 21, -88(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 20, -96(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 19, -104(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
   %9 = icmp slt i64 %2, %4
   br i1 %9, label %10, label %97


        


More information about the llvm-branch-commits mailing list