[llvm-branch-commits] [llvm] 5a87328 - more ds/dq preparation
Chen Zheng via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 17 22:15:11 PDT 2021
Author: Chen Zheng
Date: 2021-07-15T07:54:49Z
New Revision: 5a8732852b4d7225acaa347f705798fe7d61e92c
URL: https://github.com/llvm/llvm-project/commit/5a8732852b4d7225acaa347f705798fe7d61e92c
DIFF: https://github.com/llvm/llvm-project/commit/5a8732852b4d7225acaa347f705798fe7d61e92c.diff
LOG: more ds/dq preparation
Added:
Modified:
llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 1d2b1ed3f6269..5f08268277a0e 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -169,7 +169,7 @@ namespace {
private:
PPCTargetMachine *TM = nullptr;
- const PPCSubtarget *ST;
+ const PPCSubtarget *ST;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
@@ -184,10 +184,13 @@ namespace {
bool runOnLoop(Loop *L);
/// Check if required PHI node is already exist in Loop \p L.
- bool alreadyPrepared(Loop *L, Instruction* MemI,
+ bool alreadyPrepared(Loop *L, Instruction *MemI,
const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV,
- InstrForm Form);
+ const SCEV *BasePtrIncSCEV, InstrForm Form);
+
+ /// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
+ Value *getPreparedIncNode(Loop *L, Instruction *MemI,
+ const SCEV *BasePtrIncSCEV);
/// Collect condition matched(\p isValidCandidate() returns true)
/// candidates in Loop \p L.
@@ -266,7 +269,7 @@ static std::string getInstrName(const Value *I, StringRef Suffix) {
if (I->hasName())
return (I->getName() + Suffix).str();
else
- return "";
+ return "";
}
static Value *GetPointerOperand(Value *MemI) {
@@ -404,13 +407,13 @@ bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
// contains following load/stores with different remainders:
// 1: 10 load/store whose remainder is 1;
// 2: 9 load/store whose remainder is 2;
- // 3: 1 for remainder 3 and 0 for remainder 0;
+ // 3: 1 for remainder 3 and 0 for remainder 0;
// Now we will choose the first load/store whose remainder is 1 as base and
// adjust all other load/stores according to new base, so we will get 10 DS
// form and 10 X form.
// But we should be more clever, for this case we could use two bases, one for
- // remainder 1 and the other for remainder 2, thus we could get 19 DS form and 1
- // X form.
+ // remainder 1 and the other for remainder 2, thus we could get 19 DS form and
+ // 1 X form.
unsigned MaxCountRemainder = 0;
for (unsigned j = 0; j < (unsigned)Form; j++)
if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
@@ -515,28 +518,48 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L))
return MadeChange;
- const SCEVConstant *BasePtrIncSCEV =
- dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
- if (!BasePtrIncSCEV)
+ bool IsConstantInc = false;
+ const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);
+ Value *IncNode = getPreparedIncNode(L, MemI, BasePtrIncSCEV);
+
+ const SCEVConstant *BasePtrIncConstantSCEV =
+ dyn_cast<SCEVConstant>(BasePtrIncSCEV);
+ if (BasePtrIncConstantSCEV)
+ IsConstantInc = true;
+
+ // No valid representation for the increment.
+ if (!IncNode) {
+ LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n");
return MadeChange;
+ }
+
+ // Now we only handle update form for constant increment.
+ // FIXME: add support for non-constant increment UpdateForm.
+ if (!IsConstantInc && Form == UpdateForm) {
+ LLVM_DEBUG(dbgs() << "not a constant incresement for update form!\n");
+ return MadeChange;
+ }
// For some DS form load/store instructions, it can also be an update form,
// if the stride is a multipler of 4. Use update form if prefer it.
- bool CanPreInc = (Form == UpdateForm ||
- ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) &&
- PreferUpdateForm));
+ bool CanPreInc =
+ (Form == UpdateForm ||
+ ((Form == DSForm) && IsConstantInc &&
+ !BasePtrIncConstantSCEV->getAPInt().urem(4) && PreferUpdateForm));
const SCEV *BasePtrStartSCEV = nullptr;
if (CanPreInc)
BasePtrStartSCEV =
- SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV);
+ SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncConstantSCEV);
else
BasePtrStartSCEV = BasePtrSCEV->getStart();
if (!isSafeToExpand(BasePtrStartSCEV, *SE))
return MadeChange;
- if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form))
+ if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) {
+ LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n");
return MadeChange;
+ }
LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
@@ -565,9 +588,11 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
Instruction *PtrInc = nullptr;
Instruction *NewBasePtr = nullptr;
if (CanPreInc) {
+ assert(BasePtrIncConstantSCEV &&
+ "update form now only supports constant increment.");
Instruction *InsPoint = &*Header->getFirstInsertionPt();
PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ I8Ty, NewPHI, BasePtrIncConstantSCEV->getValue(),
getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
for (auto PI : predecessors(Header)) {
@@ -594,9 +619,8 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
BasicBlock *BB = PI;
Instruction *InsPoint = BB->getTerminator();
PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
- getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
-
+ I8Ty, NewPHI, IncNode, getInstrName(MemI, GEPNodeIncNameSuffix),
+ InsPoint);
cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
NewPHI->addIncoming(PtrInc, PI);
@@ -673,7 +697,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
MadeChange = true;
- SuccPrepCount++;
+ SuccPrepCount++;
if (Form == DSForm && !CanPreInc)
DSFormChainRewritten++;
@@ -726,14 +750,98 @@ bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Bucket
return MadeChange;
}
+Value *PPCLoopInstrFormPrep::getPreparedIncNode(Loop *L, Instruction *MemI,
+ const SCEV *BasePtrIncSCEV) {
+ if (isa<SCEVConstant>(BasePtrIncSCEV))
+ return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
+
+ if (!SE->isLoopInvariant(BasePtrIncSCEV, L))
+ return nullptr;
+
+ BasicBlock *BB = MemI->getParent();
+ if (!BB)
+ return nullptr;
+
+ BasicBlock *PredBB = L->getLoopPredecessor();
+ BasicBlock *LatchBB = L->getLoopLatch();
+
+ if (!PredBB || !LatchBB)
+ return nullptr;
+
+ auto getExistingNode = [&](Instruction *I) -> Value * {
+ Value *StrippedBasePtr = I;
+ while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr)) {
+ // We only check bitcast instruction with only 1 user here for compiling
+ // time considering.
+ if (BC->hasOneUser())
+ StrippedBasePtr = *BC->users().begin();
+ else
+ break;
+ }
+
+ Instruction *StrippedI = dyn_cast<Instruction>(StrippedBasePtr);
+ if (!StrippedI)
+ return nullptr;
+
+ // LSR pass may add a getelementptr instruction to do the loop increment,
+ // also search in that getelementptr instruction.
+ if (StrippedI->getOpcode() == Instruction::Add ||
+ (StrippedI->getOpcode() == Instruction::GetElementPtr &&
+ StrippedI->getNumOperands() == 2)) {
+ if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV)
+ return StrippedI->getOperand(0);
+ if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV)
+ return StrippedI->getOperand(1);
+ return nullptr;
+ }
+ return nullptr;
+ };
+
+ // Run through the PHIs and check their add users to find valid representation
+ // for the increment SCEV.
+ iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
+ for (auto &CurrentPHI : PHIIter) {
+ PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
+ if (!CurrentPHINode)
+ continue;
+
+ if (!SE->isSCEVable(CurrentPHINode->getType()))
+ continue;
+
+ const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
+
+ const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
+ if (!PHIBasePtrSCEV)
+ continue;
+
+ const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE);
+ if (!PHIBasePtrIncSCEV)
+ continue;
+
+ if (CurrentPHINode->getNumIncomingValues() == 2) {
+ if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
+ CurrentPHINode->getIncomingBlock(1) == PredBB) ||
+ (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
+ CurrentPHINode->getIncomingBlock(0) == PredBB)) {
+ if (PHIBasePtrIncSCEV == BasePtrIncSCEV)
+ for (User *User : CurrentPHINode->users())
+ if (Instruction *I = dyn_cast<Instruction>(User))
+ if (Value *IncNode = getExistingNode(I))
+ return IncNode;
+ }
+ }
+ }
+ return nullptr;
+}
+
// In order to prepare for the preferred instruction form, a PHI is added.
// This function will check to see if that PHI already exists and will return
// true if it found an existing PHI with the matched start and increment as the
// one we wanted to create.
-bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
- const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV,
- InstrForm Form) {
+bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
+ const SCEV *BasePtrStartSCEV,
+ const SCEV *BasePtrIncSCEV,
+ InstrForm Form) {
BasicBlock *BB = MemI->getParent();
if (!BB)
return false;
@@ -777,7 +885,7 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
++PHINodeAlreadyExistsUpdate;
return true;
- }
+ }
if (Form == DSForm || Form == DQForm) {
const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
@@ -788,7 +896,7 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
++PHINodeAlreadyExistsDQ;
return true;
}
- }
+ }
}
}
}
diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
index 8f6717420eb68..6132074004305 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
@@ -2,9 +2,6 @@
; RUN: llc -disable-lsr -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
-; FIXME: PPCLoopInstrFormPrep should be able to common base for "(unsigned long long *)(p + j + 5)"
-; and "(unsigned long long *)(p + j + 9)", thus we only have two DS form load inside the loop.
-
; long long foo(char *p, int n, int count) {
; int j = 0;
; long long sum = 0;
@@ -22,29 +19,24 @@ define i64 @foo(i8* %p, i32 signext %n, i32 signext %count) {
; CHECK-NEXT: cmpwi r4, 1
; CHECK-NEXT: blt cr0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: clrldi r4, r4, 32
+; CHECK-NEXT: addi r6, r3, 5
+; CHECK-NEXT: clrldi r3, r4, 32
; CHECK-NEXT: extsw r5, r5
-; CHECK-NEXT: li r6, 0
-; CHECK-NEXT: li r7, 5
-; CHECK-NEXT: mtctr r4
-; CHECK-NEXT: li r8, 9
-; CHECK-NEXT: li r4, 0
+; CHECK-NEXT: mtctr r3
+; CHECK-NEXT: li r3, 0
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: add r9, r3, r6
+; CHECK-NEXT: ld r4, 0(r6)
+; CHECK-NEXT: add r3, r4, r3
+; CHECK-NEXT: ld r4, 4(r6)
; CHECK-NEXT: add r6, r6, r5
-; CHECK-NEXT: ldx r10, r9, r7
-; CHECK-NEXT: ldx r9, r9, r8
-; CHECK-NEXT: add r4, r10, r4
-; CHECK-NEXT: add r4, r4, r9
+; CHECK-NEXT: add r3, r3, r4
; CHECK-NEXT: bdnz .LBB0_2
; CHECK-NEXT: # %bb.3: # %for.cond.cleanup
-; CHECK-NEXT: mr r3, r4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: li r4, 0
-; CHECK-NEXT: mr r3, r4
+; CHECK-NEXT: li r3, 0
; CHECK-NEXT: blr
entry:
%cmp16 = icmp sgt i32 %n, 0
diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
index 0c030bf6601b5..346353bc12d0a 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
@@ -6,6 +6,10 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpd 5, 7
+; CHECK-NEXT: std 19, -104(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 20, -96(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 21, -88(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill
; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill
; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill
; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
@@ -17,89 +21,95 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
; CHECK-NEXT: bge 0, .LBB0_6
; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: addi 30, 5, 1
-; CHECK-NEXT: addi 29, 5, 3
-; CHECK-NEXT: addi 28, 5, 2
+; CHECK-NEXT: addi 28, 5, 3
+; CHECK-NEXT: addi 27, 5, 2
; CHECK-NEXT: mulld 12, 8, 5
-; CHECK-NEXT: addi 3, 3, 16
+; CHECK-NEXT: addi 29, 3, 16
; CHECK-NEXT: mulld 0, 9, 8
-; CHECK-NEXT: sldi 11, 10, 3
+; CHECK-NEXT: mr 25, 12
; CHECK-NEXT: mulld 30, 8, 30
-; CHECK-NEXT: mulld 29, 8, 29
-; CHECK-NEXT: mulld 8, 8, 28
+; CHECK-NEXT: mulld 28, 8, 28
+; CHECK-NEXT: mulld 8, 8, 27
+; CHECK-NEXT: sldi 11, 10, 3
+; CHECK-NEXT: li 27, 0
+; CHECK-NEXT: mr 26, 30
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: add 5, 5, 9
-; CHECK-NEXT: add 12, 12, 0
-; CHECK-NEXT: add 30, 30, 0
-; CHECK-NEXT: add 29, 29, 0
+; CHECK-NEXT: add 25, 25, 0
+; CHECK-NEXT: add 26, 26, 0
+; CHECK-NEXT: add 28, 28, 0
; CHECK-NEXT: add 8, 8, 0
+; CHECK-NEXT: addi 27, 27, 1
; CHECK-NEXT: cmpd 5, 7
; CHECK-NEXT: bge 0, .LBB0_6
; CHECK-NEXT: .LBB0_3: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_5 Depth 2
-; CHECK-NEXT: sub 28, 5, 10
-; CHECK-NEXT: cmpd 6, 28
+; CHECK-NEXT: sub 24, 5, 10
+; CHECK-NEXT: cmpd 6, 24
; CHECK-NEXT: bge 0, .LBB0_2
; CHECK-NEXT: # %bb.4:
-; CHECK-NEXT: add 26, 6, 12
-; CHECK-NEXT: add 25, 6, 30
-; CHECK-NEXT: add 24, 6, 29
-; CHECK-NEXT: add 23, 6, 8
-; CHECK-NEXT: sldi 27, 6, 3
-; CHECK-NEXT: sldi 26, 26, 3
-; CHECK-NEXT: sldi 25, 25, 3
-; CHECK-NEXT: sldi 24, 24, 3
-; CHECK-NEXT: sldi 23, 23, 3
-; CHECK-NEXT: add 27, 4, 27
-; CHECK-NEXT: add 26, 3, 26
-; CHECK-NEXT: add 25, 3, 25
-; CHECK-NEXT: add 24, 3, 24
-; CHECK-NEXT: add 23, 3, 23
+; CHECK-NEXT: maddld 19, 0, 27, 30
+; CHECK-NEXT: maddld 20, 0, 27, 12
+; CHECK-NEXT: add 22, 6, 28
+; CHECK-NEXT: add 21, 6, 8
+; CHECK-NEXT: add 20, 6, 20
+; CHECK-NEXT: add 19, 6, 19
+; CHECK-NEXT: sldi 23, 6, 3
+; CHECK-NEXT: sldi 22, 22, 3
+; CHECK-NEXT: sldi 21, 21, 3
+; CHECK-NEXT: add 23, 4, 23
+; CHECK-NEXT: add 22, 29, 22
+; CHECK-NEXT: add 21, 29, 21
+; CHECK-NEXT: sldi 20, 20, 3
+; CHECK-NEXT: sldi 19, 19, 3
+; CHECK-NEXT: add 20, 3, 20
+; CHECK-NEXT: add 19, 3, 19
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lfd 0, 0(27)
-; CHECK-NEXT: lfd 1, -16(26)
+; CHECK-NEXT: lfd 0, 0(23)
+; CHECK-NEXT: lfd 1, 0(20)
; CHECK-NEXT: add 6, 6, 10
-; CHECK-NEXT: cmpd 6, 28
+; CHECK-NEXT: cmpd 6, 24
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, -8(26)
+; CHECK-NEXT: lfd 1, 8(20)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, 0(26)
+; CHECK-NEXT: lfd 1, 16(20)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, 8(26)
-; CHECK-NEXT: add 26, 26, 11
+; CHECK-NEXT: lfd 1, 24(20)
+; CHECK-NEXT: add 20, 20, 11
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, -16(25)
+; CHECK-NEXT: lfd 1, 0(19)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, -8(25)
+; CHECK-NEXT: lfd 1, 8(19)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, 0(25)
+; CHECK-NEXT: lfd 1, 16(19)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, 8(25)
-; CHECK-NEXT: add 25, 25, 11
+; CHECK-NEXT: lfd 1, 24(19)
+; CHECK-NEXT: add 19, 19, 11
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, -16(23)
+; CHECK-NEXT: lfd 1, -16(21)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, -8(23)
+; CHECK-NEXT: lfd 1, -8(21)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, 0(23)
+; CHECK-NEXT: lfd 1, 0(21)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, 8(23)
-; CHECK-NEXT: add 23, 23, 11
+; CHECK-NEXT: lfd 1, 8(21)
+; CHECK-NEXT: add 21, 21, 11
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, -16(24)
+; CHECK-NEXT: lfd 1, -16(22)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, -8(24)
+; CHECK-NEXT: lfd 1, -8(22)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, 0(24)
+; CHECK-NEXT: lfd 1, 0(22)
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: lfd 1, 8(24)
-; CHECK-NEXT: add 24, 24, 11
+; CHECK-NEXT: lfd 1, 8(22)
+; CHECK-NEXT: add 22, 22, 11
; CHECK-NEXT: xsadddp 0, 0, 1
-; CHECK-NEXT: stfd 0, 0(27)
-; CHECK-NEXT: add 27, 27, 11
+; CHECK-NEXT: stfd 0, 0(23)
+; CHECK-NEXT: add 23, 23, 11
; CHECK-NEXT: blt 0, .LBB0_5
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_6:
@@ -111,6 +121,10 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
; CHECK-NEXT: ld 25, -56(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 22, -80(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 21, -88(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 20, -96(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 19, -104(1) # 8-byte Folded Reload
; CHECK-NEXT: blr
%9 = icmp slt i64 %2, %4
br i1 %9, label %10, label %97
More information about the llvm-branch-commits
mailing list