[llvm] e0297a8 - [ModuloSchedule] Fix a bug in experimental expander
Thomas Raoux via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 23 16:09:09 PST 2019
Author: Thomas Raoux
Date: 2019-11-23T16:01:47-08:00
New Revision: e0297a8bee6586280640f31b876d4da15966f8b7
URL: https://github.com/llvm/llvm-project/commit/e0297a8bee6586280640f31b876d4da15966f8b7
DIFF: https://github.com/llvm/llvm-project/commit/e0297a8bee6586280640f31b876d4da15966f8b7.diff
LOG: [ModuloSchedule] Fix a bug in experimental expander
Fix two problems that popped up after my last patch. One is that the
stiching of prologue/epilogue can be wrong when reading a value from a
previsou stage. Also changed how we duplicate phi instructions to avoid
generating extra phi that we delete later.
Differential Revision: https://reviews.llvm.org/D70213
Added:
Modified:
llvm/include/llvm/CodeGen/ModuloSchedule.h
llvm/lib/CodeGen/ModuloSchedule.cpp
llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll
llvm/test/CodeGen/Hexagon/swp-stages4.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index f041d9e48bd9..55c52f3447b0 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -290,6 +290,9 @@ class PeelingModuloScheduleExpander {
/// but not produced (in the epilog) or produced but not available (in the
/// prolog).
DenseMap<MachineBasicBlock *, BitVector> AvailableStages;
+ /// When peeling the epilogue keep track of the distance between the phi
+ /// nodes and the kernel.
+ DenseMap<MachineInstr *, unsigned> PhiNodeLoopIteration;
/// CanonicalMIs and BlockMIs form a bidirectional map between any of the
/// loop kernel clones.
@@ -351,6 +354,9 @@ class PeelingModuloScheduleExpander {
MI = CanonicalMIs[MI];
return Schedule.getStage(MI);
}
+ /// Helper function to find the right canonical register for a phi instruction
+ /// coming from a peeled out prologue.
+ Register getPhiCanonicalReg(MachineInstr* CanonicalPhi, MachineInstr* Phi);
/// Target loop info before kernel peeling.
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info;
};
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index b958a9de0858..097b836f4fb6 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -1215,7 +1215,7 @@ namespace {
// Remove any dead phis in MBB. Dead phis either have only one block as input
// (in which case they are the identity) or have no uses.
void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
- LiveIntervals *LIS) {
+ LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
bool Changed = true;
while (Changed) {
Changed = false;
@@ -1227,7 +1227,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
Changed = true;
- } else if (MI.getNumExplicitOperands() == 3) {
+ } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
MRI.constrainRegClass(MI.getOperand(1).getReg(),
MRI.getRegClass(MI.getOperand(0).getReg()));
MRI.replaceRegWith(MI.getOperand(0).getReg(),
@@ -1658,22 +1658,56 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
for (auto *P : PhiToDelete)
P->eraseFromParent();
InsertPt = DestBB->getFirstNonPHI();
- for (MachineInstr &MI : SourceBB->phis()) {
- MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
+ // Helper to clone Phi instructions into the destination block. We clone Phi
+ // greedily to avoid combinatorial explosion of Phi instructions.
+ auto clonePhi = [&](MachineInstr *Phi) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
DestBB->insert(InsertPt, NewMI);
- Register OrigR = MI.getOperand(0).getReg();
+ Register OrigR = Phi->getOperand(0).getReg();
Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
NewMI->getOperand(0).setReg(R);
NewMI->getOperand(1).setReg(OrigR);
NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
Remaps[OrigR] = R;
- CanonicalMIs[NewMI] = CanonicalMIs[&MI];
- BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NewMI;
- }
- for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I)
- for (MachineOperand &MO : I->uses())
- if (MO.isReg() && Remaps.count(MO.getReg()))
+ CanonicalMIs[NewMI] = CanonicalMIs[Phi];
+ BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
+ PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
+ return R;
+ };
+ for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
+ for (MachineOperand &MO : I->uses()) {
+ if (!MO.isReg())
+ continue;
+ if (Remaps.count(MO.getReg()))
MO.setReg(Remaps[MO.getReg()]);
+ else {
+ // If we are using a phi from the source block we need to add a new phi
+ // pointing to the old one.
+ MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
+ if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
+ Register R = clonePhi(Use);
+ MO.setReg(R);
+ }
+ }
+ }
+ }
+}
+
+Register
+PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
+ MachineInstr *Phi) {
+ unsigned distance = PhiNodeLoopIteration[Phi];
+ MachineInstr *CanonicalUse = CanonicalPhi;
+ for (unsigned I = 0; I < distance; ++I) {
+ assert(CanonicalUse->isPHI());
+ assert(CanonicalUse->getNumOperands() == 5);
+ unsigned LoopRegIdx = 3, InitRegIdx = 1;
+ if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
+ std::swap(LoopRegIdx, InitRegIdx);
+ CanonicalUse =
+ MRI.getVRegDef(CanonicalUse->getOperand(LoopRegIdx).getReg());
+ }
+ return CanonicalUse->getOperand(0).getReg();
}
void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
@@ -1698,7 +1732,7 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
// property that any value deffed in BB but used outside of BB is used by a
// PHI in the exiting block.
MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
-
+ EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
// Push out the epilogs, again in reverse order.
// We can't assume anything about the minumum loop trip count at this point,
// so emit a fairly complex epilog.
@@ -1717,7 +1751,13 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
// instructions of a previous loop iteration.
for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
Epilogs.push_back(peelKernel(LPD_Back));
- filterInstructions(Epilogs.back(), Schedule.getNumStages() - I);
+ MachineBasicBlock *B = Epilogs.back();
+ filterInstructions(B, Schedule.getNumStages() - I);
+ // Keep track at which iteration each phi belongs to. We need it to know
+ // what version of the variable to use during prologue/epilogue stitching.
+ EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
+ for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi)
+ PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I;
}
for (size_t I = 0; I < Epilogs.size(); I++) {
LS.reset();
@@ -1745,8 +1785,16 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
for (MachineInstr &MI : (*EI)->phis()) {
Register Reg = MI.getOperand(1).getReg();
MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
- if (Use && Use->getParent() == Pred)
+ if (Use && Use->getParent() == Pred) {
+ MachineInstr *CanonicalUse = CanonicalMIs[Use];
+ if (CanonicalUse->isPHI()) {
+ // If the use comes from a phi we need to skip as many phi as the
+ // distance between the epilogue and the kernel. Trace through the phi
+ // chain to find the right value.
+ Reg = getPhiCanonicalReg(CanonicalUse, Use);
+ }
Reg = getEquivalentRegisterIn(Reg, *PI);
+ }
MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
MI.addOperand(MachineOperand::CreateMBB(*PI));
}
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll
index d3689cc9d8f1..194dfa8d06a7 100644
--- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi12.ll
@@ -5,7 +5,7 @@
; CHECK: loop0
; CHECK: r{{[0-9]+}} = add([[REG0:r([0-9]+)]],#8)
-; CHECK: [[REG0:r([0-9]+)]] = [[REG1:r([0-9]+)]]
+; CHECK: [[REG0]] = [[REG1:r([0-9]+)]]
; CHECK: endloop0
; CHECK: = add([[REG1]],#8)
diff --git a/llvm/test/CodeGen/Hexagon/swp-stages4.ll b/llvm/test/CodeGen/Hexagon/swp-stages4.ll
index a10775512dc1..1b96aca2a48c 100644
--- a/llvm/test/CodeGen/Hexagon/swp-stages4.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-stages4.ll
@@ -5,6 +5,7 @@
; CHECK: = and
; CHECK: = and
+; CHECK: r[[REGA:[0-9]+]] = memub(r{{[0-9]+}}+#1)
; CHECK: r[[REG0:[0-9]+]] = and(r[[REG1:[0-9]+]],#255)
; CHECK-NOT: r[[REG0]] = and(r[[REG1]],#255)
; CHECK: loop0(.LBB0_[[LOOP:.]],
More information about the llvm-commits
mailing list