[llvm] r328113 - [Hexagon] Eliminate subregisters from PHI nodes before pipelining
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 21 09:39:11 PDT 2018
Author: kparzysz
Date: Wed Mar 21 09:39:11 2018
New Revision: 328113
URL: http://llvm.org/viewvc/llvm-project?rev=328113&view=rev
Log:
[Hexagon] Eliminate subregisters from PHI nodes before pipelining
The pipeliner needs to remove instructions from the SlotIndexes
structure when they are deleted. Otherwise, the SlotIndexes map
has stale data, and an assert will occur when adding new
instructions.
This patch also changes the pipeliner to make the back-edge of
a loop carried dependence 1 cycle. The 1 cycle latency is added
to the anti-dependence that represents the back-edge. This
changes eliminates a couple of hacks added to the pipeliner to
handle the latency of the back-edge. It is needed to correctly
pipeline the test case for the sub-register elimination pass.
Modified:
llvm/trunk/lib/CodeGen/MachinePipeliner.cpp
llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi.ll
Modified: llvm/trunk/lib/CodeGen/MachinePipeliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachinePipeliner.cpp?rev=328113&r1=328112&r2=328113&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachinePipeliner.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachinePipeliner.cpp Wed Mar 21 09:39:11 2018
@@ -218,6 +218,7 @@ public:
}
private:
+ void preprocessPhiNodes(MachineBasicBlock &B);
bool canPipelineLoop(MachineLoop &L);
bool scheduleLoop(MachineLoop &L);
bool swingModuloScheduler(MachineLoop &L);
@@ -357,20 +358,6 @@ public:
bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true);
- /// The latency of the dependence.
- unsigned getLatency(SUnit *Source, const SDep &Dep) {
- // Anti dependences represent recurrences, so use the latency of the
- // instruction on the back-edge.
- if (Dep.getKind() == SDep::Anti) {
- if (Source->getInstr()->isPHI())
- return Dep.getSUnit()->Latency;
- if (Dep.getSUnit()->getInstr()->isPHI())
- return Source->Latency;
- return Dep.getLatency();
- }
- return Dep.getLatency();
- }
-
/// The distance function, which indicates that operation V of iteration I
/// depends on operations U of iteration I-distance.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
@@ -484,12 +471,19 @@ class NodeSet {
int MaxDepth = 0;
unsigned Colocate = 0;
SUnit *ExceedPressure = nullptr;
+ unsigned Latency = 0;
public:
using iterator = SetVector<SUnit *>::const_iterator;
NodeSet() = default;
- NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
+ NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
+ Latency = 0;
+ for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
+ for (const SDep &Succ : Nodes[i]->Succs)
+ if (Nodes.count(Succ.getSUnit()))
+ Latency += Succ.getLatency();
+ }
bool insert(SUnit *SU) { return Nodes.insert(SU); }
@@ -820,18 +814,41 @@ bool MachinePipeliner::canPipelineLoop(M
if (!L.getLoopPreheader())
return false;
- // If any of the Phis contain subregs, then we can't pipeline
- // because we don't know how to maintain subreg information in the
- // VMap structure.
- MachineBasicBlock *MBB = L.getHeader();
- for (auto &PHI : MBB->phis())
- for (unsigned i = 1; i != PHI.getNumOperands(); i += 2)
- if (PHI.getOperand(i).getSubReg() != 0)
- return false;
-
+ // Remove any subregisters from inputs to phi nodes.
+ preprocessPhiNodes(*L.getHeader());
return true;
}
+void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
+
+ for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
+ MachineOperand &DefOp = PI.getOperand(0);
+ assert(DefOp.getSubReg() == 0);
+ auto *RC = MRI.getRegClass(DefOp.getReg());
+
+ for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
+ MachineOperand &RegOp = PI.getOperand(i);
+ if (RegOp.getSubReg() == 0)
+ continue;
+
+ // If the operand uses a subregister, replace it with a new register
+ // without subregisters, and generate a copy to the new register.
+ unsigned NewReg = MRI.createVirtualRegister(RC);
+ MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
+ MachineBasicBlock::iterator At = PredB.getFirstTerminator();
+ const DebugLoc &DL = PredB.findDebugLoc(At);
+ auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(RegOp.getReg(), getRegState(RegOp),
+ RegOp.getSubReg());
+ Slots.insertMachineInstrInMaps(*Copy);
+ RegOp.setReg(NewReg);
+ RegOp.setSubReg(0);
+ }
+ }
+}
+
/// The SMS algorithm consists of the following main steps:
/// 1. Computation and analysis of the dependence graph.
/// 2. Ordering of the nodes (instructions).
@@ -1078,31 +1095,41 @@ void SwingSchedulerDAG::addLoopCarriedDe
int64_t Offset1, Offset2;
if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
!TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
- SU.addPred(SDep(Load, SDep::Barrier));
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
continue;
}
if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
"What happened to the chain edge?");
- SU.addPred(SDep(Load, SDep::Barrier));
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
continue;
}
// Second, the more expensive check that uses alias analysis on the
// base registers. If they alias, and the load offset is less than
// the store offset, the mark the dependence as loop carried.
if (!AA) {
- SU.addPred(SDep(Load, SDep::Barrier));
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
continue;
}
MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
MachineMemOperand *MMO2 = *MI.memoperands_begin();
if (!MMO1->getValue() || !MMO2->getValue()) {
- SU.addPred(SDep(Load, SDep::Barrier));
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
continue;
}
if (MMO1->getValue() == MMO2->getValue() &&
MMO1->getOffset() <= MMO2->getOffset()) {
- SU.addPred(SDep(Load, SDep::Barrier));
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
continue;
}
AliasResult AAResult = AA->alias(
@@ -1111,8 +1138,11 @@ void SwingSchedulerDAG::addLoopCarriedDe
MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
MMO2->getAAInfo()));
- if (AAResult != NoAlias)
- SU.addPred(SDep(Load, SDep::Barrier));
+ if (AAResult != NoAlias) {
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ }
}
}
}
@@ -1154,6 +1184,7 @@ void SwingSchedulerDAG::updatePhiDepende
if (SU != nullptr && UseMI->isPHI()) {
if (!MI->isPHI()) {
SDep Dep(SU, SDep::Anti, Reg);
+ Dep.setLatency(1);
I.addPred(Dep);
} else {
HasPhiDef = Reg;
@@ -1599,12 +1630,12 @@ void SwingSchedulerDAG::computeNodeFunct
EP = SU->Preds.end();
IP != EP; ++IP) {
SUnit *pred = IP->getSUnit();
- if (getLatency(SU, *IP) == 0)
+ if (IP->getLatency() == 0)
zeroLatencyDepth =
std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
if (ignoreDependence(*IP, true))
continue;
- asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
+ asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
getDistance(pred, SU, *IP) * MII));
}
maxASAP = std::max(maxASAP, asap);
@@ -1623,12 +1654,12 @@ void SwingSchedulerDAG::computeNodeFunct
ES = SU->Succs.end();
IS != ES; ++IS) {
SUnit *succ = IS->getSUnit();
- if (getLatency(SU, *IS) == 0)
+ if (IS->getLatency() == 0)
zeroLatencyHeight =
std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
if (ignoreDependence(*IS, true))
continue;
- alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
+ alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
getDistance(SU, succ, *IS) * MII));
}
@@ -2340,6 +2371,8 @@ void SwingSchedulerDAG::generatePipeline
addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
// Remove the original loop since it's no longer referenced.
+ for (auto &I : *BB)
+ LIS.RemoveMachineInstrFromMaps(I);
BB->clear();
BB->eraseFromParent();
@@ -2916,6 +2949,7 @@ void SwingSchedulerDAG::removeDeadInstru
used = false;
}
if (!used) {
+ LIS.RemoveMachineInstrFromMaps(*MI);
MI++->eraseFromParent();
continue;
}
@@ -2930,6 +2964,7 @@ void SwingSchedulerDAG::removeDeadInstru
++BBI;
unsigned reg = MI->getOperand(0).getReg();
if (MRI.use_begin(reg) == MRI.use_end()) {
+ LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
}
}
@@ -3636,7 +3671,7 @@ void SMSchedule::computeStart(SUnit *SU,
const SDep &Dep = SU->Preds[i];
if (Dep.getSUnit() == I) {
if (!DAG->isBackedge(SU, Dep)) {
- int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
+ int EarlyStart = cycle + Dep.getLatency() -
DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
*MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
if (DAG->isLoopCarriedOrder(SU, Dep, false)) {
@@ -3644,7 +3679,7 @@ void SMSchedule::computeStart(SUnit *SU,
*MinEnd = std::min(*MinEnd, End);
}
} else {
- int LateStart = cycle - DAG->getLatency(SU, Dep) +
+ int LateStart = cycle - Dep.getLatency() +
DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
*MinLateStart = std::min(*MinLateStart, LateStart);
}
@@ -3660,7 +3695,7 @@ void SMSchedule::computeStart(SUnit *SU,
if (SU->Succs[i].getSUnit() == I) {
const SDep &Dep = SU->Succs[i];
if (!DAG->isBackedge(SU, Dep)) {
- int LateStart = cycle - DAG->getLatency(SU, Dep) +
+ int LateStart = cycle - Dep.getLatency() +
DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
*MinLateStart = std::min(*MinLateStart, LateStart);
if (DAG->isLoopCarriedOrder(SU, Dep)) {
@@ -3668,7 +3703,7 @@ void SMSchedule::computeStart(SUnit *SU,
*MaxStart = std::max(*MaxStart, Start);
}
} else {
- int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
+ int EarlyStart = cycle + Dep.getLatency() -
DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
*MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
}
Modified: llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi.ll?rev=328113&r1=328112&r2=328113&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi.ll Wed Mar 21 09:39:11 2018
@@ -9,16 +9,16 @@
; CHECK-NOT: vcmp.gt([[VREG]].uh,v{{[0-9]+}}.uh)
; CHECK: loop0
-define void @f0() #0 {
+define void @f0(<64 x i32> %a0, <32 x i32> %a1) #0 {
b0:
br i1 undef, label %b1, label %b5
b1: ; preds = %b0
- %v0 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> undef)
+ %v0 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %a0)
br label %b2
b2: ; preds = %b4, %b1
- %v1 = phi <32 x i32> [ undef, %b1 ], [ %v7, %b4 ]
+ %v1 = phi <32 x i32> [ %a1, %b1 ], [ %v7, %b4 ]
br label %b3
b3: ; preds = %b3, %b2
More information about the llvm-commits
mailing list