[llvm] r303111 - Re-submit AMDGPUMachineCFGStructurizer.
Jan Sjodin via llvm-commits
llvm-commits at lists.llvm.org
Mon May 15 13:18:37 PDT 2017
Author: jsjodin
Date: Mon May 15 15:18:37 2017
New Revision: 303111
URL: http://llvm.org/viewvc/llvm-project?rev=303111&view=rev
Log:
Re-submit AMDGPUMachineCFGStructurizer.
Differential Revision: https://reviews.llvm.org/D23209
Added:
llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
- copied, changed from r303097, llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=303111&r1=303110&r2=303111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Mon May 15 15:18:37 2017
@@ -50,6 +50,10 @@ FunctionPass *createSIDebuggerInsertNops
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
+FunctionPass *createAMDGPUMachineCFGStructurizerPass();
+
+void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
+extern char &AMDGPUMachineCFGStructurizerID;
ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr);
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
Copied: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp (from r303097, llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp?p2=llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp&p1=llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp&r1=303097&r2=303111&rev=303111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp Mon May 15 15:18:37 2017
@@ -1,4 +1,4 @@
-//===-- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. -----===//
+//===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===//
//
// The LLVM Compiler Infrastructure
//
@@ -75,7 +75,6 @@ public:
void addDest(unsigned DestReg, const DebugLoc &DL);
void replaceDef(unsigned OldDestReg, unsigned NewDestReg);
void deleteDef(unsigned DestReg);
- DebugLoc getDebugLoc(unsigned DestReg);
void addSource(unsigned DestReg, unsigned SourceReg,
MachineBasicBlock *SourceMBB);
void removeSource(unsigned DestReg, unsigned SourceReg,
@@ -224,10 +223,6 @@ void PHILinearize::deleteDef(unsigned De
delete InfoElement;
}
-DebugLoc PHILinearize::getDebugLoc(unsigned DestReg) {
- return phiInfoElementGetDebugLoc(findPHIInfoElement(DestReg));
-}
-
void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg,
MachineBasicBlock *SourceMBB) {
phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
@@ -260,18 +255,18 @@ unsigned PHILinearize::getNumSources(uns
void PHILinearize::dump(MachineRegisterInfo *MRI) {
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- DEBUG(dbgs() << "=PHIInfo Start=\n");
+ dbgs() << "=PHIInfo Start=\n";
for (auto PII : this->PHIInfo) {
PHIInfoElementT &Element = *PII;
- DEBUG(dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI)
- << " Sources: {");
+ dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI)
+ << " Sources: {";
for (auto &SI : Element.Sources) {
- DEBUG(dbgs() << PrintReg(SI.first, TRI) << "(BB#"
- << SI.second->getNumber() << "),");
+ dbgs() << PrintReg(SI.first, TRI) << "(BB#"
+ << SI.second->getNumber() << "),";
}
- DEBUG(dbgs() << "}\n");
+ dbgs() << "}\n";
}
- DEBUG(dbgs() << "=PHIInfo End=\n");
+ dbgs() << "=PHIInfo End=\n";
}
void PHILinearize::clear() { PHIInfo = PHIInfoT(); }
@@ -379,8 +374,6 @@ public:
void addLiveOut(unsigned VReg);
- void addLiveOuts(LinearizedRegion *LRegion);
-
void removeLiveOut(unsigned Reg);
void replaceLiveOut(unsigned OldReg, unsigned NewReg);
@@ -417,8 +410,6 @@ public:
bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI);
- bool isDefinedInRegion(unsigned Reg, MachineRegisterInfo *MRI);
-
void removeFalseRegisterKills(MachineRegisterInfo *MRI);
void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI,
@@ -427,9 +418,6 @@ public:
LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
- LinearizedRegion(RegionMRT *Region, const MachineRegisterInfo *MRI,
- const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
-
LinearizedRegion();
~LinearizedRegion();
@@ -889,13 +877,6 @@ bool LinearizedRegion::getHasLoop() { re
void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); }
-void LinearizedRegion::addLiveOuts(LinearizedRegion *LRegion) {
- DenseSet<unsigned> *RegionLiveOuts = LRegion->getLiveOuts();
- for (auto R : *RegionLiveOuts) {
- addLiveOut(R);
- }
-}
-
void LinearizedRegion::removeLiveOut(unsigned Reg) {
if (isLiveOut(Reg))
LiveOuts.erase(Reg);
@@ -1013,23 +994,6 @@ bool LinearizedRegion::hasNoDef(unsigned
return MRI->def_begin(Reg) == MRI->def_end();
}
-bool LinearizedRegion::isDefinedInRegion(unsigned Reg,
- MachineRegisterInfo *MRI) {
- bool NoDef = hasNoDef(Reg, MRI);
- if (NoDef) {
- return false;
- }
-
- if (!MRI->hasOneDef(Reg)) {
- DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo())
- << " has multiple defs\n");
- }
-
- assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
- MachineOperand *Def = &(*(MRI->def_begin(Reg)));
- return contains(Def->getParent()->getParent());
-}
-
// After the code has been structurized, what was flagged as kills
// before are no longer register kills.
void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
@@ -1092,19 +1056,6 @@ LinearizedRegion::LinearizedRegion(Machi
Parent = nullptr;
}
-LinearizedRegion::LinearizedRegion(RegionMRT *Region,
- const MachineRegisterInfo *MRI,
- const TargetRegisterInfo *TRI,
- PHILinearize &PHIInfo) {
- setEntry(Region->getEntry());
- // We don't have a single exit block that is part of the region
- // at this point. When the transform is performed this block
- // will be created.
- setExit(nullptr);
- storeLiveOuts(Region, MRI, TRI, PHIInfo);
- Parent = nullptr;
-}
-
LinearizedRegion::LinearizedRegion() {
setEntry(nullptr);
setExit(nullptr);
@@ -1138,7 +1089,6 @@ private:
SmallVector<unsigned, 2> *RegionIndices);
void extractKilledPHIs(MachineBasicBlock *MBB);
- void extractKilledPHIs(LinearizedRegion *LRegion);
bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices,
unsigned *ReplaceReg);
@@ -1171,10 +1121,6 @@ private:
void transformSimpleIfRegion(RegionMRT *Region);
- bool regionIsSimpleLoop(RegionMRT *Region);
-
- void transformSimpleLoopRegion(RegionMRT *Region);
-
void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II);
void insertUnconditionalBranch(MachineBasicBlock *MBB,
@@ -1183,9 +1129,6 @@ private:
MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region);
- void replaceRegisterOutsideMBB(MachineBasicBlock *MBB, unsigned Register,
- unsigned NewRegister);
-
void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
MachineBasicBlock *MergeBB, unsigned DestRegister,
unsigned IfSourceRegister, unsigned CodeSourceRegister,
@@ -1313,53 +1256,6 @@ void AMDGPUMachineCFGStructurizer::trans
TII->convertNonUniformIfRegion(Entry, Exit);
}
-bool AMDGPUMachineCFGStructurizer::regionIsSimpleLoop(RegionMRT *Region) {
- MachineBasicBlock *Entry = Region->getEntry();
-
- if (Entry->succ_size() != 1) {
- return false;
- }
-
- int NumRegionExitEdges = 0;
- MachineBasicBlock *BackBlock = nullptr;
- for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(),
- PE = Entry->succ_end();
- PI != PE; ++PI) {
- MachineBasicBlock *CurrentSP = *PI;
- if (Region->contains(CurrentSP)) {
- BackBlock = CurrentSP;
- }
- }
-
- bool HasBackedge = false;
-
- for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(),
- SE = Entry->succ_end();
- SI != SE; ++SI) {
- MachineBasicBlock *CurrentBS = *SI;
- if (CurrentBS == Entry) {
- HasBackedge = true;
- }
- }
-
- return NumRegionExitEdges == 1 && BackBlock->succ_size() == 2 && HasBackedge;
-}
-
-void AMDGPUMachineCFGStructurizer::transformSimpleLoopRegion(RegionMRT *Region) {
- MachineBasicBlock *Entry = Region->getEntry();
- MachineBasicBlock *BackBlock = nullptr;
- for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(),
- PE = Entry->succ_end();
- PI != PE; ++PI) {
- MachineBasicBlock *CurrentSP = *PI;
- if (Region->contains(CurrentSP)) {
- BackBlock = CurrentSP;
- }
- }
-
- TII->convertNonUniformLoopRegion(Entry, BackBlock);
-}
-
static void fixMBBTerminator(MachineBasicBlock *MBB) {
if (MBB->succ_size() == 1) {
@@ -1518,11 +1414,6 @@ void AMDGPUMachineCFGStructurizer::extra
}
}
-void AMDGPUMachineCFGStructurizer::extractKilledPHIs(LinearizedRegion *LRegion) {
- // PHIs can only exist in the entry block.
- extractKilledPHIs(LRegion->getEntry());
-}
-
static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices,
unsigned Index) {
for (auto i : PHIRegionIndices) {
@@ -1835,28 +1726,6 @@ AMDGPUMachineCFGStructurizer::createLine
return LastMerge;
}
-void AMDGPUMachineCFGStructurizer::replaceRegisterOutsideMBB(MachineBasicBlock *MBB,
- unsigned Register,
- unsigned NewRegister) {
- assert(Register != NewRegister && "Cannot replace a reg with itself");
-
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
- E = MRI->reg_end();
- I != E;) {
- MachineOperand &O = *I;
- ++I;
- if (O.getParent()->getParent() != MBB) {
- if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
- llvm_unreachable("Cannot substitute physical registers");
- // We don't handle physical registers, but if we need to in the future
- // This is how we do it: O.substPhysReg(NewRegister, *TRI);
- } else {
- O.setReg(NewRegister);
- }
- }
- }
-}
-
void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB,
MachineBasicBlock *CodeBB,
MachineBasicBlock *MergeBB,
@@ -1984,7 +1853,7 @@ MachineBasicBlock *AMDGPUMachineCFGStruc
SelectBB->getNumber() /* CodeBBStart->getNumber() */);
if (&(*(IfBB->getParent()->begin())) == IfBB) {
TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg,
- CodeBBStart->getNumber());
+ CodeBBStart->getNumber());
}
MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
ArrayRef<MachineOperand> Cond(RegOp);
@@ -2021,8 +1890,8 @@ void AMDGPUMachineCFGStructurizer::rewri
// This is an exit block, hence no successors. We will assign the
// bb select register to the entry block.
TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
- BBSelectReg,
- CodeBB->getParent()->begin()->getNumber());
+ BBSelectReg,
+ CodeBB->getParent()->begin()->getNumber());
insertUnconditionalBranch(CodeBB, MergeBB, DL);
return;
}
@@ -2036,15 +1905,15 @@ void AMDGPUMachineCFGStructurizer::rewri
if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) {
TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
- BBSelectReg, TrueBB->getNumber());
+ BBSelectReg, TrueBB->getNumber());
} else {
const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg);
unsigned TrueBBReg = MRI->createVirtualRegister(RegClass);
unsigned FalseBBReg = MRI->createVirtualRegister(RegClass);
TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
- TrueBBReg, TrueBB->getNumber());
+ TrueBBReg, TrueBB->getNumber());
TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
- FalseBBReg, FalseBB->getNumber());
+ FalseBBReg, FalseBB->getNumber());
ensureCondIsNotKilled(Cond);
TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL,
BBSelectReg, Cond, TrueBBReg, FalseBBReg);
@@ -2114,7 +1983,7 @@ void AMDGPUMachineCFGStructurizer::inser
if (IsLastDef) {
const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator());
TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL,
- NextDestReg, 0);
+ NextDestReg, 0);
PHIInfo.deleteDef(DestReg);
} else {
PHIInfo.replaceDef(DestReg, NextDestReg);
@@ -2193,7 +2062,7 @@ void AMDGPUMachineCFGStructurizer::rewri
DEBUG(dbgs() << "Insertion done.\n");
}
- PHIInfo.dump(MRI);
+ DEBUG(PHIInfo.dump(MRI));
}
void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
@@ -2246,10 +2115,9 @@ void AMDGPUMachineCFGStructurizer::creat
unsigned DestReg) {
MachineBasicBlock *Entry = CurrentRegion->getEntry();
MachineBasicBlock *Exit = CurrentRegion->getExit();
- MachineBasicBlock *Pred = *(Entry->pred_begin());
DEBUG(dbgs() << "RegionExit: " << Exit->getNumber()
- << " Pred: " << Pred->getNumber() << "\n");
+ << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n");
int NumSources = 0;
auto SE = PHIInfo.sources_end(DestReg);
@@ -2744,8 +2612,6 @@ bool AMDGPUMachineCFGStructurizer::struc
DEBUG(PHIInfo.dump(MRI));
- auto Entry = Region->getEntry();
-
SetVector<MRT *> *Children = Region->getChildren();
DEBUG(dbgs() << "===========If Region Start===============\n");
if (LRegion->getHasLoop()) {
@@ -2847,8 +2713,8 @@ bool AMDGPUMachineCFGStructurizer::struc
MRI->createVirtualRegister(MRI->getRegClass(InReg));
unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg));
TII->materializeImmediate(*(LRegion->getEntry()),
- LRegion->getEntry()->getFirstTerminator(), DL,
- NewInReg, Region->getEntry()->getNumber());
+ LRegion->getEntry()->getFirstTerminator(), DL,
+ NewInReg, Region->getEntry()->getNumber());
// Need to be careful about updating the registers inside the region.
LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI);
DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n");
@@ -2863,7 +2729,7 @@ bool AMDGPUMachineCFGStructurizer::struc
TII->insertReturn(*LastMerge);
}
- DEBUG(Entry->getParent()->dump());
+ DEBUG(Region->getEntry()->getParent()->dump());
DEBUG(LRegion->print(dbgs(), TRI));
DEBUG(PHIInfo.dump(MRI));
@@ -3016,4 +2882,3 @@ bool AMDGPUMachineCFGStructurizer::runOn
FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() {
return new AMDGPUMachineCFGStructurizer();
}
-
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=303111&r1=303110&r2=303111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon May 15 15:18:37 2017
@@ -118,6 +118,13 @@ static cl::opt<bool> EnableSIInsertWaitc
cl::desc("Use new waitcnt insertion pass"),
cl::init(false));
+// Option to run late CFG structurizer
+static cl::opt<bool> LateCFGStructurize(
+ "amdgpu-late-structurize",
+ cl::desc("Enable late CFG structurization"),
+ cl::init(false),
+ cl::Hidden);
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -702,11 +709,15 @@ bool GCNPassConfig::addPreISel() {
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
// regions formed by them.
addPass(&AMDGPUUnifyDivergentExitNodesID);
- addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
+ if (!LateCFGStructurize) {
+ addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
+ }
addPass(createSinkingPass());
addPass(createSITypeRewriter());
addPass(createAMDGPUAnnotateUniformValues());
- addPass(createSIAnnotateControlFlowPass());
+ if (!LateCFGStructurize) {
+ addPass(createSIAnnotateControlFlowPass());
+ }
return false;
}
@@ -770,6 +781,9 @@ bool GCNPassConfig::addGlobalInstruction
#endif
void GCNPassConfig::addPreRegAlloc() {
+ if (LateCFGStructurize) {
+ addPass(createAMDGPUMachineCFGStructurizerPass());
+ }
addPass(createSIWholeQuadModePass());
}
Modified: llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt?rev=303111&r1=303110&r2=303111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt Mon May 15 15:18:37 2017
@@ -48,6 +48,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUISelDAGToDAG.cpp
AMDGPULowerIntrinsics.cpp
AMDGPUMCInstLower.cpp
+ AMDGPUMachineCFGStructurizer.cpp
AMDGPUMachineFunction.cpp
AMDGPUUnifyMetadata.cpp
AMDGPUOpenCLImageTypeLoweringPass.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=303111&r1=303110&r2=303111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Mon May 15 15:18:37 2017
@@ -496,6 +496,188 @@ int SIInstrInfo::commuteOpcode(unsigned
return Opcode;
}
+/// Emit move(s) before \p MI that load \p Value into \p DestReg, picking the
+/// opcode from DestReg's register class. Classes with no single-move case are
+/// written per sub-register: \p Value in the first part, zero in the rest.
+void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MI,
+                                       const DebugLoc &DL, unsigned DestReg,
+                                       int64_t Value) const {
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
+  if (RegClass == &AMDGPU::SReg_32RegClass ||
+      RegClass == &AMDGPU::SGPR_32RegClass ||
+      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
+      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg).addImm(Value);
+    return;
+  }
+
+  if (RegClass == &AMDGPU::SReg_64RegClass ||
+      RegClass == &AMDGPU::SGPR_64RegClass ||
+      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg).addImm(Value);
+    return;
+  }
+
+  if (RegClass == &AMDGPU::VGPR_32RegClass) {
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg).addImm(Value);
+    return;
+  }
+  if (RegClass == &AMDGPU::VReg_64RegClass) {
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg).addImm(Value);
+    return;
+  }
+
+  // Any other class: split it into EltSize-byte parts, one move per part.
+  unsigned EltSize = 4;
+  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+  if (RI.isSGPRClass(RegClass)) {
+    if (RI.getRegSizeInBits(*RegClass) > 32) {
+      Opcode = AMDGPU::S_MOV_B64;
+      EltSize = 8;
+    } else {
+      Opcode = AMDGPU::S_MOV_B32;
+      EltSize = 4;
+    }
+  }
+
+  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
+  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
+    int64_t IdxValue = Idx == 0 ? Value : 0;
+    // getSubReg takes a sub-register index (SubIndices[Idx]), not the counter.
+    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
+      get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
+    Builder.addImm(IdxValue);
+  }
+}
+
+const TargetRegisterClass *
+SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
+ return &AMDGPU::VGPR_32RegClass; // Size is not consulted: always VGPR_32.
+}
+
+void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DstReg,
+ ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg,
+ unsigned FalseReg) const { // Emits V_CNDMASK_B32_e64 into DstReg, choosing between TrueReg and FalseReg per Cond.
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RegClass = MRI.getRegClass(DstReg);
+ assert(RegClass == &AMDGPU::VGPR_32RegClass && "Not a VGPR32 reg");
+ // One operand: Cond[0] is added directly as the last V_CNDMASK_B32_e64 operand.
+ if (Cond.size() == 1) {
+ BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg)
+ .add(Cond[0]);
+ } else if (Cond.size() == 2) { // Two operands: Cond[0] is a branch-predicate immediate (asserted below).
+ assert(Cond[0].isImm() && "Cond[0] is not an immediate");
+ switch (Cond[0].getImm()) {
+ case SIInstrInfo::SCC_TRUE: { // S_CSELECT_B64 -1, 0 into a fresh SReg_64 supplies the condition operand.
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+ .addImm(-1)
+ .addImm(0);
+ BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg)
+ .addReg(SReg);
+ break;
+ }
+ case SIInstrInfo::SCC_FALSE: { // Same as SCC_TRUE with the S_CSELECT_B64 immediates swapped (0, -1).
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+ .addImm(0)
+ .addImm(-1);
+ BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg)
+ .addReg(SReg);
+ break;
+ }
+ case SIInstrInfo::VCCNZ: { // Cond[1] (copied, implicit flag cleared) is the condition operand.
+ MachineOperand RegOp = Cond[1];
+ RegOp.setImplicit(false);
+ BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg)
+ .add(RegOp);
+ break;
+ }
+ case SIInstrInfo::VCCZ: { // Same as VCCNZ with the TrueReg/FalseReg operands swapped.
+ MachineOperand RegOp = Cond[1];
+ RegOp.setImplicit(false);
+ BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addReg(TrueReg)
+ .addReg(FalseReg)
+ .add(RegOp);
+ break;
+ }
+ case SIInstrInfo::EXECNZ: {
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); // Receives the S_OR_SAVEEXEC_B64 result; not read again in this function.
+ BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
+ .addImm(0);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+ .addImm(-1)
+ .addImm(0);
+ BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg)
+ .addReg(SReg);
+ break;
+ }
+ case SIInstrInfo::EXECZ: { // Same sequence as EXECNZ with the S_CSELECT_B64 immediates swapped (0, -1).
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
+ .addImm(0);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+ .addImm(0)
+ .addImm(-1);
+ BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg)
+ .addReg(SReg);
+ llvm_unreachable("Unhandled branch predicate EXECZ"); // NOTE(review): reached only after the three instructions above were emitted, and the 'break' below is dead - confirm whether EXECZ is meant to be supported.
+ break;
+ }
+ default:
+ llvm_unreachable("invalid branch predicate");
+ }
+ } else {
+ llvm_unreachable("Can only handle Cond size 1 or 2");
+ }
+}
+
+/// Insert V_CMP_EQ_I32_e64 with operands \p Value, \p SrcReg before \p I and
+/// return the new SReg_64 virtual register the instruction defines.
+unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
+                               MachineBasicBlock::iterator I,
+                               const DebugLoc &DL,
+                               unsigned SrcReg, int Value) const {
+  MachineRegisterInfo &RegInfo = MBB->getParent()->getRegInfo();
+  unsigned CmpResult = RegInfo.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  const MCInstrDesc &CmpDesc = get(AMDGPU::V_CMP_EQ_I32_e64);
+  BuildMI(*MBB, I, DL, CmpDesc, CmpResult).addImm(Value).addReg(SrcReg);
+  return CmpResult;
+}
+
+/// Insert V_CMP_NE_I32_e64 with operands \p Value, \p SrcReg before \p I and
+/// return the new SReg_64 virtual register the instruction defines.
+unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
+                               MachineBasicBlock::iterator I,
+                               const DebugLoc &DL,
+                               unsigned SrcReg, int Value) const {
+  MachineRegisterInfo &RegInfo = MBB->getParent()->getRegInfo();
+  unsigned CmpResult = RegInfo.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  const MCInstrDesc &CmpDesc = get(AMDGPU::V_CMP_NE_I32_e64);
+  BuildMI(*MBB, I, DL, CmpDesc, CmpResult).addImm(Value).addReg(SrcReg);
+  return CmpResult;
+}
+
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
if (RI.getRegSizeInBits(*DstRC) == 32) {
@@ -834,6 +1016,20 @@ void SIInstrInfo::insertNoop(MachineBasi
insertWaitStates(MBB, MI, 1);
}
+/// Append a program-ending instruction to \p MBB when it has neither
+/// successors nor a terminator: S_ENDPGM if the function returns void,
+/// SI_RETURN_TO_EPILOG otherwise. Asserts that this is an entry function.
+void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
+  MachineFunction *ParentMF = MBB.getParent();
+  auto *FuncInfo = ParentMF->getInfo<SIMachineFunctionInfo>();
+  assert(FuncInfo->isEntryFunction());
+  if (!MBB.succ_empty() || MBB.getFirstTerminator() != MBB.end())
+    return;
+  const unsigned EndOpc =
+      FuncInfo->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG;
+  BuildMI(MBB, MBB.end(), DebugLoc(), get(EndOpc));
+}
+
unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default: return 1; // FIXME: Do wait states equal cycles?
@@ -1241,14 +1437,20 @@ bool SIInstrInfo::analyzeBranchImpl(Mach
return false;
}
- BranchPredicate Pred = getBranchPredicate(I->getOpcode());
- if (Pred == INVALID_BR)
- return true;
+ MachineBasicBlock *CondBB = nullptr;
- MachineBasicBlock *CondBB = I->getOperand(0).getMBB();
- Cond.push_back(MachineOperand::CreateImm(Pred));
- Cond.push_back(I->getOperand(1)); // Save the branch register.
+ if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
+ CondBB = I->getOperand(1).getMBB();
+ Cond.push_back(I->getOperand(0));
+ } else {
+ BranchPredicate Pred = getBranchPredicate(I->getOpcode());
+ if (Pred == INVALID_BR)
+ return true;
+ CondBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(Pred));
+ Cond.push_back(I->getOperand(1)); // Save the branch register.
+ }
++I;
if (I == MBB.end()) {
@@ -1351,6 +1553,13 @@ unsigned SIInstrInfo::insertBranch(Machi
return 1;
}
+ if(Cond.size() == 1 && Cond[0].isReg()) {
+ BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
+ .add(Cond[0])
+ .addMBB(TBB);
+ return 1;
+ }
+
assert(TBB && Cond[0].isImm());
unsigned Opcode
@@ -1390,9 +1599,16 @@ unsigned SIInstrInfo::insertBranch(Machi
bool SIInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
- assert(Cond.size() == 2);
- Cond[0].setImm(-Cond[0].getImm());
- return false;
+ if (Cond.size() != 2) {
+ return true;
+ }
+
+ if (Cond[0].isImm()) {
+ Cond[0].setImm(-Cond[0].getImm());
+ return false;
+ }
+
+ return true;
}
bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
@@ -3920,6 +4136,82 @@ bool SIInstrInfo::mayAccessFlatAddressSp
return false;
}
+bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { // True iff Branch's opcode is SI_NON_UNIFORM_BRCOND_PSEUDO.
+ return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
+}
+
+/// Rewrite the SI_NON_UNIFORM_BRCOND_PSEUDO that is the first terminator of
+/// \p IfEntry into SI_IF, and insert the matching SI_END_CF at the first
+/// non-PHI position of \p IfEnd. Any other first terminator changes nothing.
+void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
+                                            MachineBasicBlock *IfEnd) const {
+  MachineBasicBlock::iterator TermIt = IfEntry->getFirstTerminator();
+  assert(TermIt != IfEntry->end());
+  MachineInstr &CondBr = *TermIt;
+  if (CondBr.getOpcode() != AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)
+    return;
+  MachineFunction &Fn = *IfEntry->getParent();
+  const DebugLoc &BrDL = CondBr.getDebugLoc();
+  unsigned MaskReg =
+      Fn.getRegInfo().createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  MachineInstr *IfMI = BuildMI(Fn, BrDL, get(AMDGPU::SI_IF), MaskReg)
+                           .add(CondBr.getOperand(0))
+                           .add(CondBr.getOperand(1));
+  MachineInstr *EndCfMI =
+      BuildMI(Fn, BrDL, get(AMDGPU::SI_END_CF)).addReg(MaskReg);
+
+  IfEntry->erase(TermIt);
+  IfEntry->insert(IfEntry->end(), IfMI);
+  IfEnd->insert(IfEnd->getFirstNonPHI(), EndCfMI);
+}
+
+/// Rewrite the SI_NON_UNIFORM_BRCOND_PSEUDO that is the first terminator of
+/// \p LoopEnd into SI_IF_BREAK followed by SI_LOOP back to \p LoopEntry, and
+/// add a PHI at the top of \p LoopEntry that takes the SI_IF_BREAK result on
+/// the edge from \p LoopEnd and a materialized 0 on every other incoming edge.
+/// Any other first terminator leaves the blocks untouched.
+void SIInstrInfo::convertNonUniformLoopRegion(
+    MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const {
+  MachineBasicBlock::iterator TermIt = LoopEnd->getFirstTerminator();
+  // We expect 2 terminators, one conditional and one unconditional.
+  assert(TermIt != LoopEnd->end());
+  MachineInstr &CondBr = *TermIt;
+  if (CondBr.getOpcode() != AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)
+    return;
+
+  MachineFunction &Fn = *LoopEnd->getParent();
+  MachineRegisterInfo &RegInfo = Fn.getRegInfo();
+  const DebugLoc &BrDL = CondBr.getDebugLoc();
+  const TargetRegisterClass *MaskRC = &AMDGPU::SReg_64RegClass;
+  unsigned PhiReg = RegInfo.createVirtualRegister(MaskRC);
+  unsigned BreakReg = RegInfo.createVirtualRegister(MaskRC);
+  // One (register, block) operand pair per predecessor of the loop header.
+  MachineInstrBuilder PhiMIB =
+      BuildMI(Fn, BrDL, get(TargetOpcode::PHI), PhiReg);
+  for (MachineBasicBlock *PredBB : LoopEntry->predecessors()) {
+    unsigned IncomingReg = BreakReg;
+    if (PredBB != LoopEnd) {
+      IncomingReg = RegInfo.createVirtualRegister(MaskRC);
+      materializeImmediate(*PredBB, PredBB->getFirstTerminator(), DebugLoc(),
+                           IncomingReg, 0);
+    }
+    PhiMIB.addReg(IncomingReg).addMBB(PredBB);
+  }
+  MachineInstr *PhiMI = PhiMIB;
+  MachineInstr *IfBreakMI =
+      BuildMI(Fn, BrDL, get(AMDGPU::SI_IF_BREAK), BreakReg)
+          .addReg(PhiReg)
+          .add(CondBr.getOperand(0));
+  MachineInstr *LoopMI = BuildMI(Fn, BrDL, get(AMDGPU::SI_LOOP))
+                             .addReg(BreakReg)
+                             .addMBB(LoopEntry);
+
+  LoopEntry->insert(LoopEntry->begin(), PhiMI);
+  LoopEnd->erase(TermIt);
+  LoopEnd->insert(LoopEnd->end(), IfBreakMI);
+  LoopEnd->insert(LoopEnd->end(), LoopMI);
+}
+
ArrayRef<std::pair<int, const char *>>
SIInstrInfo::getSerializableTargetIndices() const {
static const std::pair<int, const char *> TargetIndices[] = {
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=303111&r1=303110&r2=303111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Mon May 15 15:18:37 2017
@@ -143,6 +143,23 @@ public:
RegScavenger *RS, unsigned TmpReg,
unsigned Offset, unsigned Size) const;
+ void materializeImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const DebugLoc &DL,
+ unsigned DestReg,
+ int64_t Value) const;
+
+ const TargetRegisterClass *getPreferredSelectRegClass(
+ unsigned Size) const;
+
+ unsigned insertNE(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned SrcReg, int Value) const;
+
+ unsigned insertEQ(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned SrcReg, int Value) const;
+
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned SrcReg,
bool isKill, int FrameIndex,
@@ -193,7 +210,7 @@ public:
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const override;
+ bool AllowModify = false) const override;
unsigned removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved = nullptr) const override;
@@ -218,6 +235,11 @@ public:
unsigned DstReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const override;
+ void insertVectorSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg) const;
+
bool
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
@@ -705,6 +727,7 @@ public:
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+ void insertReturn(MachineBasicBlock &MBB) const;
/// \brief Return the number of wait states that result from executing this
/// instruction.
unsigned getNumWaitStates(const MachineInstr &MI) const;
@@ -750,6 +773,14 @@ public:
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
+ bool isNonUniformBranchInstr(MachineInstr &Instr) const;
+
+ void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
+ MachineBasicBlock *IfEnd) const;
+
+ void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
+ MachineBasicBlock *LoopEnd) const;
+
ArrayRef<std::pair<int, const char *>>
getSerializableTargetIndices() const override;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=303111&r1=303110&r2=303111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Mon May 15 15:18:37 2017
@@ -174,6 +174,13 @@ def SI_MASK_BRANCH : VPseudoInstSI <
let isTerminator = 1 in {
+ def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
+ (outs),
+ (ins SReg_64:$vcc, brtarget:$target),
+ [(brcond i1:$vcc, bb:$target)]> {
+ let Size = 12;
+}
+
def SI_IF: CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
[(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
More information about the llvm-commits
mailing list