[llvm] r310336 - AMDGPU: Move R600 parts of AMDGPUISelDAGToDAG into their own class
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 7 21:57:55 PDT 2017
Author: tstellar
Date: Mon Aug 7 21:57:55 2017
New Revision: 310336
URL: http://llvm.org/viewvc/llvm-project?rev=310336&view=rev
Log:
AMDGPU: Move R600 parts of AMDGPUISelDAGToDAG into their own class
Summary: This refactoring is required in order to split the R600 and GCN tablegen files.
Reviewers: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D36286
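At a glance, the refactoring has the following shape. This is a minimal compilable sketch: the class and factory names mirror the patch, but every body is a simplified stand-in, not the real LLVM implementation.

  struct SDNode {};  // stand-in for llvm::SDNode

  class AMDGPUDAGToDAGISel {
  protected:
    // Shared helper both targets keep reusing (cf. SelectBuildVector).
    void SelectBuildVector(SDNode *, unsigned /*RegClassID*/) {}
  public:
    virtual ~AMDGPUDAGToDAGISel() = default;
    virtual void Select(SDNode *) {}                            // GCN paths
    virtual bool SelectADDRVTX_READ(SDNode *) { return false; } // GCN: bails out
    virtual bool SelectADDRIndirect(SDNode *) { return false; } // simplified body
  };

  class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  public:
    void Select(SDNode *) override {}                            // R600-only nodes
    bool SelectADDRVTX_READ(SDNode *) override { return true; }  // R600 logic
    bool SelectADDRIndirect(SDNode *) override { return true; }
  };

  // Factory functions mirroring createAMDGPUISelDag / createR600ISelDag.
  AMDGPUDAGToDAGISel *createAMDGPUISelDagSketch() { return new AMDGPUDAGToDAGISel; }
  AMDGPUDAGToDAGISel *createR600ISelDagSketch() { return new R600DAGToDAGISel; }

  int main() {
    delete createAMDGPUISelDagSketch();
    delete createR600ISelDagSketch();
  }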
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=310336&r1=310335&r2=310336&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Mon Aug 7 21:57:55 2017
@@ -34,6 +34,7 @@ FunctionPass *createR600ClauseMergePass(
FunctionPass *createR600Packetizer();
FunctionPass *createR600ControlFlowFinalizer();
FunctionPass *createAMDGPUCFGStructurizerPass();
+FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=310336&r1=310335&r2=310336&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Mon Aug 7 21:57:55 2017
@@ -88,6 +88,9 @@ public:
StringRef getPassName() const override;
void PostprocessISelDAG() override;
+protected:
+ void SelectBuildVector(SDNode *N, unsigned RegClassID);
+
private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
@@ -106,8 +109,8 @@ private:
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
SDValue& Offset);
- bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+ virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
unsigned OffsetBits) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
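Why these two selectors become virtual: the matcher generated from the target description (AMDGPUGenDAGISel.inc, included into the class body in a later hunk, and moved under protected for the same reason) calls them as ordinary members, so virtual dispatch is what lets the single generated matcher reach the R600 overrides added further down. A standalone model of that interaction, with illustrative names and ints standing in for SDValues:

  #include <cstdio>

  struct BaseISel {
    virtual ~BaseISel() = default;
    virtual bool SelectADDRVTX_READ(int Addr, int &Base, int &Offset) {
      (void)Addr; (void)Base; (void)Offset;
      return false; // base class: the pattern never matches
    }
    // Stands in for the generated SelectCode(): compiled against BaseISel only.
    bool matchVTXRead(int Addr) {
      int B, O;
      return SelectADDRVTX_READ(Addr, B, O); // virtual dispatch happens here
    }
  };

  struct R600ISel : BaseISel {
    bool SelectADDRVTX_READ(int Addr, int &Base, int &Offset) override {
      Base = Addr;
      Offset = 0; // simplified: "default case, no offset"
      return true;
    }
  };

  int main() {
    BaseISel G;
    R600ISel R;
    std::printf("%d %d\n", G.matchVTXRead(42), R.matchVTXRead(42)); // prints: 0 1
  }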
@@ -207,10 +210,24 @@ private:
void SelectBRCOND(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
+protected:
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
+class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+public:
+ explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
+ AMDGPUDAGToDAGISel(TM, OptLevel) {}
+
+ void Select(SDNode *N) override;
+
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) override;
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) override;
+};
+
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
@@ -226,6 +243,13 @@ FunctionPass *llvm::createAMDGPUISelDag(
return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
+/// \brief This pass converts a legalized DAG into an R600-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
+ CodeGenOpt::Level OptLevel) {
+ return new R600DAGToDAGISel(TM, OptLevel);
+}
+
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
@@ -304,8 +328,7 @@ const TargetRegisterClass *AMDGPUDAGToDA
}
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
+ if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
return N;
const SITargetLowering& Lowering =
@@ -359,6 +382,59 @@ static bool getConstantValue(SDValue N,
return false;
}
+void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
+ unsigned Opc = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ unsigned NumVectorElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
+ SDLoc DL(N);
+ SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+
+ if (NumVectorElts == 1) {
+ CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
+ RegClass);
+ return;
+ }
+
+ assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+ "supported yet");
+ // 16 = Max Num Vector Elements
+ // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
+ // 1 = Vector Register Class
+ SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
+
+ RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+ bool IsRegSeq = true;
+ unsigned NOps = N->getNumOperands();
+ for (unsigned i = 0; i < NOps; i++) {
+ // XXX: Why is this here?
+ if (isa<RegisterSDNode>(N->getOperand(i))) {
+ IsRegSeq = false;
+ break;
+ }
+ RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
+ RegSeqArgs[1 + (2 * i) + 1] =
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
+ MVT::i32);
+ }
+ if (NOps != NumVectorElts) {
+ // Fill in the missing undef elements if this was a scalar_to_vector.
+ assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
+ MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ DL, EltVT);
+ for (unsigned i = NOps; i < NumVectorElts; ++i) {
+ RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
+ RegSeqArgs[1 + (2 * i) + 1] =
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
+ }
+ }
+
+ if (!IsRegSeq)
+ SelectCode(N);
+ CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
+}
+
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
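The RegSeqArgs indexing in SelectBuildVector above builds the flat REG_SEQUENCE operand list: one register-class ID followed by one (value, subregister index) pair per element. A standalone model of that layout for a four-element vector:

  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned NumVectorElts = 4;
    // 1 slot for the register class + 2 slots (value, subreg index) per element.
    std::vector<const char *> RegSeqArgs(NumVectorElts * 2 + 1);
    RegSeqArgs[0] = "RegClassID";
    const char *Vals[] = {"v0", "v1", "v2", "v3"};
    const char *Subs[] = {"sub0", "sub1", "sub2", "sub3"};
    for (unsigned i = 0; i < NumVectorElts; ++i) {
      RegSeqArgs[1 + (2 * i)] = Vals[i];     // element value
      RegSeqArgs[1 + (2 * i) + 1] = Subs[i]; // its subregister slot
    }
    for (const char *Op : RegSeqArgs)
      std::printf("%s ", Op);
    std::printf("\n"); // RegClassID v0 sub0 v1 sub1 v2 sub2 v3 sub3
  }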
@@ -381,8 +457,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
case ISD::SUB:
case ISD::SUBC:
case ISD::SUBE: {
- if (N->getValueType(0) != MVT::i64 ||
- Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ if (N->getValueType(0) != MVT::i64)
break;
SelectADD_SUB_I64(N);
@@ -403,10 +478,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
}
case ISD::SCALAR_TO_VECTOR:
- case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
- unsigned RegClassID;
- const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
@@ -427,80 +499,12 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
}
assert(EltVT.bitsEq(MVT::i32));
-
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
- } else {
- // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
- // that adds a 128 bits reg copy when going through TwoAddressInstructions
- // pass. We want to avoid 128 bits copies as much as possible because they
- // can't be bundled by our scheduler.
- switch(NumVectorElts) {
- case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
- case 4:
- if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
- RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
- else
- RegClassID = AMDGPU::R600_Reg128RegClassID;
- break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
- }
- }
-
- SDLoc DL(N);
- SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
-
- if (NumVectorElts == 1) {
- CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
- RegClass);
- return;
- }
-
- assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
- "supported yet");
- // 16 = Max Num Vector Elements
- // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
- // 1 = Vector Register Class
- SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
-
- RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
- bool IsRegSeq = true;
- unsigned NOps = N->getNumOperands();
- for (unsigned i = 0; i < NOps; i++) {
- // XXX: Why is this here?
- if (isa<RegisterSDNode>(N->getOperand(i))) {
- IsRegSeq = false;
- break;
- }
- RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
- RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
- MVT::i32);
- }
-
- if (NOps != NumVectorElts) {
- // Fill in the missing undef elements if this was a scalar_to_vector.
- assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
-
- MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- DL, EltVT);
- for (unsigned i = NOps; i < NumVectorElts; ++i) {
- RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
- RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
- }
- }
-
- if (!IsRegSeq)
- break;
- CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
+ unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
+ SelectBuildVector(N, RegClassID);
return;
}
case ISD::BUILD_PAIR: {
SDValue RC, SubReg0, SubReg1;
- if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- break;
- }
SDLoc DL(N);
if (N->getValueType(0) == MVT::i128) {
RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
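With the R600 register-class switch gone, the BUILD_VECTOR path above funnels everything through selectSGPRVectorRegClassID, whose body is not part of this diff. As a hedged reference, here is a standalone model of the element-count to register-class mapping its name suggests; the exact classes are an assumption drawn from the SReg_* naming, not from this patch:

  #include <cstdio>
  #include <initializer_list>

  static const char *sgprClassFor(unsigned NumVectorElts) {
    switch (NumVectorElts) {
    case 2:  return "SReg_64";   // assumed mapping; not shown in this patch
    case 4:  return "SReg_128";
    case 8:  return "SReg_256";
    case 16: return "SReg_512";
    default: return "unsupported";
    }
  }

  int main() {
    for (unsigned N : {2u, 4u, 8u, 16u})
      std::printf("%2u elements -> %s\n", N, sgprClassFor(N));
  }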
@@ -522,8 +526,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
case ISD::Constant:
case ISD::ConstantFP: {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+ if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
break;
uint64_t Imm;
@@ -558,9 +561,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- break;
-
// There is a scalar version available, but unlike the vector version which
// has a separate operand for the offset and width, the scalar version packs
// the width and offset into a single operand. Try to move to the scalar
@@ -600,8 +600,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
case ISD::SRL:
case ISD::SRA:
case ISD::SIGN_EXTEND_INREG:
- if (N->getValueType(0) != MVT::i32 ||
- Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ if (N->getValueType(0) != MVT::i32)
break;
SelectS_BFE(N);
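Both BFE hunks above lean on the scalar BFE form that, as the comment in the first of them notes, packs the offset and width into a single operand. A worked sketch of such a packing; the exact layout used here (offset in the low bits, width above) is an assumption for illustration, not taken from this patch:

  #include <cstdint>
  #include <cstdio>

  // Assumed layout for illustration: offset in the low 16 bits, width above.
  static uint32_t packBFEOperand(uint32_t Offset, uint32_t Width) {
    return (Width << 16) | (Offset & 0xffff);
  }

  int main() {
    // Extract a 5-bit field starting at bit 8.
    std::printf("0x%08x\n", packBFEOperand(/*Offset=*/8, /*Width=*/5)); // 0x00050008
  }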
@@ -663,32 +662,8 @@ bool AMDGPUDAGToDAGISel::SelectGlobalVal
}
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *IMMOffset;
-
- if (Addr.getOpcode() == ISD::ADD
- && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && isInt<16>(IMMOffset->getZExtValue())) {
-
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- // If the pointer address is constant, we can move it to the offset field.
- } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
- && isInt<16>(IMMOffset->getZExtValue())) {
- Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
- SDLoc(CurDAG->getEntryNode()),
- AMDGPU::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- }
-
- // Default case, no offset
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- return true;
+ SDValue &Offset) {
+ return false;
}
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
@@ -1956,3 +1931,93 @@ void AMDGPUDAGToDAGISel::PostprocessISel
CurDAG->RemoveDeadNodes();
} while (IsModified);
}
+
+void R600DAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return; // Already selected.
+ }
+
+ switch (Opc) {
+ default: break;
+ case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ case ISD::BUILD_VECTOR: {
+ EVT VT = N->getValueType(0);
+ unsigned NumVectorElts = VT.getVectorNumElements();
+ unsigned RegClassID;
+    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+    // that adds a 128-bit reg copy when going through TwoAddressInstructions
+    // pass. We want to avoid 128-bit copies as much as possible because they
+    // can't be bundled by our scheduler.
+ switch(NumVectorElts) {
+ case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+ case 4:
+ if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
+ else
+ RegClassID = AMDGPU::R600_Reg128RegClassID;
+ break;
+ default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ }
+ SelectBuildVector(N, RegClassID);
+ return;
+ }
+ }
+
+ SelectCode(N);
+}
+
+bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *C;
+ SDLoc DL(Addr);
+
+ if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+ Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
+ Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ }
+
+ return true;
+}
+
+bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD
+ && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ SDLoc(CurDAG->getEntryNode()),
+ AMDGPU::ZERO, MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+}
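The R600 SelectADDRVTX_READ above folds a constant into the offset field only when it fits a signed 16-bit immediate (the isInt<16> checks). A standalone model of that gate:

  #include <cstdint>
  #include <cstdio>

  // Stand-in for llvm::isInt<16>: does the value fit a signed 16-bit field?
  static bool fitsInt16(int64_t V) { return V >= INT16_MIN && V <= INT16_MAX; }

  int main() {
    // (add base, 32)    -> folds: Base = base, Offset = 32
    // (add base, 70000) -> no fold: 70000 does not fit, keep the add
    std::printf("%d %d\n", fitsInt16(32), fitsInt16(70000)); // prints: 1 0
  }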
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=310336&r1=310335&r2=310336&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Aug 7 21:57:55 2017
@@ -488,6 +488,7 @@ public:
}
bool addPreISel() override;
+ bool addInstSelector() override;
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -660,6 +661,11 @@ bool R600PassConfig::addPreISel() {
return false;
}
+bool R600PassConfig::addInstSelector() {
+ addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
+ return false;
+}
+
void R600PassConfig::addPreRegAlloc() {
addPass(createR600VectorRegMerger());
}