[llvm] 47d6274 - [NFC][AMDGPU] Reduce include dependencies, part 2
Daniil Fukalov via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 1 07:50:39 PDT 2021
Author: Daniil Fukalov
Date: 2021-10-01T17:50:20+03:00
New Revision: 47d6274d4c31c9b46d059a7421277bef0395a869
URL: https://github.com/llvm/llvm-project/commit/47d6274d4c31c9b46d059a7421277bef0395a869
DIFF: https://github.com/llvm/llvm-project/commit/47d6274d4c31c9b46d059a7421277bef0395a869.diff
LOG: [NFC][AMDGPU] Reduce include dependencies, part 2
1. Split out some parts of the R600 target into separate modules/headers.
2. Reduced some include lists in headers.
3. Minor cleanup: added forward declarations, removed redundant includes, and
dropped unused flags from GCNSubtarget.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D109351
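The recurring pattern in this commit (see the AMDGPUAliasAnalysis.h/.cpp hunks
below) is to replace a header-to-header include with a forward declaration and
move the member body that actually needs the full type out of line. A minimal
generic sketch of the pattern, using hypothetical names (Pass, HeavyThing,
Heavy.h) rather than the actual LLVM code:

// Pass.h: previously had to #include "Heavy.h" only because the inline
// constructor body used HeavyThing; a forward declaration now suffices,
// so files that include Pass.h no longer pull in Heavy.h transitively.
class HeavyThing;                // forward declaration replaces the include

class Pass {
public:
  Pass();                        // body moved out of line into Pass.cpp
  void run(HeavyThing &T);       // a reference does not need the full type
};

// Pass.cpp: the heavy dependency is now confined to one translation unit.
#include "Pass.h"
#include "Heavy.h"               // full definition of HeavyThing lives here

Pass::Pass() { /* work that needs HeavyThing's definition */ }
void Pass::run(HeavyThing &T) { T.process(); } // hypothetical member call

This is what happens to AMDGPUAAWrapperPass below: its constructor moves from
AMDGPUAliasAnalysis.h into the .cpp (which gains #include "AMDGPU.h"), and the
header drops that include.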
Added:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
llvm/lib/Target/AMDGPU/CMakeLists.txt
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 7b2af01b46745..dd3eb3849eac1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUAliasAnalysis.h"
+#include "AMDGPU.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
@@ -37,6 +38,10 @@ ImmutablePass *llvm::createAMDGPUExternalAAWrapperPass() {
return new AMDGPUExternalAAWrapper();
}
+AMDGPUAAWrapperPass::AMDGPUAAWrapperPass() : ImmutablePass(ID) {
+ initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index c694e20c623db..22be014813b03 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -12,7 +12,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
-#include "AMDGPU.h"
#include "llvm/Analysis/AliasAnalysis.h"
namespace llvm {
@@ -66,9 +65,7 @@ class AMDGPUAAWrapperPass : public ImmutablePass {
public:
static char ID;
- AMDGPUAAWrapperPass() : ImmutablePass(ID) {
- initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ AMDGPUAAWrapperPass();
AMDGPUAAResult &getResult() { return *Result; }
const AMDGPUAAResult &getResult() const { return *Result; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5aa89e902b60e..20defbc883c18 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
-#include "R600.h"
-#include "R600Subtarget.h"
+#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -35,287 +35,12 @@
using namespace llvm;
-namespace llvm {
-
-class R600InstrInfo;
-
-} // end namespace llvm
-
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
namespace {
-static bool isNullConstantOrUndef(SDValue V) {
- if (V.isUndef())
- return true;
-
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isZero();
-}
-
-static bool getConstantValue(SDValue N, uint32_t &Out) {
- // This is only used for packed vectors, where using 0 for undef should
- // always be good.
- if (N.isUndef()) {
- Out = 0;
- return true;
- }
-
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
- Out = C->getAPIntValue().getSExtValue();
- return true;
- }
-
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
- Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
- return true;
- }
-
- return false;
-}
-
-// TODO: Handle undef as zero
-static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
- bool Negate = false) {
- assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
- uint32_t LHSVal, RHSVal;
- if (getConstantValue(N->getOperand(0), LHSVal) &&
- getConstantValue(N->getOperand(1), RHSVal)) {
- SDLoc SL(N);
- uint32_t K = Negate ?
- (-LHSVal & 0xffff) | (-RHSVal << 16) :
- (LHSVal & 0xffff) | (RHSVal << 16);
- return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
- DAG.getTargetConstant(K, SL, MVT::i32));
- }
-
- return nullptr;
-}
-
-static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
- return packConstantV2I16(N, DAG, true);
-}
-
-/// AMDGPU specific code to select AMDGPU machine instructions for
-/// SelectionDAG operations.
-class AMDGPUDAGToDAGISel : public SelectionDAGISel {
- // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
- // make the right decision when generating code for different targets.
- const GCNSubtarget *Subtarget;
-
- // Default FP mode for the current function.
- AMDGPU::SIModeRegisterDefaults Mode;
-
- bool EnableLateStructurizeCFG;
-
- // Instructions that will be lowered with a final instruction that zeros the
- // high result bits.
- bool fp16SrcZerosHighBits(unsigned Opc) const;
-
-public:
- explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
- CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
- : SelectionDAGISel(*TM, OptLevel) {
- EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
- }
- ~AMDGPUDAGToDAGISel() override = default;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AMDGPUArgumentUsageInfo>();
- AU.addRequired<LegacyDivergenceAnalysis>();
-#ifdef EXPENSIVE_CHECKS
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
-#endif
- SelectionDAGISel::getAnalysisUsage(AU);
- }
-
- bool matchLoadD16FromBuildVector(SDNode *N) const;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
- void PreprocessISelDAG() override;
- void Select(SDNode *N) override;
- StringRef getPassName() const override;
- void PostprocessISelDAG() override;
-
-protected:
- void SelectBuildVector(SDNode *N, unsigned RegClassID);
-
-private:
- std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
- bool isNoNanSrc(SDValue N) const;
- bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
- bool isNegInlineImmediate(const SDNode *N) const {
- return isInlineImmediate(N, true);
- }
-
- bool isInlineImmediate16(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate32(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate64(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate(const APFloat &Imm) const {
- return Subtarget->getInstrInfo()->isInlineConstant(Imm);
- }
-
- bool isVGPRImm(const SDNode *N) const;
- bool isUniformLoad(const SDNode *N) const;
- bool isUniformBr(const SDNode *N) const;
-
- bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
- SDValue &RHS) const;
-
- MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
-
- SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
- SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
- SDNode *glueCopyToM0LDSInit(SDNode *N) const;
-
- const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
- virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
- virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
- bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
- unsigned Size) const;
- bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
- bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1) const;
- bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1) const;
- bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1, unsigned Size) const;
- bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &Addr64) const;
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset) const;
- bool SelectMUBUFScratchOffen(SDNode *Parent,
- SDValue Addr, SDValue &RSrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &ImmOffset) const;
- bool SelectMUBUFScratchOffset(SDNode *Parent,
- SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset) const;
-
- bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset) const;
-
- bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, uint64_t FlatVariant) const;
- bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
- SDValue &VOffset, SDValue &Offset) const;
- bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
- SDValue &Offset) const;
-
- bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
- bool &Imm) const;
- SDValue Expand32BitAddress(SDValue Addr) const;
- bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
- bool &Imm) const;
- bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
- bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
- bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
-
- bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
- bool AllowAbs = true) const;
- bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
- bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
-
- bool SelectVOP3OMods(SDValue In, SDValue &Src,
- SDValue &Clamp, SDValue &Omod) const;
-
- bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
- bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- SDValue getHi16Elt(SDValue In) const;
-
- SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
-
- void SelectADD_SUB_I64(SDNode *N);
- void SelectAddcSubb(SDNode *N);
- void SelectUADDO_USUBO(SDNode *N);
- void SelectDIV_SCALE(SDNode *N);
- void SelectMAD_64_32(SDNode *N);
- void SelectFMA_W_CHAIN(SDNode *N);
- void SelectFMUL_W_CHAIN(SDNode *N);
-
- SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
- uint32_t Offset, uint32_t Width);
- void SelectS_BFEFromShifts(SDNode *N);
- void SelectS_BFE(SDNode *N);
- bool isCBranchSCC(const SDNode *N) const;
- void SelectBRCOND(SDNode *N);
- void SelectFMAD_FMA(SDNode *N);
- void SelectATOMIC_CMP_SWAP(SDNode *N);
- void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
- void SelectDS_GWS(SDNode *N, unsigned IntrID);
- void SelectInterpP1F16(SDNode *N);
- void SelectINTRINSIC_W_CHAIN(SDNode *N);
- void SelectINTRINSIC_WO_CHAIN(SDNode *N);
- void SelectINTRINSIC_VOID(SDNode *N);
-
-protected:
- // Include the pieces autogenerated from the target description.
-#include "AMDGPUGenDAGISel.inc"
-};
-
-class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
- const R600Subtarget *Subtarget;
-
- bool isConstantLoad(const MemSDNode *N, int cbID) const;
- bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
- bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
- SDValue& Offset);
-public:
- explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
- AMDGPUDAGToDAGISel(TM, OptLevel) {}
-
- void Select(SDNode *N) override;
-
- bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
- SDValue &Offset) override;
- bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) override;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void PreprocessISelDAG() override {}
-
-protected:
- // Include the pieces autogenerated from the target description.
-#include "R600GenDAGISel.inc"
-};
-
static SDValue stripBitcast(SDValue Val) {
return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}
@@ -389,11 +114,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
-/// This pass converts a legalized DAG into a R600-specific
-// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
- CodeGenOpt::Level OptLevel) {
- return new R600DAGToDAGISel(TM, OptLevel);
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
+ TargetMachine *TM /*= nullptr*/,
+ CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
+ : SelectionDAGISel(*TM, OptLevel) {
+ EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
@@ -471,6 +196,16 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
}
}
+void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AMDGPUArgumentUsageInfo>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
+#ifdef EXPENSIVE_CHECKS
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+#endif
+ SelectionDAGISel::getAnalysisUsage(AU);
+}
+
bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
assert(Subtarget->d16PreservesUnusedBits());
MVT VT = N->getValueType(0).getSimpleVT();
@@ -3121,128 +2856,3 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
CurDAG->RemoveDeadNodes();
} while (IsModified);
}
-
-bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &MF.getSubtarget<R600Subtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
-}
-
-bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
- if (!N->readMem())
- return false;
- if (CbId == -1)
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
-
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
- SDValue& IntPtr) {
- if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
- IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
- true);
- return true;
- }
- return false;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
- SDValue& BaseReg, SDValue &Offset) {
- if (!isa<ConstantSDNode>(Addr)) {
- BaseReg = Addr;
- Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
- return true;
- }
- return false;
-}
-
-void R600DAGToDAGISel::Select(SDNode *N) {
- unsigned int Opc = N->getOpcode();
- if (N->isMachineOpcode()) {
- N->setNodeId(-1);
- return; // Already selected.
- }
-
- switch (Opc) {
- default: break;
- case AMDGPUISD::BUILD_VERTICAL_VECTOR:
- case ISD::SCALAR_TO_VECTOR:
- case ISD::BUILD_VECTOR: {
- EVT VT = N->getValueType(0);
- unsigned NumVectorElts = VT.getVectorNumElements();
- unsigned RegClassID;
- // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
- // that adds a 128 bits reg copy when going through TwoAddressInstructions
- // pass. We want to avoid 128 bits copies as much as possible because they
- // can't be bundled by our scheduler.
- switch(NumVectorElts) {
- case 2: RegClassID = R600::R600_Reg64RegClassID; break;
- case 4:
- if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
- RegClassID = R600::R600_Reg128VerticalRegClassID;
- else
- RegClassID = R600::R600_Reg128RegClassID;
- break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
- }
- SelectBuildVector(N, RegClassID);
- return;
- }
- }
-
- SelectCode(N);
-}
-
-bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *C;
- SDLoc DL(Addr);
-
- if ((C = dyn_cast<ConstantSDNode>(Addr))) {
- Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
- (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
- Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
- (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- }
-
- return true;
-}
-
-bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *IMMOffset;
-
- if (Addr.getOpcode() == ISD::ADD
- && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && isInt<16>(IMMOffset->getZExtValue())) {
-
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- // If the pointer address is constant, we can move it to the offset field.
- } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
- && isInt<16>(IMMOffset->getZExtValue())) {
- Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
- SDLoc(CurDAG->getEntryNode()),
- R600::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- }
-
- // Default case, no offset
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- return true;
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
new file mode 100644
index 0000000000000..6f41191658499
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -0,0 +1,257 @@
+//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+static inline bool isNullConstantOrUndef(SDValue V) {
+ if (V.isUndef())
+ return true;
+
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isZero();
+}
+
+static inline bool getConstantValue(SDValue N, uint32_t &Out) {
+ // This is only used for packed vectors, where using 0 for undef should
+ // always be good.
+ if (N.isUndef()) {
+ Out = 0;
+ return true;
+ }
+
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ Out = C->getAPIntValue().getSExtValue();
+ return true;
+ }
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+ Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+// TODO: Handle undef as zero
+static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
+ bool Negate = false) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+ uint32_t LHSVal, RHSVal;
+ if (getConstantValue(N->getOperand(0), LHSVal) &&
+ getConstantValue(N->getOperand(1), RHSVal)) {
+ SDLoc SL(N);
+ uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
+ : (LHSVal & 0xffff) | (RHSVal << 16);
+ return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
+ DAG.getTargetConstant(K, SL, MVT::i32));
+ }
+
+ return nullptr;
+}
+
+static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
+ return packConstantV2I16(N, DAG, true);
+}
+} // namespace
+
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const GCNSubtarget *Subtarget;
+
+ // Default FP mode for the current function.
+ AMDGPU::SIModeRegisterDefaults Mode;
+
+ bool EnableLateStructurizeCFG;
+
+ // Instructions that will be lowered with a final instruction that zeros the
+ // high result bits.
+ bool fp16SrcZerosHighBits(unsigned Opc) const;
+
+public:
+ explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
+ CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
+ ~AMDGPUDAGToDAGISel() override = default;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool matchLoadD16FromBuildVector(SDNode *N) const;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void PreprocessISelDAG() override;
+ void Select(SDNode *N) override;
+ StringRef getPassName() const override;
+ void PostprocessISelDAG() override;
+
+protected:
+ void SelectBuildVector(SDNode *N, unsigned RegClassID);
+
+private:
+ std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
+ bool isNoNanSrc(SDValue N) const;
+ bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
+ bool isNegInlineImmediate(const SDNode *N) const {
+ return isInlineImmediate(N, true);
+ }
+
+ bool isInlineImmediate16(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate32(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate64(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate(const APFloat &Imm) const {
+ return Subtarget->getInstrInfo()->isInlineConstant(Imm);
+ }
+
+ bool isVGPRImm(const SDNode *N) const;
+ bool isUniformLoad(const SDNode *N) const;
+ bool isUniformBr(const SDNode *N) const;
+
+ bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
+ SDValue &RHS) const;
+
+ MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
+
+ SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
+ SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
+ SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+
+ const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
+ virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
+ bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
+ unsigned Size) const;
+ bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+ bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1, unsigned Size) const;
+ bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+ SDValue &Idxen, SDValue &Addr64) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset) const;
+ bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
+ SDValue &Soffset, SDValue &Offset) const;
+
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset) const;
+
+ bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, uint64_t FlatVariant) const;
+ bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &VOffset, SDValue &Offset) const;
+ bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &Offset) const;
+
+ bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
+ bool &Imm) const;
+ SDValue Expand32BitAddress(SDValue Addr) const;
+ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
+ bool &Imm) const;
+ bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
+ bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+ bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
+
+ bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+ bool AllowAbs = true) const;
+ bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
+ bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+
+ bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
+ SDValue &Omod) const;
+
+ bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
+ unsigned &Mods) const;
+ bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ SDValue getHi16Elt(SDValue In) const;
+
+ SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
+
+ void SelectADD_SUB_I64(SDNode *N);
+ void SelectAddcSubb(SDNode *N);
+ void SelectUADDO_USUBO(SDNode *N);
+ void SelectDIV_SCALE(SDNode *N);
+ void SelectMAD_64_32(SDNode *N);
+ void SelectFMA_W_CHAIN(SDNode *N);
+ void SelectFMUL_W_CHAIN(SDNode *N);
+
+ SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
+ uint32_t Offset, uint32_t Width);
+ void SelectS_BFEFromShifts(SDNode *N);
+ void SelectS_BFE(SDNode *N);
+ bool isCBranchSCC(const SDNode *N) const;
+ void SelectBRCOND(SDNode *N);
+ void SelectFMAD_FMA(SDNode *N);
+ void SelectATOMIC_CMP_SWAP(SDNode *N);
+ void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+ void SelectDS_GWS(SDNode *N, unsigned IntrID);
+ void SelectInterpP1F16(SDNode *N);
+ void SelectINTRINSIC_W_CHAIN(SDNode *N);
+ void SelectINTRINSIC_WO_CHAIN(SDNode *N);
+ void SelectINTRINSIC_VOID(SDNode *N);
+
+protected:
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index ccbcc867215c2..99e72815db2e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -12,12 +12,11 @@
//===----------------------------------------------------------------------===//
//
+#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "R600AsmPrinter.h"
-#include "R600Subtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
@@ -35,36 +34,6 @@
using namespace llvm;
-namespace {
-
-class AMDGPUMCInstLower {
- MCContext &Ctx;
- const TargetSubtargetInfo &ST;
- const AsmPrinter &AP;
-
-public:
- AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
- const AsmPrinter &AP);
-
- bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
-
- /// Lower a MachineInstr to an MCInst
- void lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-};
-
-class R600MCInstLower : public AMDGPUMCInstLower {
-public:
- R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
- const AsmPrinter &AP);
-
- /// Lower a MachineInstr to an MCInst
- void lower(const MachineInstr *MI, MCInst &OutMI) const;
-};
-
-
-} // End anonymous namespace
-
#include "AMDGPUGenMCPseudoLowering.inc"
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
@@ -195,30 +164,6 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
return MCInstLowering.lowerOperand(MO, MCOp);
}
-static const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
- const Constant *CV,
- MCContext &OutContext) {
- // TargetMachine does not support llvm-style cast. Use C++-style cast.
- // This is safe since TM is always of type AMDGPUTargetMachine or its
- // derived class.
- auto &AT = static_cast<const AMDGPUTargetMachine&>(TM);
- auto *CE = dyn_cast<ConstantExpr>(CV);
-
- // Lower null pointers in private and local address space.
- // Clang generates addrspacecast for null pointers in private and local
- // address space, which needs to be lowered.
- if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
- auto Op = CE->getOperand(0);
- auto SrcAddr = Op->getType()->getPointerAddressSpace();
- if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
- auto DstAddr = CE->getType()->getPointerAddressSpace();
- return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
- OutContext);
- }
- }
- return nullptr;
-}
-
const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
return E;
@@ -332,47 +277,3 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
}
}
-
-R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
- const AsmPrinter &AP) :
- AMDGPUMCInstLower(Ctx, ST, AP) { }
-
-void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
- OutMI.setOpcode(MI->getOpcode());
- for (const MachineOperand &MO : MI->explicit_operands()) {
- MCOperand MCOp;
- lowerOperand(MO, MCOp);
- OutMI.addOperand(MCOp);
- }
-}
-
-void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
- const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
- R600MCInstLower MCInstLowering(OutContext, STI, *this);
-
- StringRef Err;
- if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
- LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
- C.emitError("Illegal instruction detected: " + Err);
- MI->print(errs());
- }
-
- if (MI->isBundle()) {
- const MachineBasicBlock *MBB = MI->getParent();
- MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
- while (I != MBB->instr_end() && I->isInsideBundle()) {
- emitInstruction(&*I);
- ++I;
- }
- } else {
- MCInst TmpInst;
- MCInstLowering.lower(MI, TmpInst);
- EmitToStreamer(*OutStreamer, TmpInst);
- }
-}
-
-const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
- if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
- return E;
- return AsmPrinter::lowerConstant(CV);
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
new file mode 100644
index 0000000000000..0e43b4fe9461f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -0,0 +1,69 @@
+//===- AMDGPUMCInstLower.h - Lower AMDGPU MachineInstr to an MCInst -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Header of lower AMDGPU MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+} // namespace llvm
+
+using namespace llvm;
+
+class AMDGPUMCInstLower {
+ MCContext &Ctx;
+ const TargetSubtargetInfo &ST;
+ const AsmPrinter &AP;
+
+public:
+ AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
+ const AsmPrinter &AP);
+
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+ /// Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+namespace {
+static inline const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
+ const Constant *CV,
+ MCContext &OutContext) {
+ // TargetMachine does not support llvm-style cast. Use C++-style cast.
+ // This is safe since TM is always of type AMDGPUTargetMachine or its
+ // derived class.
+ auto &AT = static_cast<const AMDGPUTargetMachine &>(TM);
+ auto *CE = dyn_cast<ConstantExpr>(CV);
+
+ // Lower null pointers in private and local address space.
+ // Clang generates addrspacecast for null pointers in private and local
+ // address space, which needs to be lowered.
+ if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
+ auto Op = CE->getOperand(0);
+ auto SrcAddr = Op->getType()->getPointerAddressSpace();
+ if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
+ auto DstAddr = CE->getType()->getPointerAddressSpace();
+ return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
+ OutContext);
+ }
+ }
+ return nullptr;
+}
+} // namespace
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 3f36dec694e7a..f657019c2a33f 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -6,7 +6,6 @@
//
//==-----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600.h"
#include "R600RegisterInfo.h"
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 6e85018b81387..86218a3af28a8 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -44,24 +44,24 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUAliasAnalysis.cpp
AMDGPUAlwaysInlinePass.cpp
AMDGPUAnnotateKernelFeatures.cpp
- AMDGPUAttributor.cpp
AMDGPUAnnotateUniformValues.cpp
AMDGPUArgumentUsageInfo.cpp
AMDGPUAsmPrinter.cpp
AMDGPUAtomicOptimizer.cpp
+ AMDGPUAttributor.cpp
AMDGPUCallLowering.cpp
AMDGPUCodeGenPrepare.cpp
+ AMDGPUCtorDtorLowering.cpp
AMDGPUExportClustering.cpp
AMDGPUFixFunctionBitcasts.cpp
- AMDGPUCtorDtorLowering.cpp
AMDGPUFrameLowering.cpp
+ AMDGPUGlobalISelUtils.cpp
AMDGPUHSAMetadataStreamer.cpp
AMDGPUInstCombineIntrinsic.cpp
AMDGPUInstrInfo.cpp
AMDGPUInstructionSelector.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPUISelLowering.cpp
- AMDGPUGlobalISelUtils.cpp
AMDGPULateCodeGenPrepare.cpp
AMDGPULegalizerInfo.cpp
AMDGPULibCalls.cpp
@@ -77,13 +77,16 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUMCInstLower.cpp
AMDGPUMIRFormatter.cpp
AMDGPUOpenCLEnqueuedBlockLowering.cpp
+ AMDGPUPerfHintAnalysis.cpp
AMDGPUPostLegalizerCombiner.cpp
AMDGPUPreLegalizerCombiner.cpp
+ AMDGPUPrintfRuntimeBinding.cpp
AMDGPUPromoteAlloca.cpp
AMDGPUPropagateAttributes.cpp
AMDGPURegBankCombiner.cpp
AMDGPURegisterBankInfo.cpp
AMDGPUReplaceLDSUseWithPointer.cpp
+ AMDGPUResourceUsageAnalysis.cpp
AMDGPURewriteOutArguments.cpp
AMDGPUSubtarget.cpp
AMDGPUTargetMachine.cpp
@@ -91,13 +94,14 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUTargetTransformInfo.cpp
AMDGPUUnifyDivergentExitNodes.cpp
AMDGPUUnifyMetadata.cpp
- AMDGPUPerfHintAnalysis.cpp
AMDILCFGStructurizer.cpp
- AMDGPUPrintfRuntimeBinding.cpp
- AMDGPUResourceUsageAnalysis.cpp
+ GCNDPPCombine.cpp
GCNHazardRecognizer.cpp
+ GCNILPSched.cpp
GCNIterativeScheduler.cpp
GCNMinRegStrategy.cpp
+ GCNNSAReassign.cpp
+ GCNPreRAOptimizations.cpp
GCNRegPressure.cpp
GCNSchedStrategy.cpp
R600AsmPrinter.cpp
@@ -107,9 +111,11 @@ add_llvm_target(AMDGPUCodeGen
R600ExpandSpecialInstrs.cpp
R600FrameLowering.cpp
R600InstrInfo.cpp
+ R600ISelDAGToDAG.cpp
R600ISelLowering.cpp
R600MachineFunctionInfo.cpp
R600MachineScheduler.cpp
+ R600MCInstLower.cpp
R600OpenCLImageTypeLoweringPass.cpp
R600OptimizeVectorRegisters.cpp
R600Packetizer.cpp
@@ -120,15 +126,14 @@ add_llvm_target(AMDGPUCodeGen
SIAnnotateControlFlow.cpp
SIFixSGPRCopies.cpp
SIFixVGPRCopies.cpp
- SIPreAllocateWWMRegs.cpp
SIFoldOperands.cpp
SIFormMemoryClauses.cpp
SIFrameLowering.cpp
SIInsertHardClauses.cpp
- SILateBranchLowering.cpp
SIInsertWaitcnts.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
+ SILateBranchLowering.cpp
SILoadStoreOptimizer.cpp
SILowerControlFlow.cpp
SILowerI1Copies.cpp
@@ -136,21 +141,18 @@ add_llvm_target(AMDGPUCodeGen
SIMachineFunctionInfo.cpp
SIMachineScheduler.cpp
SIMemoryLegalizer.cpp
+ SIModeRegister.cpp
SIOptimizeExecMasking.cpp
SIOptimizeExecMaskingPreRA.cpp
SIOptimizeVGPRLiveRange.cpp
SIPeepholeSDWA.cpp
SIPostRABundler.cpp
+ SIPreAllocateWWMRegs.cpp
SIPreEmitPeephole.cpp
SIProgramInfo.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
SIWholeQuadMode.cpp
- GCNILPSched.cpp
- GCNNSAReassign.cpp
- GCNDPPCombine.cpp
- GCNPreRAOptimizations.cpp
- SIModeRegister.cpp
LINK_COMPONENTS
Analysis
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 672552ff66557..d8bc0b2df2bdb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -97,7 +97,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool FP64;
bool FMA;
bool MIMG_R128;
- bool IsGCN;
bool CIInsts;
bool GFX8Insts;
bool GFX9Insts;
@@ -165,13 +164,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasArchitectedFlatScratch;
bool AddNoCarryInsts;
bool HasUnpackedD16VMem;
- bool R600ALUInst;
- bool CaymanISA;
- bool CFALUBug;
bool LDSMisalignedBug;
bool HasMFMAInlineLiteralBug;
- bool HasVertexCache;
- short TexVTXClauseSize;
bool UnalignedBufferAccess;
bool UnalignedDSAccess;
bool HasPackedTID;
diff --git a/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
new file mode 100644
index 0000000000000..9f842e91c0f37
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
@@ -0,0 +1,184 @@
+//===-- R600ISelDAGToDAG.cpp - A dag to dag inst selector for R600 --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the R600 subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUISelDAGToDAG.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
+#include "R600Subtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+ const R600Subtarget *Subtarget;
+
+ bool isConstantLoad(const MemSDNode *N, int cbID) const;
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+ SDValue &Offset);
+
+public:
+ explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel)
+ : AMDGPUDAGToDAGISel(TM, OptLevel) {}
+
+ void Select(SDNode *N) override;
+
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) override;
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void PreprocessISelDAG() override {}
+
+protected:
+ // Include the pieces autogenerated from the target description.
+#include "R600GenDAGISel.inc"
+};
+
+bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &MF.getSubtarget<R600Subtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+ if (!N->readMem())
+ return false;
+ if (CbId == -1)
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
+
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue &IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr =
+ CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), true);
+ return true;
+ }
+ return false;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue &BaseReg,
+ SDValue &Offset) {
+ if (!isa<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
+ return true;
+ }
+ return false;
+}
+
+void R600DAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return; // Already selected.
+ }
+
+ switch (Opc) {
+ default:
+ break;
+ case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ case ISD::BUILD_VECTOR: {
+ EVT VT = N->getValueType(0);
+ unsigned NumVectorElts = VT.getVectorNumElements();
+ unsigned RegClassID;
+ // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+ // that adds a 128 bits reg copy when going through TwoAddressInstructions
+ // pass. We want to avoid 128 bits copies as much as possible because they
+ // can't be bundled by our scheduler.
+ switch (NumVectorElts) {
+ case 2:
+ RegClassID = R600::R600_Reg64RegClassID;
+ break;
+ case 4:
+ if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ RegClassID = R600::R600_Reg128VerticalRegClassID;
+ else
+ RegClassID = R600::R600_Reg128RegClassID;
+ break;
+ default:
+ llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ }
+ SelectBuildVector(N, RegClassID);
+ return;
+ }
+ }
+
+ SelectCode(N);
+}
+
+bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *C;
+ SDLoc DL(Addr);
+
+ if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ }
+
+ return true;
+}
+
+bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD &&
+ (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
+ isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) &&
+ isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ SDLoc(CurDAG->getEntryNode()), R600::ZERO,
+ MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+}
+
+/// This pass converts a legalized DAG into a R600-specific
+// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
+ CodeGenOpt::Level OptLevel) {
+ return new R600DAGToDAGISel(TM, OptLevel);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
new file mode 100644
index 0000000000000..8f7807a2b4728
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
@@ -0,0 +1,73 @@
+//===- R600MCInstLower.cpp - Lower R600 MachineInstr to an MCInst ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Code to lower R600 MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "R600AsmPrinter.h"
+#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+
+class R600MCInstLower : public AMDGPUMCInstLower {
+public:
+ R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
+ const AsmPrinter &AP);
+
+ /// Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
+ const AsmPrinter &AP)
+ : AMDGPUMCInstLower(Ctx, ST, AP) {}
+
+void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+ for (const MachineOperand &MO : MI->explicit_operands()) {
+ MCOperand MCOp;
+ lowerOperand(MO, MCOp);
+ OutMI.addOperand(MCOp);
+ }
+}
+
+void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
+ R600MCInstLower MCInstLowering(OutContext, STI, *this);
+
+ StringRef Err;
+ if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
+ LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
+ C.emitError("Illegal instruction detected: " + Err);
+ MI->print(errs());
+ }
+
+ if (MI->isBundle()) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
+ while (I != MBB->instr_end() && I->isInsideBundle()) {
+ emitInstruction(&*I);
+ ++I;
+ }
+ } else {
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ }
+}
+
+const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
+ if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
+ return E;
+ return AsmPrinter::lowerConstant(CV);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
index 5ddfc6aac6cd8..365c005b25034 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "R600TargetTransformInfo.h"
+#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "R600Subtarget.h"
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
index 5af4b180c0058..2e4d83fbbc39c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPULDSUtils.h"
+#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
index ffcafb9b76cec..a71920c79bc33 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
-#include "AMDGPU.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Constants.h"