[llvm] 47d6274 - [NFC][AMDGPU] Reduce includes dependencies, part 2

Fri Oct 1 07:50:39 PDT 2021

Author: Daniil Fukalov
Date: 2021-10-01T17:50:20+03:00
New Revision: 47d6274d4c31c9b46d059a7421277bef0395a869

URL: https://github.com/llvm/llvm-project/commit/47d6274d4c31c9b46d059a7421277bef0395a869
DIFF: https://github.com/llvm/llvm-project/commit/47d6274d4c31c9b46d059a7421277bef0395a869.diff

LOG: [NFC][AMDGPU] Reduce includes dependencies, part 2

1. Splitted out some parts of R600 target to separate modules/headers.
2. Reduced some include lists in headers.
3. Minor forward declarations, redundant includes and flags in GCNSubtarget
   cleanup.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D109351

Added: 
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
    llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
    llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
    llvm/lib/Target/AMDGPU/R600MCInstLower.cpp

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
    llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
    llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
    llvm/lib/Target/AMDGPU/CMakeLists.txt
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 7b2af01b46745..dd3eb3849eac1 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,6 +10,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUAliasAnalysis.h"
+#include "AMDGPU.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Instructions.h"
 
@@ -37,6 +38,10 @@ ImmutablePass *llvm::createAMDGPUExternalAAWrapperPass() {
   return new AMDGPUExternalAAWrapper();
 }
 
+AMDGPUAAWrapperPass::AMDGPUAAWrapperPass() : ImmutablePass(ID) {
+  initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
 void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
 }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index c694e20c623db..22be014813b03 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -12,7 +12,6 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
 
-#include "AMDGPU.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 
 namespace llvm {
@@ -66,9 +65,7 @@ class AMDGPUAAWrapperPass : public ImmutablePass {
 public:
   static char ID;
 
-  AMDGPUAAWrapperPass() : ImmutablePass(ID) {
-    initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
-  }
+  AMDGPUAAWrapperPass();
 
   AMDGPUAAResult &getResult() { return *Result; }
   const AMDGPUAAResult &getResult() const { return *Result; }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5aa89e902b60e..20defbc883c18 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AMDGPUISelDAGToDAG.h"
 #include "AMDGPU.h"
 #include "AMDGPUTargetMachine.h"
 #include "MCTargetDesc/R600MCTargetDesc.h"
-#include "R600.h"
-#include "R600Subtarget.h"
+#include "R600RegisterInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -35,287 +35,12 @@
 
 using namespace llvm;
 
-namespace llvm {
-
-class R600InstrInfo;
-
-} // end namespace llvm
-
 //===----------------------------------------------------------------------===//
 // Instruction Selector Implementation
 //===----------------------------------------------------------------------===//
 
 namespace {
 
-static bool isNullConstantOrUndef(SDValue V) {
-  if (V.isUndef())
-    return true;
-
-  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
-  return Const != nullptr && Const->isZero();
-}
-
-static bool getConstantValue(SDValue N, uint32_t &Out) {
-  // This is only used for packed vectors, where using 0 for undef should
-  // always be good.
-  if (N.isUndef()) {
-    Out = 0;
-    return true;
-  }
-
-  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
-    Out = C->getAPIntValue().getSExtValue();
-    return true;
-  }
-
-  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
-    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
-    return true;
-  }
-
-  return false;
-}
-
-// TODO: Handle undef as zero
-static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
-                                 bool Negate = false) {
-  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
-  uint32_t LHSVal, RHSVal;
-  if (getConstantValue(N->getOperand(0), LHSVal) &&
-      getConstantValue(N->getOperand(1), RHSVal)) {
-    SDLoc SL(N);
-    uint32_t K = Negate ?
-      (-LHSVal & 0xffff) | (-RHSVal << 16) :
-      (LHSVal & 0xffff) | (RHSVal << 16);
-    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
-                              DAG.getTargetConstant(K, SL, MVT::i32));
-  }
-
-  return nullptr;
-}
-
-static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
-  return packConstantV2I16(N, DAG, true);
-}
-
-/// AMDGPU specific code to select AMDGPU machine instructions for
-/// SelectionDAG operations.
-class AMDGPUDAGToDAGISel : public SelectionDAGISel {
-  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
-  // make the right decision when generating code for 
diff erent targets.
-  const GCNSubtarget *Subtarget;
-
-  // Default FP mode for the current function.
-  AMDGPU::SIModeRegisterDefaults Mode;
-
-  bool EnableLateStructurizeCFG;
-
-  // Instructions that will be lowered with a final instruction that zeros the
-  // high result bits.
-  bool fp16SrcZerosHighBits(unsigned Opc) const;
-
-public:
-  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
-                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
-    : SelectionDAGISel(*TM, OptLevel) {
-    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
-  }
-  ~AMDGPUDAGToDAGISel() override = default;
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<AMDGPUArgumentUsageInfo>();
-    AU.addRequired<LegacyDivergenceAnalysis>();
-#ifdef EXPENSIVE_CHECKS
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<LoopInfoWrapperPass>();
-#endif
-    SelectionDAGISel::getAnalysisUsage(AU);
-  }
-
-  bool matchLoadD16FromBuildVector(SDNode *N) const;
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-  void PreprocessISelDAG() override;
-  void Select(SDNode *N) override;
-  StringRef getPassName() const override;
-  void PostprocessISelDAG() override;
-
-protected:
-  void SelectBuildVector(SDNode *N, unsigned RegClassID);
-
-private:
-  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
-  bool isNoNanSrc(SDValue N) const;
-  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
-  bool isNegInlineImmediate(const SDNode *N) const {
-    return isInlineImmediate(N, true);
-  }
-
-  bool isInlineImmediate16(int64_t Imm) const {
-    return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
-  }
-
-  bool isInlineImmediate32(int64_t Imm) const {
-    return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
-  }
-
-  bool isInlineImmediate64(int64_t Imm) const {
-    return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
-  }
-
-  bool isInlineImmediate(const APFloat &Imm) const {
-    return Subtarget->getInstrInfo()->isInlineConstant(Imm);
-  }
-
-  bool isVGPRImm(const SDNode *N) const;
-  bool isUniformLoad(const SDNode *N) const;
-  bool isUniformBr(const SDNode *N) const;
-
-  bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
-                                  SDValue &RHS) const;
-
-  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
-
-  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
-  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
-  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
-
-  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
-  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
-  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
-  bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
-  bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
-                        unsigned Size) const;
-  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
-  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
-                                 SDValue &Offset1) const;
-  bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
-                                  SDValue &Offset1) const;
-  bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
-                          SDValue &Offset1, unsigned Size) const;
-  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
-                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
-                   SDValue &Idxen, SDValue &Addr64) const;
-  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
-                         SDValue &SOffset, SDValue &Offset) const;
-  bool SelectMUBUFScratchOffen(SDNode *Parent,
-                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
-                               SDValue &SOffset, SDValue &ImmOffset) const;
-  bool SelectMUBUFScratchOffset(SDNode *Parent,
-                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
-                                SDValue &Offset) const;
-
-  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
-                         SDValue &Offset) const;
-
-  bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
-                            SDValue &Offset, uint64_t FlatVariant) const;
-  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
-                        SDValue &Offset) const;
-  bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
-                          SDValue &Offset) const;
-  bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
-                           SDValue &Offset) const;
-  bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
-                         SDValue &VOffset, SDValue &Offset) const;
-  bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
-                          SDValue &Offset) const;
-
-  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
-                        bool &Imm) const;
-  SDValue Expand32BitAddress(SDValue Addr) const;
-  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
-                  bool &Imm) const;
-  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
-  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
-  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
-  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
-  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
-  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
-
-  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
-                          bool AllowAbs = true) const;
-  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
-  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
-                       SDValue &Clamp, SDValue &Omod) const;
-  bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
-                        SDValue &Clamp, SDValue &Omod) const;
-  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
-                         SDValue &Clamp, SDValue &Omod) const;
-
-  bool SelectVOP3OMods(SDValue In, SDValue &Src,
-                       SDValue &Clamp, SDValue &Omod) const;
-
-  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
-  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
-  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
-  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
-  SDValue getHi16Elt(SDValue In) const;
-
-  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
-
-  void SelectADD_SUB_I64(SDNode *N);
-  void SelectAddcSubb(SDNode *N);
-  void SelectUADDO_USUBO(SDNode *N);
-  void SelectDIV_SCALE(SDNode *N);
-  void SelectMAD_64_32(SDNode *N);
-  void SelectFMA_W_CHAIN(SDNode *N);
-  void SelectFMUL_W_CHAIN(SDNode *N);
-
-  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
-                   uint32_t Offset, uint32_t Width);
-  void SelectS_BFEFromShifts(SDNode *N);
-  void SelectS_BFE(SDNode *N);
-  bool isCBranchSCC(const SDNode *N) const;
-  void SelectBRCOND(SDNode *N);
-  void SelectFMAD_FMA(SDNode *N);
-  void SelectATOMIC_CMP_SWAP(SDNode *N);
-  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
-  void SelectDS_GWS(SDNode *N, unsigned IntrID);
-  void SelectInterpP1F16(SDNode *N);
-  void SelectINTRINSIC_W_CHAIN(SDNode *N);
-  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
-  void SelectINTRINSIC_VOID(SDNode *N);
-
-protected:
-  // Include the pieces autogenerated from the target description.
-#include "AMDGPUGenDAGISel.inc"
-};
-
-class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
-  const R600Subtarget *Subtarget;
-
-  bool isConstantLoad(const MemSDNode *N, int cbID) const;
-  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
-  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
-                                       SDValue& Offset);
-public:
-  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
-      AMDGPUDAGToDAGISel(TM, OptLevel) {}
-
-  void Select(SDNode *N) override;
-
-  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
-                          SDValue &Offset) override;
-  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
-                          SDValue &Offset) override;
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  void PreprocessISelDAG() override {}
-
-protected:
-  // Include the pieces autogenerated from the target description.
-#include "R600GenDAGISel.inc"
-};
-
 static SDValue stripBitcast(SDValue Val) {
   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
 }
@@ -389,11 +114,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
   return new AMDGPUDAGToDAGISel(TM, OptLevel);
 }
 
-/// This pass converts a legalized DAG into a R600-specific
-// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
-                                      CodeGenOpt::Level OptLevel) {
-  return new R600DAGToDAGISel(TM, OptLevel);
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
+    TargetMachine *TM /*= nullptr*/,
+    CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
+    : SelectionDAGISel(*TM, OptLevel) {
+  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
 }
 
 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
@@ -471,6 +196,16 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
   }
 }
 
+void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AMDGPUArgumentUsageInfo>();
+  AU.addRequired<LegacyDivergenceAnalysis>();
+#ifdef EXPENSIVE_CHECKS
+  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addRequired<LoopInfoWrapperPass>();
+#endif
+  SelectionDAGISel::getAnalysisUsage(AU);
+}
+
 bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
   assert(Subtarget->d16PreservesUnusedBits());
   MVT VT = N->getValueType(0).getSimpleVT();
@@ -3121,128 +2856,3 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
     CurDAG->RemoveDeadNodes();
   } while (IsModified);
 }
-
-bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
-  Subtarget = &MF.getSubtarget<R600Subtarget>();
-  return SelectionDAGISel::runOnMachineFunction(MF);
-}
-
-bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
-  if (!N->readMem())
-    return false;
-  if (CbId == -1)
-    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
-           N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
-
-  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
-                                                         SDValue& IntPtr) {
-  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
-    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
-                                       true);
-    return true;
-  }
-  return false;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
-    SDValue& BaseReg, SDValue &Offset) {
-  if (!isa<ConstantSDNode>(Addr)) {
-    BaseReg = Addr;
-    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
-    return true;
-  }
-  return false;
-}
-
-void R600DAGToDAGISel::Select(SDNode *N) {
-  unsigned int Opc = N->getOpcode();
-  if (N->isMachineOpcode()) {
-    N->setNodeId(-1);
-    return;   // Already selected.
-  }
-
-  switch (Opc) {
-  default: break;
-  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
-  case ISD::SCALAR_TO_VECTOR:
-  case ISD::BUILD_VECTOR: {
-    EVT VT = N->getValueType(0);
-    unsigned NumVectorElts = VT.getVectorNumElements();
-    unsigned RegClassID;
-    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
-    // that adds a 128 bits reg copy when going through TwoAddressInstructions
-    // pass. We want to avoid 128 bits copies as much as possible because they
-    // can't be bundled by our scheduler.
-    switch(NumVectorElts) {
-    case 2: RegClassID = R600::R600_Reg64RegClassID; break;
-    case 4:
-      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
-        RegClassID = R600::R600_Reg128VerticalRegClassID;
-      else
-        RegClassID = R600::R600_Reg128RegClassID;
-      break;
-    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
-    }
-    SelectBuildVector(N, RegClassID);
-    return;
-  }
-  }
-
-  SelectCode(N);
-}
-
-bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
-                                          SDValue &Offset) {
-  ConstantSDNode *C;
-  SDLoc DL(Addr);
-
-  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
-    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
-    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
-  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
-             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
-    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
-    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
-  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
-            (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
-    Base = Addr.getOperand(0);
-    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
-  } else {
-    Base = Addr;
-    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
-  }
-
-  return true;
-}
-
-bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
-                                          SDValue &Offset) {
-  ConstantSDNode *IMMOffset;
-
-  if (Addr.getOpcode() == ISD::ADD
-      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
-      && isInt<16>(IMMOffset->getZExtValue())) {
-
-      Base = Addr.getOperand(0);
-      Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
-                                         MVT::i32);
-      return true;
-  // If the pointer address is constant, we can move it to the offset field.
-  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
-             && isInt<16>(IMMOffset->getZExtValue())) {
-    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-                                  SDLoc(CurDAG->getEntryNode()),
-                                  R600::ZERO, MVT::i32);
-    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
-                                       MVT::i32);
-    return true;
-  }
-
-  // Default case, no offset
-  Base = Addr;
-  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
-  return true;
-}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
new file mode 100644
index 0000000000000..6f41191658499
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -0,0 +1,257 @@
+//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+static inline bool isNullConstantOrUndef(SDValue V) {
+  if (V.isUndef())
+    return true;
+
+  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+  return Const != nullptr && Const->isZero();
+}
+
+static inline bool getConstantValue(SDValue N, uint32_t &Out) {
+  // This is only used for packed vectors, where using 0 for undef should
+  // always be good.
+  if (N.isUndef()) {
+    Out = 0;
+    return true;
+  }
+
+  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+    Out = C->getAPIntValue().getSExtValue();
+    return true;
+  }
+
+  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
+    return true;
+  }
+
+  return false;
+}
+
+// TODO: Handle undef as zero
+static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
+                                        bool Negate = false) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+  uint32_t LHSVal, RHSVal;
+  if (getConstantValue(N->getOperand(0), LHSVal) &&
+      getConstantValue(N->getOperand(1), RHSVal)) {
+    SDLoc SL(N);
+    uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
+                        : (LHSVal & 0xffff) | (RHSVal << 16);
+    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
+                              DAG.getTargetConstant(K, SL, MVT::i32));
+  }
+
+  return nullptr;
+}
+
+static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
+  return packConstantV2I16(N, DAG, true);
+}
+} // namespace
+
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+  // make the right decision when generating code for 
diff erent targets.
+  const GCNSubtarget *Subtarget;
+
+  // Default FP mode for the current function.
+  AMDGPU::SIModeRegisterDefaults Mode;
+
+  bool EnableLateStructurizeCFG;
+
+  // Instructions that will be lowered with a final instruction that zeros the
+  // high result bits.
+  bool fp16SrcZerosHighBits(unsigned Opc) const;
+
+public:
+  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
+                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
+  ~AMDGPUDAGToDAGISel() override = default;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool matchLoadD16FromBuildVector(SDNode *N) const;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void PreprocessISelDAG() override;
+  void Select(SDNode *N) override;
+  StringRef getPassName() const override;
+  void PostprocessISelDAG() override;
+
+protected:
+  void SelectBuildVector(SDNode *N, unsigned RegClassID);
+
+private:
+  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
+  bool isNoNanSrc(SDValue N) const;
+  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
+  bool isNegInlineImmediate(const SDNode *N) const {
+    return isInlineImmediate(N, true);
+  }
+
+  bool isInlineImmediate16(int64_t Imm) const {
+    return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
+  }
+
+  bool isInlineImmediate32(int64_t Imm) const {
+    return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
+  }
+
+  bool isInlineImmediate64(int64_t Imm) const {
+    return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
+  }
+
+  bool isInlineImmediate(const APFloat &Imm) const {
+    return Subtarget->getInstrInfo()->isInlineConstant(Imm);
+  }
+
+  bool isVGPRImm(const SDNode *N) const;
+  bool isUniformLoad(const SDNode *N) const;
+  bool isUniformBr(const SDNode *N) const;
+
+  bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
+                                  SDValue &RHS) const;
+
+  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
+
+  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
+  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
+  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+
+  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
+  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+  bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
+  bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
+                        unsigned Size) const;
+  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+                                 SDValue &Offset1) const;
+  bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+                                  SDValue &Offset1) const;
+  bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+                          SDValue &Offset1, unsigned Size) const;
+  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+                   SDValue &Idxen, SDValue &Addr64) const;
+  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                         SDValue &SOffset, SDValue &Offset) const;
+  bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
+                               SDValue &VAddr, SDValue &SOffset,
+                               SDValue &ImmOffset) const;
+  bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
+                                SDValue &Soffset, SDValue &Offset) const;
+
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+                         SDValue &Offset) const;
+
+  bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
+                            SDValue &Offset, uint64_t FlatVariant) const;
+  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+                        SDValue &Offset) const;
+  bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+                          SDValue &Offset) const;
+  bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+                           SDValue &Offset) const;
+  bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+                         SDValue &VOffset, SDValue &Offset) const;
+  bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+                          SDValue &Offset) const;
+
+  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
+                        bool &Imm) const;
+  SDValue Expand32BitAddress(SDValue Addr) const;
+  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
+                  bool &Imm) const;
+  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
+  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
+
+  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+                          bool AllowAbs = true) const;
+  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
+  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                       SDValue &Clamp, SDValue &Omod) const;
+  bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                        SDValue &Clamp, SDValue &Omod) const;
+  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                         SDValue &Clamp, SDValue &Omod) const;
+
+  bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
+                       SDValue &Omod) const;
+
+  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
+                                 unsigned &Mods) const;
+  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+  SDValue getHi16Elt(SDValue In) const;
+
+  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
+
+  void SelectADD_SUB_I64(SDNode *N);
+  void SelectAddcSubb(SDNode *N);
+  void SelectUADDO_USUBO(SDNode *N);
+  void SelectDIV_SCALE(SDNode *N);
+  void SelectMAD_64_32(SDNode *N);
+  void SelectFMA_W_CHAIN(SDNode *N);
+  void SelectFMUL_W_CHAIN(SDNode *N);
+
+  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
+                   uint32_t Offset, uint32_t Width);
+  void SelectS_BFEFromShifts(SDNode *N);
+  void SelectS_BFE(SDNode *N);
+  bool isCBranchSCC(const SDNode *N) const;
+  void SelectBRCOND(SDNode *N);
+  void SelectFMAD_FMA(SDNode *N);
+  void SelectATOMIC_CMP_SWAP(SDNode *N);
+  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+  void SelectDS_GWS(SDNode *N, unsigned IntrID);
+  void SelectInterpP1F16(SDNode *N);
+  void SelectINTRINSIC_W_CHAIN(SDNode *N);
+  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
+  void SelectINTRINSIC_VOID(SDNode *N);
+
+protected:
+  // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index ccbcc867215c2..99e72815db2e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -12,12 +12,11 @@
 //===----------------------------------------------------------------------===//
 //
 
+#include "AMDGPUMCInstLower.h"
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPUTargetMachine.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "R600AsmPrinter.h"
-#include "R600Subtarget.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/IR/Constants.h"
@@ -35,36 +34,6 @@
 
 using namespace llvm;
 
-namespace {
-
-class AMDGPUMCInstLower {
-  MCContext &Ctx;
-  const TargetSubtargetInfo &ST;
-  const AsmPrinter &AP;
-
-public:
-  AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
-                    const AsmPrinter &AP);
-
-  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
-
-  /// Lower a MachineInstr to an MCInst
-  void lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-};
-
-class R600MCInstLower : public AMDGPUMCInstLower {
-public:
-  R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
-                  const AsmPrinter &AP);
-
-  /// Lower a MachineInstr to an MCInst
-  void lower(const MachineInstr *MI, MCInst &OutMI) const;
-};
-
-
-} // End anonymous namespace
-
 #include "AMDGPUGenMCPseudoLowering.inc"
 
 AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
@@ -195,30 +164,6 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
   return MCInstLowering.lowerOperand(MO, MCOp);
 }
 
-static const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
-                                        const Constant *CV,
-                                        MCContext &OutContext) {
-  // TargetMachine does not support llvm-style cast. Use C++-style cast.
-  // This is safe since TM is always of type AMDGPUTargetMachine or its
-  // derived class.
-  auto &AT = static_cast<const AMDGPUTargetMachine&>(TM);
-  auto *CE = dyn_cast<ConstantExpr>(CV);
-
-  // Lower null pointers in private and local address space.
-  // Clang generates addrspacecast for null pointers in private and local
-  // address space, which needs to be lowered.
-  if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
-    auto Op = CE->getOperand(0);
-    auto SrcAddr = Op->getType()->getPointerAddressSpace();
-    if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
-      auto DstAddr = CE->getType()->getPointerAddressSpace();
-      return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
-        OutContext);
-    }
-  }
-  return nullptr;
-}
-
 const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
   if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
     return E;
@@ -332,47 +277,3 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
     }
   }
 }
-
-R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
-                                 const AsmPrinter &AP) :
-        AMDGPUMCInstLower(Ctx, ST, AP) { }
-
-void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
-  OutMI.setOpcode(MI->getOpcode());
-  for (const MachineOperand &MO : MI->explicit_operands()) {
-    MCOperand MCOp;
-    lowerOperand(MO, MCOp);
-    OutMI.addOperand(MCOp);
-  }
-}
-
-void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
-  const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
-  R600MCInstLower MCInstLowering(OutContext, STI, *this);
-
-  StringRef Err;
-  if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
-    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
-    C.emitError("Illegal instruction detected: " + Err);
-    MI->print(errs());
-  }
-
-  if (MI->isBundle()) {
-    const MachineBasicBlock *MBB = MI->getParent();
-    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
-    while (I != MBB->instr_end() && I->isInsideBundle()) {
-      emitInstruction(&*I);
-      ++I;
-    }
-  } else {
-    MCInst TmpInst;
-    MCInstLowering.lower(MI, TmpInst);
-    EmitToStreamer(*OutStreamer, TmpInst);
- }
-}
-
-const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
-  if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
-    return E;
-  return AsmPrinter::lowerConstant(CV);
-}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
new file mode 100644
index 0000000000000..0e43b4fe9461f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -0,0 +1,69 @@
+//===- AMDGPUMCInstLower.h - Lower AMDGPU MachineInstr to an MCInst -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Header of lower AMDGPU MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+} // namespace llvm
+
+using namespace llvm;
+
+class AMDGPUMCInstLower {
+  MCContext &Ctx;
+  const TargetSubtargetInfo &ST;
+  const AsmPrinter &AP;
+
+public:
+  AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
+                    const AsmPrinter &AP);
+
+  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+  /// Lower a MachineInstr to an MCInst
+  void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+namespace {
+static inline const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
+                                               const Constant *CV,
+                                               MCContext &OutContext) {
+  // TargetMachine does not support llvm-style cast. Use C++-style cast.
+  // This is safe since TM is always of type AMDGPUTargetMachine or its
+  // derived class.
+  auto &AT = static_cast<const AMDGPUTargetMachine &>(TM);
+  auto *CE = dyn_cast<ConstantExpr>(CV);
+
+  // Lower null pointers in private and local address space.
+  // Clang generates addrspacecast for null pointers in private and local
+  // address space, which needs to be lowered.
+  if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
+    auto Op = CE->getOperand(0);
+    auto SrcAddr = Op->getType()->getPointerAddressSpace();
+    if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
+      auto DstAddr = CE->getType()->getPointerAddressSpace();
+      return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
+                                    OutContext);
+    }
+  }
+  return nullptr;
+}
+} // namespace
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H

diff  --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 3f36dec694e7a..f657019c2a33f 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -6,7 +6,6 @@
 //
 //==-----------------------------------------------------------------------===//
 
-#include "AMDGPU.h"
 #include "MCTargetDesc/R600MCTargetDesc.h"
 #include "R600.h"
 #include "R600RegisterInfo.h"

diff  --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 6e85018b81387..86218a3af28a8 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -44,24 +44,24 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUAliasAnalysis.cpp
   AMDGPUAlwaysInlinePass.cpp
   AMDGPUAnnotateKernelFeatures.cpp
-  AMDGPUAttributor.cpp
   AMDGPUAnnotateUniformValues.cpp
   AMDGPUArgumentUsageInfo.cpp
   AMDGPUAsmPrinter.cpp
   AMDGPUAtomicOptimizer.cpp
+  AMDGPUAttributor.cpp
   AMDGPUCallLowering.cpp
   AMDGPUCodeGenPrepare.cpp
+  AMDGPUCtorDtorLowering.cpp
   AMDGPUExportClustering.cpp
   AMDGPUFixFunctionBitcasts.cpp
-  AMDGPUCtorDtorLowering.cpp
   AMDGPUFrameLowering.cpp
+  AMDGPUGlobalISelUtils.cpp
   AMDGPUHSAMetadataStreamer.cpp
   AMDGPUInstCombineIntrinsic.cpp
   AMDGPUInstrInfo.cpp
   AMDGPUInstructionSelector.cpp
   AMDGPUISelDAGToDAG.cpp
   AMDGPUISelLowering.cpp
-  AMDGPUGlobalISelUtils.cpp
   AMDGPULateCodeGenPrepare.cpp
   AMDGPULegalizerInfo.cpp
   AMDGPULibCalls.cpp
@@ -77,13 +77,16 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUMCInstLower.cpp
   AMDGPUMIRFormatter.cpp
   AMDGPUOpenCLEnqueuedBlockLowering.cpp
+  AMDGPUPerfHintAnalysis.cpp
   AMDGPUPostLegalizerCombiner.cpp
   AMDGPUPreLegalizerCombiner.cpp
+  AMDGPUPrintfRuntimeBinding.cpp
   AMDGPUPromoteAlloca.cpp
   AMDGPUPropagateAttributes.cpp
   AMDGPURegBankCombiner.cpp
   AMDGPURegisterBankInfo.cpp
   AMDGPUReplaceLDSUseWithPointer.cpp
+  AMDGPUResourceUsageAnalysis.cpp
   AMDGPURewriteOutArguments.cpp
   AMDGPUSubtarget.cpp
   AMDGPUTargetMachine.cpp
@@ -91,13 +94,14 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUTargetTransformInfo.cpp
   AMDGPUUnifyDivergentExitNodes.cpp
   AMDGPUUnifyMetadata.cpp
-  AMDGPUPerfHintAnalysis.cpp
   AMDILCFGStructurizer.cpp
-  AMDGPUPrintfRuntimeBinding.cpp
-  AMDGPUResourceUsageAnalysis.cpp
+  GCNDPPCombine.cpp
   GCNHazardRecognizer.cpp
+  GCNILPSched.cpp
   GCNIterativeScheduler.cpp
   GCNMinRegStrategy.cpp
+  GCNNSAReassign.cpp
+  GCNPreRAOptimizations.cpp
   GCNRegPressure.cpp
   GCNSchedStrategy.cpp
   R600AsmPrinter.cpp
@@ -107,9 +111,11 @@ add_llvm_target(AMDGPUCodeGen
   R600ExpandSpecialInstrs.cpp
   R600FrameLowering.cpp
   R600InstrInfo.cpp
+  R600ISelDAGToDAG.cpp
   R600ISelLowering.cpp
   R600MachineFunctionInfo.cpp
   R600MachineScheduler.cpp
+  R600MCInstLower.cpp
   R600OpenCLImageTypeLoweringPass.cpp
   R600OptimizeVectorRegisters.cpp
   R600Packetizer.cpp
@@ -120,15 +126,14 @@ add_llvm_target(AMDGPUCodeGen
   SIAnnotateControlFlow.cpp
   SIFixSGPRCopies.cpp
   SIFixVGPRCopies.cpp
-  SIPreAllocateWWMRegs.cpp
   SIFoldOperands.cpp
   SIFormMemoryClauses.cpp
   SIFrameLowering.cpp
   SIInsertHardClauses.cpp
-  SILateBranchLowering.cpp
   SIInsertWaitcnts.cpp
   SIInstrInfo.cpp
   SIISelLowering.cpp
+  SILateBranchLowering.cpp
   SILoadStoreOptimizer.cpp
   SILowerControlFlow.cpp
   SILowerI1Copies.cpp
@@ -136,21 +141,18 @@ add_llvm_target(AMDGPUCodeGen
   SIMachineFunctionInfo.cpp
   SIMachineScheduler.cpp
   SIMemoryLegalizer.cpp
+  SIModeRegister.cpp
   SIOptimizeExecMasking.cpp
   SIOptimizeExecMaskingPreRA.cpp
   SIOptimizeVGPRLiveRange.cpp
   SIPeepholeSDWA.cpp
   SIPostRABundler.cpp
+  SIPreAllocateWWMRegs.cpp
   SIPreEmitPeephole.cpp
   SIProgramInfo.cpp
   SIRegisterInfo.cpp
   SIShrinkInstructions.cpp
   SIWholeQuadMode.cpp
-  GCNILPSched.cpp
-  GCNNSAReassign.cpp
-  GCNDPPCombine.cpp
-  GCNPreRAOptimizations.cpp
-  SIModeRegister.cpp
 
   LINK_COMPONENTS
   Analysis

diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 672552ff66557..d8bc0b2df2bdb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -97,7 +97,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool FP64;
   bool FMA;
   bool MIMG_R128;
-  bool IsGCN;
   bool CIInsts;
   bool GFX8Insts;
   bool GFX9Insts;
@@ -165,13 +164,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasArchitectedFlatScratch;
   bool AddNoCarryInsts;
   bool HasUnpackedD16VMem;
-  bool R600ALUInst;
-  bool CaymanISA;
-  bool CFALUBug;
   bool LDSMisalignedBug;
   bool HasMFMAInlineLiteralBug;
-  bool HasVertexCache;
-  short TexVTXClauseSize;
   bool UnalignedBufferAccess;
   bool UnalignedDSAccess;
   bool HasPackedTID;

diff  --git a/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
new file mode 100644
index 0000000000000..9f842e91c0f37
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
@@ -0,0 +1,184 @@
+//===-- R600ISelDAGToDAG.cpp - A dag to dag inst selector for R600 --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the R600 subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUISelDAGToDAG.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
+#include "R600Subtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+  const R600Subtarget *Subtarget;
+
+  bool isConstantLoad(const MemSDNode *N, int cbID) const;
+  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
+  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+                                       SDValue &Offset);
+
+public:
+  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel)
+      : AMDGPUDAGToDAGISel(TM, OptLevel) {}
+
+  void Select(SDNode *N) override;
+
+  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                          SDValue &Offset) override;
+  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                          SDValue &Offset) override;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void PreprocessISelDAG() override {}
+
+protected:
+  // Include the pieces autogenerated from the target description.
+#include "R600GenDAGISel.inc"
+};
+
+bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<R600Subtarget>();
+  return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+  if (!N->readMem())
+    return false;
+  if (CbId == -1)
+    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+           N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
+
+  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+                                                       SDValue &IntPtr) {
+  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+    IntPtr =
+        CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), true);
+    return true;
+  }
+  return false;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+                                                       SDValue &BaseReg,
+                                                       SDValue &Offset) {
+  if (!isa<ConstantSDNode>(Addr)) {
+    BaseReg = Addr;
+    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
+    return true;
+  }
+  return false;
+}
+
+void R600DAGToDAGISel::Select(SDNode *N) {
+  unsigned int Opc = N->getOpcode();
+  if (N->isMachineOpcode()) {
+    N->setNodeId(-1);
+    return; // Already selected.
+  }
+
+  switch (Opc) {
+  default:
+    break;
+  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+  case ISD::SCALAR_TO_VECTOR:
+  case ISD::BUILD_VECTOR: {
+    EVT VT = N->getValueType(0);
+    unsigned NumVectorElts = VT.getVectorNumElements();
+    unsigned RegClassID;
+    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+    // that adds a 128 bits reg copy when going through TwoAddressInstructions
+    // pass. We want to avoid 128 bits copies as much as possible because they
+    // can't be bundled by our scheduler.
+    switch (NumVectorElts) {
+    case 2:
+      RegClassID = R600::R600_Reg64RegClassID;
+      break;
+    case 4:
+      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+        RegClassID = R600::R600_Reg128VerticalRegClassID;
+      else
+        RegClassID = R600::R600_Reg128RegClassID;
+      break;
+    default:
+      llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+    }
+    SelectBuildVector(N, RegClassID);
+    return;
+  }
+  }
+
+  SelectCode(N);
+}
+
+bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                                          SDValue &Offset) {
+  ConstantSDNode *C;
+  SDLoc DL(Addr);
+
+  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
+             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
+    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else {
+    Base = Addr;
+    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  }
+
+  return true;
+}
+
+bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                                          SDValue &Offset) {
+  ConstantSDNode *IMMOffset;
+
+  if (Addr.getOpcode() == ISD::ADD &&
+      (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
+      isInt<16>(IMMOffset->getZExtValue())) {
+
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
+    return true;
+    // If the pointer address is constant, we can move it to the offset field.
+  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) &&
+             isInt<16>(IMMOffset->getZExtValue())) {
+    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+                                  SDLoc(CurDAG->getEntryNode()), R600::ZERO,
+                                  MVT::i32);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
+    return true;
+  }
+
+  // Default case, no offset
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+  return true;
+}
+
+/// This pass converts a legalized DAG into a R600-specific
+// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
+                                      CodeGenOpt::Level OptLevel) {
+  return new R600DAGToDAGISel(TM, OptLevel);
+}

diff  --git a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
new file mode 100644
index 0000000000000..8f7807a2b4728
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
@@ -0,0 +1,73 @@
+//===- R600MCInstLower.cpp - Lower R600 MachineInstr to an MCInst ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Code to lower R600 MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "R600AsmPrinter.h"
+#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+
+class R600MCInstLower : public AMDGPUMCInstLower {
+public:
+  R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
+                  const AsmPrinter &AP);
+
+  /// Lower a MachineInstr to an MCInst
+  void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
+                                 const AsmPrinter &AP)
+    : AMDGPUMCInstLower(Ctx, ST, AP) {}
+
+void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+  OutMI.setOpcode(MI->getOpcode());
+  for (const MachineOperand &MO : MI->explicit_operands()) {
+    MCOperand MCOp;
+    lowerOperand(MO, MCOp);
+    OutMI.addOperand(MCOp);
+  }
+}
+
+void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
+  const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
+  R600MCInstLower MCInstLowering(OutContext, STI, *this);
+
+  StringRef Err;
+  if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
+    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
+    C.emitError("Illegal instruction detected: " + Err);
+    MI->print(errs());
+  }
+
+  if (MI->isBundle()) {
+    const MachineBasicBlock *MBB = MI->getParent();
+    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
+    while (I != MBB->instr_end() && I->isInsideBundle()) {
+      emitInstruction(&*I);
+      ++I;
+    }
+  } else {
+    MCInst TmpInst;
+    MCInstLowering.lower(MI, TmpInst);
+    EmitToStreamer(*OutStreamer, TmpInst);
+  }
+}
+
+const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
+  if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
+    return E;
+  return AsmPrinter::lowerConstant(CV);
+}

diff  --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
index 5ddfc6aac6cd8..365c005b25034 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "R600TargetTransformInfo.h"
+#include "AMDGPU.h"
 #include "AMDGPUTargetMachine.h"
 #include "R600Subtarget.h"
 

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
index 5af4b180c0058..2e4d83fbbc39c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPULDSUtils.h"
+#include "AMDGPU.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetVector.h"

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
index ffcafb9b76cec..a71920c79bc33 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
 
-#include "AMDGPU.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/Constants.h"