[llvm-commits] [PATCH 1/3] AMDGPU: Add core backend files for R600/SI codegen
Tom Stellard
tstellar at gmail.com
Mon Aug 27 07:46:07 PDT 2012
---
lib/Target/AMDGPU/AMDGPU.h | 35 +
lib/Target/AMDGPU/AMDGPU.td | 38 +
lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 125 +
lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 43 +
lib/Target/AMDGPU/AMDGPUCodeEmitter.h | 48 +
lib/Target/AMDGPU/AMDGPUConvertToISA.cpp | 62 +
lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 353 +++
lib/Target/AMDGPU/AMDGPUISelLowering.h | 142 +
lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 258 ++
lib/Target/AMDGPU/AMDGPUInstrInfo.h | 148 +
lib/Target/AMDGPU/AMDGPUInstrInfo.td | 69 +
lib/Target/AMDGPU/AMDGPUInstructions.td | 164 +
lib/Target/AMDGPU/AMDGPUIntrinsics.td | 64 +
lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 82 +
lib/Target/AMDGPU/AMDGPUMCInstLower.h | 30 +
lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 50 +
lib/Target/AMDGPU/AMDGPURegisterInfo.h | 62 +
lib/Target/AMDGPU/AMDGPURegisterInfo.td | 22 +
lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 94 +
lib/Target/AMDGPU/AMDGPUSubtarget.h | 64 +
lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 144 +
lib/Target/AMDGPU/AMDGPUTargetMachine.h | 70 +
lib/Target/AMDGPU/AMDIL.h | 106 +
lib/Target/AMDGPU/AMDIL7XXDevice.cpp | 129 +
lib/Target/AMDGPU/AMDIL7XXDevice.h | 70 +
lib/Target/AMDGPU/AMDILBase.td | 80 +
lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 3272 ++++++++++++++++++++
lib/Target/AMDGPU/AMDILDevice.cpp | 137 +
lib/Target/AMDGPU/AMDILDevice.h | 116 +
lib/Target/AMDGPU/AMDILDeviceInfo.cpp | 94 +
lib/Target/AMDGPU/AMDILDeviceInfo.h | 90 +
lib/Target/AMDGPU/AMDILDevices.h | 19 +
lib/Target/AMDGPU/AMDILEvergreenDevice.cpp | 169 +
lib/Target/AMDGPU/AMDILEvergreenDevice.h | 87 +
lib/Target/AMDGPU/AMDILFrameLowering.cpp | 53 +
lib/Target/AMDGPU/AMDILFrameLowering.h | 46 +
lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 395 +++
lib/Target/AMDGPU/AMDILISelLowering.cpp | 746 +++++
lib/Target/AMDGPU/AMDILInstrInfo.td | 276 ++
lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp | 93 +
lib/Target/AMDGPU/AMDILIntrinsicInfo.h | 47 +
lib/Target/AMDGPU/AMDILIntrinsics.td | 247 ++
lib/Target/AMDGPU/AMDILNIDevice.cpp | 71 +
lib/Target/AMDGPU/AMDILNIDevice.h | 59 +
lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp | 1282 ++++++++
lib/Target/AMDGPU/AMDILRegisterInfo.td | 110 +
lib/Target/AMDGPU/AMDILSIDevice.cpp | 49 +
lib/Target/AMDGPU/AMDILSIDevice.h | 45 +
lib/Target/AMDGPU/AMDILUtilityFunctions.h | 75 +
lib/Target/AMDGPU/CMakeLists.txt | 52 +
lib/Target/AMDGPU/GENERATED_FILES | 13 +
.../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 34 +
lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 34 +
lib/Target/AMDGPU/InstPrinter/CMakeLists.txt | 7 +
lib/Target/AMDGPU/InstPrinter/LLVMBuild.txt | 24 +
lib/Target/AMDGPU/InstPrinter/Makefile | 15 +
lib/Target/AMDGPU/LLVMBuild.txt | 32 +
.../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 80 +
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 88 +
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h | 30 +
.../AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h | 59 +
.../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 112 +
.../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h | 53 +
lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 10 +
lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt | 23 +
lib/Target/AMDGPU/MCTargetDesc/Makefile | 16 +
.../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 669 ++++
lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 297 ++
lib/Target/AMDGPU/Makefile | 23 +
lib/Target/AMDGPU/Processors.td | 28 +
lib/Target/AMDGPU/R600Defines.h | 22 +
lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 163 +
lib/Target/AMDGPU/R600GenRegisterInfo.pl | 200 ++
lib/Target/AMDGPU/R600HwRegInfo.include | 1056 +++++++
lib/Target/AMDGPU/R600ISelLowering.cpp | 522 ++++
lib/Target/AMDGPU/R600ISelLowering.h | 56 +
lib/Target/AMDGPU/R600InstrInfo.cpp | 511 +++
lib/Target/AMDGPU/R600InstrInfo.h | 144 +
lib/Target/AMDGPU/R600Instructions.td | 1264 ++++++++
lib/Target/AMDGPU/R600Intrinsics.td | 16 +
lib/Target/AMDGPU/R600KernelParameters.cpp | 462 +++
lib/Target/AMDGPU/R600MachineFunctionInfo.cpp | 16 +
lib/Target/AMDGPU/R600MachineFunctionInfo.h | 33 +
lib/Target/AMDGPU/R600RegisterInfo.cpp | 108 +
lib/Target/AMDGPU/R600RegisterInfo.h | 58 +
lib/Target/AMDGPU/R600RegisterInfo.td | 1215 ++++++++
lib/Target/AMDGPU/R600Schedule.td | 36 +
lib/Target/AMDGPU/SIAssignInterpRegs.cpp | 136 +
lib/Target/AMDGPU/SIGenRegisterInfo.pl | 232 ++
lib/Target/AMDGPU/SIISelLowering.cpp | 408 +++
lib/Target/AMDGPU/SIISelLowering.h | 57 +
lib/Target/AMDGPU/SIInstrFormats.td | 131 +
lib/Target/AMDGPU/SIInstrInfo.cpp | 75 +
lib/Target/AMDGPU/SIInstrInfo.h | 61 +
lib/Target/AMDGPU/SIInstrInfo.td | 512 +++
lib/Target/AMDGPU/SIInstructions.td | 1077 +++++++
lib/Target/AMDGPU/SIIntrinsics.td | 35 +
lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 18 +
lib/Target/AMDGPU/SIMachineFunctionInfo.h | 37 +
lib/Target/AMDGPU/SIRegisterInfo.cpp | 50 +
lib/Target/AMDGPU/SIRegisterInfo.h | 47 +
lib/Target/AMDGPU/SIRegisterInfo.td | 887 ++++++
lib/Target/AMDGPU/SISchedule.td | 15 +
lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp | 26 +
lib/Target/AMDGPU/TargetInfo/CMakeLists.txt | 7 +
lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt | 23 +
lib/Target/AMDGPU/TargetInfo/Makefile | 15 +
lib/Target/LLVMBuild.txt | 2 +-
108 files changed, 21561 insertions(+), 1 deletions(-)
create mode 100644 lib/Target/AMDGPU/AMDGPU.h
create mode 100644 lib/Target/AMDGPU/AMDGPU.td
create mode 100644 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
create mode 100644 lib/Target/AMDGPU/AMDGPUAsmPrinter.h
create mode 100644 lib/Target/AMDGPU/AMDGPUCodeEmitter.h
create mode 100644 lib/Target/AMDGPU/AMDGPUConvertToISA.cpp
create mode 100644 lib/Target/AMDGPU/AMDGPUISelLowering.cpp
create mode 100644 lib/Target/AMDGPU/AMDGPUISelLowering.h
create mode 100644 lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
create mode 100644 lib/Target/AMDGPU/AMDGPUInstrInfo.h
create mode 100644 lib/Target/AMDGPU/AMDGPUInstrInfo.td
create mode 100644 lib/Target/AMDGPU/AMDGPUInstructions.td
create mode 100644 lib/Target/AMDGPU/AMDGPUIntrinsics.td
create mode 100644 lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
create mode 100644 lib/Target/AMDGPU/AMDGPUMCInstLower.h
create mode 100644 lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
create mode 100644 lib/Target/AMDGPU/AMDGPURegisterInfo.h
create mode 100644 lib/Target/AMDGPU/AMDGPURegisterInfo.td
create mode 100644 lib/Target/AMDGPU/AMDGPUSubtarget.cpp
create mode 100644 lib/Target/AMDGPU/AMDGPUSubtarget.h
create mode 100644 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
create mode 100644 lib/Target/AMDGPU/AMDGPUTargetMachine.h
create mode 100644 lib/Target/AMDGPU/AMDIL.h
create mode 100644 lib/Target/AMDGPU/AMDIL7XXDevice.cpp
create mode 100644 lib/Target/AMDGPU/AMDIL7XXDevice.h
create mode 100644 lib/Target/AMDGPU/AMDILBase.td
create mode 100644 lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
create mode 100644 lib/Target/AMDGPU/AMDILDevice.cpp
create mode 100644 lib/Target/AMDGPU/AMDILDevice.h
create mode 100644 lib/Target/AMDGPU/AMDILDeviceInfo.cpp
create mode 100644 lib/Target/AMDGPU/AMDILDeviceInfo.h
create mode 100644 lib/Target/AMDGPU/AMDILDevices.h
create mode 100644 lib/Target/AMDGPU/AMDILEvergreenDevice.cpp
create mode 100644 lib/Target/AMDGPU/AMDILEvergreenDevice.h
create mode 100644 lib/Target/AMDGPU/AMDILFrameLowering.cpp
create mode 100644 lib/Target/AMDGPU/AMDILFrameLowering.h
create mode 100644 lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
create mode 100644 lib/Target/AMDGPU/AMDILISelLowering.cpp
create mode 100644 lib/Target/AMDGPU/AMDILInstrInfo.td
create mode 100644 lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp
create mode 100644 lib/Target/AMDGPU/AMDILIntrinsicInfo.h
create mode 100644 lib/Target/AMDGPU/AMDILIntrinsics.td
create mode 100644 lib/Target/AMDGPU/AMDILNIDevice.cpp
create mode 100644 lib/Target/AMDGPU/AMDILNIDevice.h
create mode 100644 lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp
create mode 100644 lib/Target/AMDGPU/AMDILRegisterInfo.td
create mode 100644 lib/Target/AMDGPU/AMDILSIDevice.cpp
create mode 100644 lib/Target/AMDGPU/AMDILSIDevice.h
create mode 100644 lib/Target/AMDGPU/AMDILUtilityFunctions.h
create mode 100644 lib/Target/AMDGPU/CMakeLists.txt
create mode 100644 lib/Target/AMDGPU/GENERATED_FILES
create mode 100644 lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
create mode 100644 lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
create mode 100644 lib/Target/AMDGPU/InstPrinter/CMakeLists.txt
create mode 100644 lib/Target/AMDGPU/InstPrinter/LLVMBuild.txt
create mode 100644 lib/Target/AMDGPU/InstPrinter/Makefile
create mode 100644 lib/Target/AMDGPU/LLVMBuild.txt
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/Makefile
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
create mode 100644 lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
create mode 100644 lib/Target/AMDGPU/Makefile
create mode 100644 lib/Target/AMDGPU/Processors.td
create mode 100644 lib/Target/AMDGPU/R600Defines.h
create mode 100644 lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
create mode 100644 lib/Target/AMDGPU/R600GenRegisterInfo.pl
create mode 100644 lib/Target/AMDGPU/R600HwRegInfo.include
create mode 100644 lib/Target/AMDGPU/R600ISelLowering.cpp
create mode 100644 lib/Target/AMDGPU/R600ISelLowering.h
create mode 100644 lib/Target/AMDGPU/R600InstrInfo.cpp
create mode 100644 lib/Target/AMDGPU/R600InstrInfo.h
create mode 100644 lib/Target/AMDGPU/R600Instructions.td
create mode 100644 lib/Target/AMDGPU/R600Intrinsics.td
create mode 100644 lib/Target/AMDGPU/R600KernelParameters.cpp
create mode 100644 lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
create mode 100644 lib/Target/AMDGPU/R600MachineFunctionInfo.h
create mode 100644 lib/Target/AMDGPU/R600RegisterInfo.cpp
create mode 100644 lib/Target/AMDGPU/R600RegisterInfo.h
create mode 100644 lib/Target/AMDGPU/R600RegisterInfo.td
create mode 100644 lib/Target/AMDGPU/R600Schedule.td
create mode 100644 lib/Target/AMDGPU/SIAssignInterpRegs.cpp
create mode 100644 lib/Target/AMDGPU/SIGenRegisterInfo.pl
create mode 100644 lib/Target/AMDGPU/SIISelLowering.cpp
create mode 100644 lib/Target/AMDGPU/SIISelLowering.h
create mode 100644 lib/Target/AMDGPU/SIInstrFormats.td
create mode 100644 lib/Target/AMDGPU/SIInstrInfo.cpp
create mode 100644 lib/Target/AMDGPU/SIInstrInfo.h
create mode 100644 lib/Target/AMDGPU/SIInstrInfo.td
create mode 100644 lib/Target/AMDGPU/SIInstructions.td
create mode 100644 lib/Target/AMDGPU/SIIntrinsics.td
create mode 100644 lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
create mode 100644 lib/Target/AMDGPU/SIMachineFunctionInfo.h
create mode 100644 lib/Target/AMDGPU/SIRegisterInfo.cpp
create mode 100644 lib/Target/AMDGPU/SIRegisterInfo.h
create mode 100644 lib/Target/AMDGPU/SIRegisterInfo.td
create mode 100644 lib/Target/AMDGPU/SISchedule.td
create mode 100644 lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
create mode 100644 lib/Target/AMDGPU/TargetInfo/CMakeLists.txt
create mode 100644 lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt
create mode 100644 lib/Target/AMDGPU/TargetInfo/Makefile
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
new file mode 100644
index 0000000..514adb6
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -0,0 +1,35 @@
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_H
+#define AMDGPU_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class AMDGPUTargetMachine;
+
+// R600 Passes
+FunctionPass *createR600KernelParametersPass(const TargetData *TD);
+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+
+// SI Passes
+FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+
+// Passes common to R600 and SI
+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+
+} // End namespace llvm
+
+#endif // AMDGPU_H
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
new file mode 100644
index 0000000..5086f63
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -0,0 +1,38 @@
+//===-- AMDGPU.td - AMDGPU Tablegen files --*- tablegen -*-----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+// Include AMDIL TD files
+include "AMDILBase.td"
+
+
+def AMDGPUInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+def AMDGPUAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
+def AMDGPU : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDGPUInstrInfo;
+ let AssemblyWriters = [AMDGPUAsmWriter];
+}
+
+// Include AMDGPU TD files
+include "R600Schedule.td"
+include "SISchedule.td"
+include "Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUIntrinsics.td"
+include "AMDGPURegisterInfo.td"
+include "AMDGPUInstructions.td"
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
new file mode 100644
index 0000000..295e562
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -0,0 +1,125 @@
+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDGPUAsmPrinter is used to emit both assembly and binary code. When
+// passed an MCAsmStreamer it prints assembly, and when passed an
+// MCObjectStreamer it outputs binary code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUAsmPrinter.h"
+#include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+
+static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ return new AMDGPUAsmPrinter(tm, Streamer);
+}
+
+extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
+}
+
+/// runOnMachineFunction - We need to override this function so we can avoid
+/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ if (STM.dumpCode()) {
+ MF.dump();
+ }
+ SetupMachineFunction(MF);
+ if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ EmitProgramInfo(MF);
+ }
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ EmitFunctionBody();
+ return false;
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+ unsigned MaxSGPR = 0;
+ unsigned MaxVGPR = 0;
+ bool VCCUsed = false;
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ MachineOperand & MO = MI.getOperand(op_idx);
+ unsigned maxUsed;
+ unsigned width = 0;
+ bool isSGPR = false;
+ unsigned reg;
+ unsigned hwReg;
+ if (!MO.isReg()) {
+ continue;
+ }
+ reg = MO.getReg();
+ if (reg == AMDGPU::VCC) {
+ VCCUsed = true;
+ continue;
+ }
+ if (AMDGPU::SReg_32RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 1;
+ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 1;
+ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 2;
+ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 2;
+ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 4;
+ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 4;
+ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 8;
+ } else {
+ assert("!Unknown register class");
+ }
+ hwReg = RI->getEncodingValue(reg);
+ maxUsed = hwReg + width - 1;
+ if (isSGPR) {
+ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
+ } else {
+ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
+ }
+ }
+ }
+ }
+ if (VCCUsed) {
+ MaxSGPR += 2;
+ }
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+ OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
+ OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
+ OutStreamer.EmitIntValue(MFI->spi_ps_input_addr, 4);
+}
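
For reference, the three EmitIntValue calls above lay down a fixed 12-byte
header ahead of the shader code on SI: the SGPR count, the VGPR count, and the
SPI_PS_INPUT_ADDR word. A minimal sketch of how a consumer might read it back,
assuming little-endian output and this exact three-word layout (the struct and
helper below are hypothetical, not part of the patch or of any driver API):

    #include <cstdint>
    #include <cstring>

    // Hypothetical view of the 12-byte header written by EmitProgramInfo.
    struct SIProgramInfo {
      uint32_t NumSGPRs;        // MaxSGPR + 1 (MaxSGPR includes +2 if VCC used)
      uint32_t NumVGPRs;        // MaxVGPR + 1
      uint32_t SpiPsInputAddr;  // MFI->spi_ps_input_addr
    };

    // Assumes the emitter and this host are both little-endian.
    static SIProgramInfo readProgramInfo(const uint8_t *Blob) {
      SIProgramInfo Info;
      std::memcpy(&Info.NumSGPRs, Blob + 0, 4);
      std::memcpy(&Info.NumVGPRs, Blob + 4, 4);
      std::memcpy(&Info.SpiPsInputAddr, Blob + 8, 4);
      return Info;
    }
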
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
new file mode 100644
index 0000000..b35d2e9
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -0,0 +1,43 @@
+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AMDGPU Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_ASMPRINTER_H
+#define AMDGPU_ASMPRINTER_H
+
+#include "llvm/CodeGen/AsmPrinter.h"
+
+namespace llvm {
+
+class AMDGPUAsmPrinter : public AsmPrinter {
+
+public:
+ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AMDGPU Assembly Printer";
+ }
+
+ /// EmitProgramInfo - Emit register usage information so that the GPU driver
+ /// can correctly setup the GPU state.
+ void EmitProgramInfo(MachineFunction &MF);
+
+ /// EmitInstruction - Implemented in AMDGPUMCInstLower.cpp
+ virtual void EmitInstruction(const MachineInstr *MI);
+};
+
+} // End namespace llvm
+
+#endif //AMDGPU_ASMPRINTER_H
diff --git a/lib/Target/AMDGPU/AMDGPUCodeEmitter.h b/lib/Target/AMDGPU/AMDGPUCodeEmitter.h
new file mode 100644
index 0000000..f1daec1
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUCodeEmitter.h
@@ -0,0 +1,48 @@
+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUCODEEMITTER_H
+#define AMDGPUCODEEMITTER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+ class AMDGPUCodeEmitter {
+ public:
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const { return 0; }
+ virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+ uint64_t Value) const {
+ return Value;
+ }
+ virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
+ const {
+ return 0;
+ }
+ };
+
+} // End namespace llvm
+
+#endif // AMDGPUCODEEMITTER_H
diff --git a/lib/Target/AMDGPU/AMDGPUConvertToISA.cpp b/lib/Target/AMDGPU/AMDGPUConvertToISA.cpp
new file mode 100644
index 0000000..fbca0a7
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUConvertToISA.cpp
@@ -0,0 +1,62 @@
+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers AMDIL machine instructions to the appropriate hardware
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUConvertToISAPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ TargetMachine &TM;
+
+public:
+ AMDGPUConvertToISAPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUConvertToISAPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
+ return new AMDGPUConvertToISAPass(tm);
+}
+
+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
+{
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
new file mode 100644
index 0000000..0a70164
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -0,0 +1,353 @@
+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the parent TargetLowering class for hardware code gen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDILIntrinsicInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+using namespace llvm;
+
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
+ TargetLowering(TM, new TargetLoweringObjectFileELF())
+{
+
+ // Initialize target lowering borrowed from AMDIL
+ InitAMDILLowering();
+
+ // We need to custom lower some of the intrinsics
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Library functions. These default to Expand, but we have instructions
+ // for them.
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+}
+
+//===---------------------------------------------------------------------===//
+// TargetLowering Callbacks
+//===---------------------------------------------------------------------===//
+
+SDValue AMDGPUTargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const
+{
+ // Lowering of arguments happens in R600LowerKernelParameters, so we can
+ // ignore the arguments here.
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
+ InVals.push_back(SDValue());
+ }
+ return Chain;
+}
+
+SDValue AMDGPUTargetLowering::LowerReturn(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const
+{
+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
+}
+
+//===---------------------------------------------------------------------===//
+// Target specific lowering
+//===---------------------------------------------------------------------===//
+
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+ const
+{
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ assert(0 && "Custom lowering code for this"
+ "instruction is not implemented yet!");
+ break;
+ // AMDIL DAG lowering
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::SREM: return LowerSREM(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ // AMDGPU DAG lowering
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ }
+ return Op;
+}
+
+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ switch (IntrinsicID) {
+ default: return Op;
+ case AMDGPUIntrinsic::AMDIL_abs:
+ return LowerIntrinsicIABS(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_exp:
+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_fabs:
+ return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_lrp:
+ return LowerIntrinsicLRP(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_fraction:
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_mad:
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ case AMDGPUIntrinsic::AMDIL_max:
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imax:
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umax:
+ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_min:
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imin:
+ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umin:
+ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_round_nearest:
+ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_round_posinf:
+ return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
+ }
+}
+
+/// IABS(a) = SMAX(sub(0, a), a)
+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
+ SelectionDAG &DAG) const
+{
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ Op.getOperand(1));
+
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
+}
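
A standalone scalar check of the identity above (a sketch, not part of the
patch). Note that the expansion maps INT32_MIN to itself, since sub(0, a)
wraps:

    #include <cassert>
    #include <cstdint>

    // Scalar model of the IABS expansion: SMAX(sub(0, a), a).
    static int32_t iabs(int32_t a) {
      int32_t neg = (int32_t)(0u - (uint32_t)a); // sub(0, a), well-defined wrap
      return neg > a ? neg : a;                  // SMAX
    }

    int main() {
      assert(iabs(-5) == 5 && iabs(7) == 7 && iabs(0) == 0);
      assert(iabs(INT32_MIN) == INT32_MIN); // 0 - INT32_MIN wraps back
      return 0;
    }
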
+
+/// Linear Interpolation
+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(1.0f, MVT::f32),
+ Op.getOperand(1));
+ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
+ Op.getOperand(3));
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
+ Op.getOperand(2),
+ OneSubAC);
+}
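
The same expansion as a scalar sketch (not part of the patch), handy for
checking the operand roles: a is the blend factor, b is selected at a == 1,
c at a == 0:

    #include <cassert>

    // Scalar model of the LRP expansion: MAD(a, b, (1 - a) * c).
    static float lrp(float a, float b, float c) {
      return a * b + (1.0f - a) * c;
    }

    int main() {
      assert(lrp(0.0f, 3.0f, 5.0f) == 5.0f); // a == 0 selects c
      assert(lrp(1.0f, 3.0f, 5.0f) == 3.0f); // a == 1 selects b
      assert(lrp(0.5f, 2.0f, 4.0f) == 3.0f); // midpoint
      return 0;
    }
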
+
+
+
+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue Num = Op.getOperand(0);
+ SDValue Den = Op.getOperand(1);
+
+ SmallVector<SDValue, 8> Results;
+
+ // RCP = URECIP(Den) = 2^32 / Den + e
+ // e is rounding error.
+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
+
+ // RCP_LO = umulo(RCP, Den)
+ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
+
+ // RCP_HI = mulhu(RCP, Den)
+ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
+
+ // NEG_RCP_LO = -RCP_LO
+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ RCP_LO);
+
+ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ NEG_RCP_LO, RCP_LO,
+ ISD::SETEQ);
+ // Calculate the rounding error from the URECIP instruction
+ // E = mulhu(ABS_RCP_LO, RCP)
+ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
+
+ // RCP_A_E = RCP + E
+ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
+
+ // RCP_S_E = RCP - E
+ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
+
+ // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ RCP_A_E, RCP_S_E,
+ ISD::SETEQ);
+ // Quotient = mulhu(Tmp0, Num)
+ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
+
+ // Num_S_Remainder = Quotient * Den
+ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
+
+ // Remainder = Num - Num_S_Remainder
+ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
+
+ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
+ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
+ DAG.getConstant(0, VT),
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
+ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
+ Remainder_GE_Zero);
+
+ // Calculate Division result:
+
+ // Quotient_A_One = Quotient + 1
+ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Quotient_S_One = Quotient - 1
+ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Quotient, Quotient_A_One, ISD::SETEQ);
+
+ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Quotient_S_One, Div, ISD::SETEQ);
+
+ // Calculate Rem result:
+
+ // Remainder_S_Den = Remainder - Den
+ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
+
+ // Remainder_A_Den = Remainder + Den
+ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
+
+ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Remainder, Remainder_S_Den, ISD::SETEQ);
+
+ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Remainder_A_Den, Rem, ISD::SETEQ);
+
+ DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
+ DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
+
+ return Op;
+}
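
For reference, the whole sequence above can be modeled on the host as below,
with URECIP approximated by an exact floor(2^32 / den). This is a sketch, not
part of the patch; it assumes 2 <= den < 2^30 so that the signed SETGE
comparisons used by the correction steps behave:

    #include <cassert>
    #include <cstdint>

    static uint32_t mulhu(uint32_t a, uint32_t b) {
      return (uint32_t)(((uint64_t)a * b) >> 32);
    }

    // Scalar model of the UDIVREM expansion; assumes 2 <= den < 2^30.
    static void udivrem32(uint32_t num, uint32_t den,
                          uint32_t &div, uint32_t &rem) {
      uint32_t rcp = (uint32_t)(0x100000000ULL / den); // URECIP(den), exact floor
      uint32_t rcp_lo = rcp * den;                     // umulo: low 32 bits
      uint32_t rcp_hi = mulhu(rcp, den);
      uint32_t abs_rcp_lo = (rcp_hi == 0) ? 0u - rcp_lo : rcp_lo;
      uint32_t e = mulhu(abs_rcp_lo, rcp);             // reciprocal error
      uint32_t tmp0 = (rcp_hi == 0) ? rcp + e : rcp - e;
      uint32_t quotient = mulhu(tmp0, num);
      uint32_t remainder = num - quotient * den;
      // The DAG uses signed SETGE for both comparisons.
      bool rem_ge_den  = (int32_t)remainder >= (int32_t)den;
      bool rem_ge_zero = (int32_t)remainder >= 0;
      bool tmp1 = rem_ge_den && rem_ge_zero;
      div = !rem_ge_zero ? quotient - 1 : (tmp1 ? quotient + 1 : quotient);
      rem = !rem_ge_zero ? remainder + den
                         : (tmp1 ? remainder - den : remainder);
    }

    int main() {
      uint32_t tests[][2] = {{100, 7}, {1u << 31, 3}, {999999999, 10}, {6, 6}};
      for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); ++i) {
        uint32_t d, r;
        udivrem32(tests[i][0], tests[i][1], d, r);
        assert(d == tests[i][0] / tests[i][1]);
        assert(r == tests[i][0] % tests[i][1]);
      }
      return 0;
    }
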
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
+{
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->isExactlyValue(1.0);
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isAllOnesValue();
+ }
+ return false;
+}
+
+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
+{
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->getValueAPF().isZero();
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isNullValue();
+ }
+ return false;
+}
+
+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned VirtualRegister;
+ if (!MRI.isLiveIn(Reg)) {
+ VirtualRegister = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(Reg, VirtualRegister);
+ } else {
+ VirtualRegister = MRI.getLiveInVirtReg(Reg);
+ }
+ return DAG.getRegister(VirtualRegister, VT);
+}
+
+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
+
+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default: return 0;
+ // AMDIL DAG nodes
+ NODE_NAME_CASE(MAD)
+ NODE_NAME_CASE(CALL)
+ NODE_NAME_CASE(UMUL)
+ NODE_NAME_CASE(DIV_INF)
+ NODE_NAME_CASE(VBUILD)
+ NODE_NAME_CASE(RET_FLAG)
+ NODE_NAME_CASE(BRANCH_COND)
+
+ // AMDGPU DAG nodes
+ NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(SMAX)
+ NODE_NAME_CASE(UMAX)
+ NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(SMIN)
+ NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(URECIP)
+ }
+}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
new file mode 100644
index 0000000..4c100da
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -0,0 +1,142 @@
+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the interface definition of the TargetLowering class
+// that is common to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUISELLOWERING_H
+#define AMDGPUISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class MachineRegisterInfo;
+
+class AMDGPUTargetLowering : public TargetLowering
+{
+private:
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+
+protected:
+
+ /// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list
+ /// of the DAG's MachineFunction. This returns a Register SDNode representing
+ /// Reg.
+ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const;
+
+ bool isHWTrueValue(SDValue Op) const;
+ bool isHWFalseValue(SDValue Op) const;
+
+public:
+ AMDGPUTargetLowering(TargetMachine &TM);
+
+ virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
+ virtual const char* getTargetNodeName(unsigned Opcode) const;
+
+// Functions defined in AMDILISelLowering.cpp
+public:
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const;
+
+ /// isFPImmLegal - We want to mark f32/f64 floating point values as legal.
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// ShouldShrinkFPConstant - We don't want to shrink f64/f32 constants.
+ bool ShouldShrinkFPConstant(EVT VT) const;
+
+private:
+ void InitAMDILLowering();
+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+};
+
+namespace AMDGPUISD
+{
+
+enum
+{
+ // AMDIL ISD Opcodes
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ MAD, // 32-bit fused multiply-add instruction
+ VBUILD, // scalar to vector mov instruction
+ CALL, // Function call based on a single integer
+ UMUL, // 32-bit unsigned multiplication
+ DIV_INF, // Divide with infinity returned on zero divisor
+ RET_FLAG,
+ BRANCH_COND,
+ // End AMDIL ISD Opcodes
+ BITALIGN,
+ FRACT,
+ FMAX,
+ SMAX,
+ UMAX,
+ FMIN,
+ SMIN,
+ UMIN,
+ URECIP,
+ LAST_AMDGPU_ISD_NUMBER
+};
+
+
+} // End namespace AMDGPUISD
+
+namespace SIISD {
+
+enum {
+ SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
+ VCC_AND,
+ VCC_BITCAST
+};
+
+} // End namespace SIISD
+
+} // End namespace llvm
+
+#endif // AMDGPUISELLOWERING_H
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
new file mode 100644
index 0000000..9aae09a
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -0,0 +1,258 @@
+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the TargetInstrInfo class that is
+// common to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDIL.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AMDGPUGenInstrInfo.inc"
+
+using namespace llvm;
+
+AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
+ : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
+
+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+// TODO: Implement this function
+ return false;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+
+MachineInstr *
+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+// TODO: Implement this function
+ return NULL;
+}
+bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const {
+ while (iter != MBB.end()) {
+ switch (iter->getOpcode()) {
+ default:
+ break;
+ ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
+ case AMDGPU::BRANCH:
+ return true;
+ };
+ ++iter;
+ }
+ return false;
+}
+
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator tmp = MBB->end();
+ if (!MBB->size()) {
+ return MBB->end();
+ }
+ while (--tmp) {
+ if (tmp->getOpcode() == AMDGPU::ENDLOOP
+ || tmp->getOpcode() == AMDGPU::ENDIF
+ || tmp->getOpcode() == AMDGPU::ELSE) {
+ if (tmp == MBB->begin()) {
+ return tmp;
+ } else {
+ continue;
+ }
+ } else {
+ return ++tmp;
+ }
+ }
+ return MBB->end();
+}
+
+void
+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+void
+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+MachineInstr*
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // TODO: Implement this function
+ return 0;
+}
+bool
+AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const
+{
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad,
+ bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ // TODO: Implement this function
+ return false;
+}
+
+unsigned
+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1
+ && "Second offset should be larger than first offset!");
+ // If we have fewer than 16 loads in a row, and the offsets are within 16,
+ // then schedule together.
+ // TODO: Schedule the loads near each other if they fit in a cache line.
+ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+}
+
+bool
+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // TODO: Implement this function
+ return true;
+}
+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // TODO: Implement this function
+}
+
+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2)
+ const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
+ // TODO: Implement this function
+ return MI->getDesc().isPredicable();
+}
+
+bool
+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // TODO: Implement this function
+ return true;
+}
+
+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const
+{
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AMDGPURegisterInfo & RI = getRegisterInfo();
+
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Convert dst regclass to one that is supported by the ISA
+ if (MO.isReg() && MO.isDef()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
+
+ assert(newRegClass);
+
+ MRI.setRegClass(MO.getReg(), newRegClass);
+ }
+ }
+ }
+}
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
new file mode 100644
index 0000000..2643119
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -0,0 +1,148 @@
+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of a TargetInstrInfo class that is common
+// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTRUCTIONINFO_H_
+#define AMDGPUINSTRUCTIONINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include <map>
+
+#define GET_INSTRINFO_HEADER
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define OPCODE_IS_ZERO_INT 0x00000045
+#define OPCODE_IS_NOT_ZERO_INT 0x00000042
+#define OPCODE_IS_ZERO 0x00000020
+#define OPCODE_IS_NOT_ZERO 0x00000023
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+
+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
+private:
+ const AMDGPURegisterInfo RI;
+ TargetMachine &TM;
+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const;
+public:
+ explicit AMDGPUInstrInfo(TargetMachine &tm);
+
+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const = 0;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+protected:
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const;
+public:
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode *> &NewNodes) const;
+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool isPredicated(const MachineInstr *MI) const;
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ bool isPredicable(MachineInstr *MI) const;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ // Helper functions that check the opcode for status information
+ bool isLoadInst(llvm::MachineInstr *MI) const;
+ bool isExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isStoreInst(llvm::MachineInstr *MI) const;
+ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+
+ virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const = 0;
+ virtual unsigned getIEQOpcode() const = 0;
+ virtual bool isMov(unsigned opcode) const = 0;
+
+ /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
+ /// MachineInstr
+ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+};
+
+} // End llvm namespace
+
+#endif // AMDGPUINSTRUCTIONINFO_H_
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
new file mode 100644
index 0000000..4452719
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -0,0 +1,69 @@
+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DAG node definitions for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Profiles
+//===----------------------------------------------------------------------===//
+
+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Nodes
+//
+
+// out = ((a << 32) | b) >> c)
+//
+// Can be used to optimize rtol:
+// rotl(a, b) = bitalign(a, a, 32 - b)
+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
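
The rotl identity in the comment can be verified with a scalar sketch (not
part of the patch):

    #include <cassert>
    #include <cstdint>

    // Scalar model of BITALIGN: out = (((uint64)a << 32) | b) >> c.
    static uint32_t bitalign(uint32_t a, uint32_t b, uint32_t c) {
      return (uint32_t)((((uint64_t)a << 32) | b) >> c); // valid for c <= 32
    }

    static uint32_t rotl(uint32_t a, uint32_t b) {
      return bitalign(a, a, 32 - b); // the identity from the comment above
    }

    int main() {
      for (uint32_t b = 0; b < 32; ++b) {
        uint32_t ref = (0x80000001u << b) | (b ? 0x80000001u >> (32 - b) : 0);
        assert(rotl(0x80000001u, b) == ref);
      }
      return 0;
    }
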
+
+// out = a - floor(a)
+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+
+// out = max(a, b) a and b are floats
+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are signed ints
+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are unsigned ints
+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are floats
+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are signed ints
+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are unsigned ints
+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// urecip - This operation is a helper for integer division; it returns the
+// result of 1 / a as a fractional unsigned integer.
+// out = (2^32 / a) + e
+// e is rounding error
+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
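
On the host, URECIP can be modeled as below, ignoring the error term e (a
sketch, not part of the patch; see also the model after LowerUDIVREM earlier
in this patch):

    #include <cstdint>

    // Host model of URECIP: floor(2^32 / a), assuming a >= 2 so the
    // result fits in 32 bits. Real hardware adds a rounding error e.
    static uint32_t urecip(uint32_t a) {
      return (uint32_t)(0x100000000ULL / a);
    }
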
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
new file mode 100644
index 0000000..c8a7db6
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -0,0 +1,164 @@
+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction defs that are common to all hw codegen
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
+ field bits<16> AMDILOp = 0;
+ field bits<3> Gen = 0;
+
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = NullALU;
+ let TSFlags{42-40} = Gen;
+ let TSFlags{63-48} = AMDILOp;
+}
+
+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<32> Inst = 0xffffffff;
+
+}
+
+def COND_EQ : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOEQ: case ISD::SETUEQ:
+ case ISD::SETEQ: return true;}}}]
+>;
+
+def COND_NE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETONE: case ISD::SETUNE:
+ case ISD::SETNE: return true;}}}]
+>;
+def COND_GT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGT: case ISD::SETUGT:
+ case ISD::SETGT: return true;}}}]
+>;
+
+def COND_GE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGE: case ISD::SETUGE:
+ case ISD::SETGE: return true;}}}]
+>;
+
+def COND_LT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLT: case ISD::SETULT:
+ case ISD::SETLT: return true;}}}]
+>;
+
+def COND_LE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLE: case ISD::SETULE:
+ case ISD::SETLE: return true;}}}]
+>;
+
+class Constants {
+ int TWO_PI = 0x40c90fdb;
+ int PI = 0x40490fdb;
+ int TWO_PI_INV = 0x3e22f983;
+}
+def CONST : Constants;
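
These three values are IEEE-754 single-precision bit patterns rather than
plain integers: 2*pi, pi, and 1/(2*pi). A quick standalone check (sketch, not
part of the patch):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Reinterpret a 32-bit pattern as an IEEE-754 float.
    static float f32FromBits(uint32_t Bits) {
      float F;
      std::memcpy(&F, &Bits, sizeof F);
      return F;
    }

    int main() {
      std::printf("TWO_PI     = %.7f\n", f32FromBits(0x40c90fdb)); // 6.2831855
      std::printf("PI         = %.7f\n", f32FromBits(0x40490fdb)); // 3.1415927
      std::printf("TWO_PI_INV = %.7f\n", f32FromBits(0x3e22f983)); // 0.1591549
      return 0;
    }
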
+
+def FP_ZERO : PatLeaf <
+ (fpimm),
+ [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(1.0);}]
+>;
+
+let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
+
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "CLAMP $dst, $src0",
+ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+>;
+
+class FABS <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FABS $dst, $src0",
+ [(set rc:$dst, (fabs rc:$src0))]
+>;
+
+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FNEG $dst, $src0",
+ [(set rc:$dst, (fneg rc:$src0))]
+>;
+
+} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
+
+/* Generic helper patterns for intrinsics */
+/* -------------------------------------- */
+
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
+ RegisterClass rc> : Pat <
+ (int_AMDGPU_pow rc:$src0, rc:$src1),
+ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
+>;
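+
+// i.e. pow(src0, src1) is expanded as exp(src1 * log(src0)) using the
+// target's IEEE-variant exp, mul, and log instructions (base-2 exp/log on
+// R600-class hardware).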
+
+/* Other helper patterns */
+/* --------------------- */
+
+/* Extract element pattern */
+class Extract_Element <ValueType sub_type, ValueType vec_type,
+ RegisterClass vec_class, int sub_idx,
+ SubRegIndex sub_reg>: Pat<
+ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
+ (EXTRACT_SUBREG vec_class:$src, sub_reg)
+>;
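+
+// A hypothetical instantiation (concrete ones live in the per-target .td
+// files included below), extracting the x channel of a v4f32 value:
+//   def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;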
+
+/* Insert element pattern */
+class Insert_Element <ValueType elem_type, ValueType vec_type,
+ RegisterClass elem_class, RegisterClass vec_class,
+ int sub_idx, SubRegIndex sub_reg> : Pat <
+
+ (vec_type (vector_insert (vec_type vec_class:$vec),
+ (elem_type elem_class:$elem), sub_idx)),
+ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
+>;
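+
+// Similarly, a hypothetical instantiation inserting into the x channel:
+//   def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;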
+
+// Vector Build pattern
+class Vector_Build <ValueType vecType, RegisterClass elemClass> : Pat <
+ (IL_vbuild elemClass:$src),
+ (INSERT_SUBREG (vecType (IMPLICIT_DEF)), elemClass:$src, sel_x)
+>;
+
+// bitconvert pattern
+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
+ (dt (bitconvert (st rc:$src0))),
+ (dt rc:$src0)
+>;
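+
+// e.g. a hypothetical def : BitConvert <i32, f32, R600_Reg32>; maps an
+// i32 <-> f32 bitcast onto a plain re-use of the same register.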
+
+include "R600Instructions.td"
+
+include "SIInstrInfo.td"
+
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
new file mode 100644
index 0000000..89cc7e1
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -0,0 +1,64 @@
+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines intrinsics that are used by all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "AMDGPU", isTarget = 1 in {
+
+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+ def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+}
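+
+// In LLVM IR these definitions become intrinsics with the "llvm." prefix,
+// e.g. int_AMDGPU_rcp above is referenced as:
+//   declare float @llvm.AMDGPU.rcp(float) nounwind readnone
+//   %r = call float @llvm.AMDGPU.rcp(float %x)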
+
+let TargetPrefix = "TGSI", isTarget = 1 in {
+
+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
+}
+
+include "SIIntrinsics.td"
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
new file mode 100644
index 0000000..f3d80a3
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -0,0 +1,82 @@
+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower AMDGPU MachineInstrs to their corresponding
+// MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "AMDGPUAsmPrinter.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+AMDGPUMCInstLower::AMDGPUMCInstLower() { }
+
+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_FPImmediate: {
+ const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
+ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
+ "Only floating point immediates are supported at the moment.");
+ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_Register:
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ }
+ OutMI.addOperand(MCOp);
+ }
+}
+
+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ AMDGPUMCInstLower MCInstLowering;
+
+ // Ignore placeholder instructions:
+ if (MI->getOpcode() == AMDGPU::MASK_WRITE) {
+ return;
+ }
+
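+ // A bundle has no encoding of its own, so emit each instruction inside
+ // the bundle in sequence instead.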
+ if (MI->isBundle()) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator I = MI;
+ ++I;
+ while (I != MBB->end() && I->isInsideBundle()) {
+ MCInst MCBundleInst;
+ const MachineInstr *BundledInst = I;
+ MCInstLowering.lower(BundledInst, MCBundleInst);
+ OutStreamer.EmitInstruction(MCBundleInst);
+ ++I;
+ }
+ } else {
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+}
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/lib/Target/AMDGPU/AMDGPUMCInstLower.h
new file mode 100644
index 0000000..3f68ff0
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -0,0 +1,30 @@
+//===- AMDGPUMCInstLower.h - MachineInstr Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_MCINSTLOWER_H
+#define AMDGPU_MCINSTLOWER_H
+
+namespace llvm {
+
+class MCInst;
+class MachineInstr;
+
+class AMDGPUMCInstLower {
+
+public:
+ AMDGPUMCInstLower();
+
+ /// lower - Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+};
+
+} // End namespace llvm
+
+#endif //AMDGPU_MCINSTLOWER_H
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
new file mode 100644
index 0000000..69bda63
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -0,0 +1,50 @@
+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Parent TargetRegisterInfo class common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPUGenRegisterInfo(0),
+ TM(tm),
+ TII(tii)
+ { }
+
+//===----------------------------------------------------------------------===//
+// Function handling callbacks - Functions are a seldom-used feature of GPUs,
+// so they are not supported at this time.
+//===----------------------------------------------------------------------===//
+
+const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
+
+const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ return &CalleeSavedReg;
+}
+
+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj,
+ RegScavenger *RS) const {
+ assert(!"Subroutines not supported yet");
+}
+
+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ assert(!"Subroutines not supported yet");
+ return 0;
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "AMDGPUGenRegisterInfo.inc"
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
new file mode 100644
index 0000000..326610d
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -0,0 +1,62 @@
+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the TargetRegisterInfo interface that is implemented
+// by all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUREGISTERINFO_H_
+#define AMDGPUREGISTERINFO_H_
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo
+{
+ TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ static const uint16_t CalleeSavedReg;
+
+ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ assert(!"Unimplemented"); return BitVector();
+ }
+
+ /// getISARegClass - rc is an AMDIL reg class. This function returns the
+ /// ISA reg class that is equivalent to the given AMDIL reg class.
+ virtual const TargetRegisterClass * getISARegClass(
+ const TargetRegisterClass * rc) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ RegScavenger *RS) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUREGISTERINFO_H_
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
new file mode 100644
index 0000000..8181e02
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
@@ -0,0 +1,22 @@
+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tablegen register definitions common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AMDGPU" in {
+ def sel_x : SubRegIndex;
+ def sel_y : SubRegIndex;
+ def sel_z : SubRegIndex;
+ def sel_w : SubRegIndex;
+}
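+
+// These indices name the x/y/z/w channels of the 128-bit vector registers
+// defined in the per-target register files included below.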
+
+include "R600RegisterInfo.td"
+include "SIRegisterInfo.td"
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
new file mode 100644
index 0000000..d4a70b6
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -0,0 +1,94 @@
+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AMDGPUGenSubtargetInfo.inc"
+
+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
+ AMDGPUGenSubtargetInfo(TT, CPU, FS), mDumpCode(false) {
+ InstrItins = getInstrItineraryForCPU(CPU);
+
+ memset(CapsOverride, 0, sizeof(*CapsOverride)
+ * AMDGPUDeviceInfo::MaxNumberCapabilities);
+ // Default card
+ StringRef GPU = CPU;
+ mIs64bit = false;
+ mDefaultSize[0] = 64;
+ mDefaultSize[1] = 1;
+ mDefaultSize[2] = 1;
+ ParseSubtargetFeatures(GPU, FS);
+ mDevName = GPU;
+ mDevice = AMDGPUDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
+}
+
+AMDGPUSubtarget::~AMDGPUSubtarget()
+{
+ delete mDevice;
+}
+
+bool
+AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const
+{
+ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
+ "Caps index is out of bounds!");
+ return CapsOverride[caps];
+}
+bool
+AMDGPUSubtarget::is64bit() const
+{
+ return mIs64bit;
+}
+bool
+AMDGPUSubtarget::isTargetELF() const
+{
+ return false;
+}
+size_t
+AMDGPUSubtarget::getDefaultSize(uint32_t dim) const
+{
+ // mDefaultSize has only three entries (x, y, z).
+ if (dim > 2) {
+ return 1;
+ } else {
+ return mDefaultSize[dim];
+ }
+}
+
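+// In the layout strings below, "e" selects little-endian lowering,
+// "p:32:32:32" gives 32-bit pointers with 32-bit ABI and preferred
+// alignment, and each "iN:A:P"/"vN:A:P"/"fN:A:P" entry fixes the ABI and
+// preferred alignment of the corresponding integer, vector, or float type.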
+std::string
+AMDGPUSubtarget::getDataLayout() const
+{
+ if (!mDevice) {
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
+ }
+ return mDevice->getDataLayout();
+}
+
+std::string
+AMDGPUSubtarget::getDeviceName() const
+{
+ return mDevName;
+}
+const AMDGPUDevice *
+AMDGPUSubtarget::device() const
+{
+ return mDevice;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
new file mode 100644
index 0000000..8613861
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -0,0 +1,64 @@
+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file declares the AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDGPUSUBTARGET_H_
+#define _AMDGPUSUBTARGET_H_
+#include "AMDILDevice.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define MAX_CB_SIZE (1 << 16)
+
+namespace llvm {
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo
+{
+private:
+ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
+ const AMDGPUDevice *mDevice;
+ size_t mDefaultSize[3];
+ size_t mMinimumSize[3];
+ std::string mDevName;
+ bool mIs64bit;
+ bool mIs32on64bit;
+ bool mDumpCode;
+
+ InstrItineraryData InstrItins;
+
+public:
+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+ virtual ~AMDGPUSubtarget();
+
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
+
+ bool isOverride(AMDGPUDeviceInfo::Caps) const;
+ bool is64bit() const;
+
+ // Helper functions to simplify if statements
+ bool isTargetELF() const;
+ const AMDGPUDevice* device() const;
+ std::string getDataLayout() const;
+ std::string getDeviceName() const;
+ virtual size_t getDefaultSize(uint32_t dim) const;
+ bool dumpCode() const { return mDumpCode; }
+
+};
+
+} // End namespace llvm
+
+#endif // _AMDGPUSUBTARGET_H_
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
new file mode 100644
index 0000000..7f5f6bd
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -0,0 +1,144 @@
+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDGPU target machine contains all of the hardware specific information
+// needed to emit code for R600 and SI GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAMDGPUTarget() {
+ // Register the target
+ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+}
+
+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel) :
+ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ Subtarget(TT, CPU, FS),
+ DataLayout(Subtarget.getDataLayout()),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ Subtarget.device()->getStackAlignment(), 0),
+ IntrinsicInfo(this),
+ InstrItins(&Subtarget.getInstrItineraryData()),
+ mDump(false)
+
+{
+ // TLInfo uses InstrInfo so it must be initialized after.
+ if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ InstrInfo = new R600InstrInfo(*this);
+ TLInfo = new R600TargetLowering(*this);
+ } else {
+ InstrInfo = new SIInstrInfo(*this);
+ TLInfo = new SITargetLowering(*this);
+ }
+}
+
+AMDGPUTargetMachine::~AMDGPUTargetMachine()
+{
+}
+
+namespace {
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // End of anonymous namespace
+
+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AMDGPUPassConfig(this, PM);
+}
+
+bool
+AMDGPUPassConfig::addPreISel()
+{
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createR600KernelParametersPass(
+ getAMDGPUTargetMachine().getTargetData()));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addInstSelector() {
+ addPass(createAMDGPUPeepholeOpt(*TM));
+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreRegAlloc() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createSIAssignInterpRegsPass(*TM));
+ }
+ addPass(createAMDGPUConvertToISAPass(*TM));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPostRegAlloc() {
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreSched2() {
+
+ addPass(&IfConverterID);
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreEmitPass() {
+ addPass(createAMDGPUCFGPreparationPass(*TM));
+ addPass(createAMDGPUCFGStructurizerPass(*TM));
+
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(&FinalizeMachineBundlesID);
+ }
+
+ return false;
+}
+
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
new file mode 100644
index 0000000..8b405a8
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -0,0 +1,70 @@
+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDGPU TargetMachine interface definition for hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_TARGET_MACHINE_H
+#define AMDGPU_TARGET_MACHINE_H
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILFrameLowering.h"
+#include "AMDILIntrinsicInfo.h"
+#include "R600ISelLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
+
+class AMDGPUTargetMachine : public LLVMTargetMachine {
+
+ AMDGPUSubtarget Subtarget;
+ const TargetData DataLayout;
+ AMDGPUFrameLowering FrameLowering;
+ AMDGPUIntrinsicInfo IntrinsicInfo;
+ const AMDGPUInstrInfo * InstrInfo;
+ AMDGPUTargetLowering * TLInfo;
+ const InstrItineraryData* InstrItins;
+ bool mDump;
+
+public:
+ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~AMDGPUTargetMachine();
+ virtual const AMDGPUFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
+ return &IntrinsicInfo;
+ }
+ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
+ virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
+ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ virtual AMDGPUTargetLowering * getTargetLowering() const {
+ return TLInfo;
+ }
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+ virtual const TargetData* getTargetData() const { return &DataLayout; }
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPU_TARGET_MACHINE_H
diff --git a/lib/Target/AMDGPU/AMDIL.h b/lib/Target/AMDGPU/AMDIL.h
new file mode 100644
index 0000000..e96b123
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDIL.h
@@ -0,0 +1,106 @@
+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AMDGPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H_
+#define AMDIL_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The following ID values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
+
+/// The number of function IDs that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+namespace llvm {
+class AMDGPUInstrPrinter;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+/// Instruction selection passes.
+FunctionPass*
+ createAMDGPUISelDag(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUPeepholeOpt(TargetMachine &TM);
+
+/// Pre emit passes.
+FunctionPass*
+ createAMDGPUCFGPreparationPass(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUCFGStructurizerPass(TargetMachine &TM);
+
+extern Target TheAMDGPUTarget;
+} // end namespace llvm;
+
+/// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a separate piece of memory that is unique from other
+/// memory locations.
+namespace AMDGPUAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, // Address space for private memory.
+ GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, // Address space for constant memory.
+ LOCAL_ADDRESS = 3, // Address space for local memory.
+ REGION_ADDRESS = 4, // Address space for region memory.
+ ADDRESS_NONE = 5, // Address space for unknown memory.
+ PARAM_D_ADDRESS = 6, // Address space for directly addressable parameter memory (CONST0)
+ PARAM_I_ADDRESS = 7, // Address space for indirectly addressable parameter memory (VTX1)
+ USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
+ LAST_ADDRESS = 9
+};
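+
+// For example, an OpenCL __local buffer is lowered to an addrspace(3)
+// global in LLVM IR, matching LOCAL_ADDRESS above:
+//   @lds = internal addrspace(3) global [256 x float] zeroinitializer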
+
+} // namespace AMDGPUAS
+
+} // end namespace llvm
+#endif // AMDIL_H_
diff --git a/lib/Target/AMDGPU/AMDIL7XXDevice.cpp b/lib/Target/AMDGPU/AMDIL7XXDevice.cpp
new file mode 100644
index 0000000..8561f0b
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDIL7XXDevice.cpp
@@ -0,0 +1,129 @@
+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevice.h"
+
+using namespace llvm;
+
+AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST)
+{
+ setCaps();
+ std::string name = mSTM->getDeviceName();
+ if (name == "rv710") {
+ mDeviceFlag = OCL_DEVICE_RV710;
+ } else if (name == "rv730") {
+ mDeviceFlag = OCL_DEVICE_RV730;
+ } else {
+ mDeviceFlag = OCL_DEVICE_RV770;
+ }
+}
+
+AMDGPU7XXDevice::~AMDGPU7XXDevice()
+{
+}
+
+void AMDGPU7XXDevice::setCaps()
+{
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU7XXDevice::getMaxLDSSize() const
+{
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_700;
+ }
+ return 0;
+}
+
+size_t AMDGPU7XXDevice::getWavefrontSize() const
+{
+ return AMDGPUDevice::HalfWavefrontSize;
+}
+
+uint32_t AMDGPU7XXDevice::getGeneration() const
+{
+ return AMDGPUDeviceInfo::HD4XXX;
+}
+
+uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const
+{
+ switch (DeviceID) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case GLOBAL_ID:
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ case ARENA_UAV_ID:
+ break;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ }
+ break;
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ }
+ break;
+ case GDS_ID:
+ assert(0 && "GDS UAV ID is not supported on this chip");
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ }
+ break;
+ };
+
+ return 0;
+}
+
+uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const
+{
+ return 1;
+}
+
+AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST)
+{
+ setCaps();
+}
+
+AMDGPU770Device::~AMDGPU770Device()
+{
+}
+
+void AMDGPU770Device::setCaps()
+{
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ mHWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU770Device::getWavefrontSize() const
+{
+ return AMDGPUDevice::WavefrontSize;
+}
+
+AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST)
+{
+}
+
+AMDGPU710Device::~AMDGPU710Device()
+{
+}
+
+size_t AMDGPU710Device::getWavefrontSize() const
+{
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
diff --git a/lib/Target/AMDGPU/AMDIL7XXDevice.h b/lib/Target/AMDGPU/AMDIL7XXDevice.h
new file mode 100644
index 0000000..e848e2e
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDIL7XXDevice.h
@@ -0,0 +1,70 @@
+//==-- AMDIL7XXDevice.h - Define 7XX Device for AMDIL ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL7XXDEVICEIMPL_H_
+#define _AMDIL7XXDEVICEIMPL_H_
+#include "AMDILDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+// The AMDGPU7XXDevice class represents the generic 7XX device. All 7XX
+// devices are derived from this class. The AMDGPU7XX device will only
+// support the minimal features that are required to be considered OpenCL 1.0
+// compliant and nothing more.
+class AMDGPU7XXDevice : public AMDGPUDevice {
+public:
+ AMDGPU7XXDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU7XXDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
+ virtual uint32_t getMaxNumUAVs() const;
+
+protected:
+ virtual void setCaps();
+}; // AMDGPU7XXDevice
+
+// The AMDGPU770Device class represents the RV770 chip and its
+// derivative cards. The difference between this device and the base
+// class is that this device adds support for double precision
+// and has a larger wavefront size.
+class AMDGPU770Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU770Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU770Device();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDGPU770Device
+
+// The AMDGPU710Device class derives from the 7XX base class, but this
+// class is a smaller derivative, so we need to override some of the
+// functions in order to correctly report this information.
+class AMDGPU710Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU710Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU710Device();
+ virtual size_t getWavefrontSize() const;
+}; // AMDGPU710Device
+
+} // namespace llvm
+#endif // _AMDIL7XXDEVICEIMPL_H_
diff --git a/lib/Target/AMDGPU/AMDILBase.td b/lib/Target/AMDGPU/AMDILBase.td
new file mode 100644
index 0000000..7f72b49
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILBase.td
@@ -0,0 +1,80 @@
+//===- AMDILBase.td - AMDIL Target Machine ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
+ "true",
+ "Enable 64bit double precision operations">;
+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
+ "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
+ "true",
+ "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
+ "true",
+ "Enable duplicate barrier detection(HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+ "CapsOverride[AMDGPUDeviceInfo::Images]",
+ "true",
+ "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
+ "true",
+ "Generate multiple UAV code(HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+ "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
+ "true",
+ "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+ "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
+ "true",
+ "assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+ "CapsOverride[AMDGPUDeviceInfo::NoInline]",
+ "true",
+ "specify whether to not inline functions">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "mIs64bit",
+ "false",
+ "Specify if 64bit addressing should be used.">;
+
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+ "mIs32on64bit",
+ "false",
+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+def FeatureDebug : SubtargetFeature<"debug",
+ "CapsOverride[AMDGPUDeviceInfo::Debug]",
+ "true",
+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+ "mDumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILInstrInfo.td"
+
diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
new file mode 100644
index 0000000..b167d62
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -0,0 +1,3272 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUGME 0
+#define DEBUG_TYPE "structcfg"
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDIL.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define FirstNonDebugInstr(A) A->begin()
+using namespace llvm;
+
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct
+{
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->print(OS, 0);
+ }
+}
+
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+ size_t sz = Src.size();
+ for (size_t i = 0; i < sz/2; ++i) {
+ NodeT *t = Src[i];
+ Src[i] = Src[sz - i - 1];
+ Src[sz - i - 1] = t;
+ }
+}
+
+} //end namespace llvmCFGStruct
+
+
+//===----------------------------------------------------------------------===//
+//
+// MachinePostDominatorTree
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
+/// to compute a post-dominator tree.
+///
+struct MachinePostDominatorTree : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DominatorTreeBase<MachineBasicBlock> *DT;
+ MachinePostDominatorTree() : MachineFunctionPass(ID)
+ {
+ // Passing true requests a post-dominator tree.
+ DT = new DominatorTreeBase<MachineBasicBlock>(true);
+ }
+
+ ~MachinePostDominatorTree();
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ inline const std::vector<MachineBasicBlock *> &getRoots() const {
+ return DT->getRoots();
+ }
+
+ inline MachineDomTreeNode *getRootNode() const {
+ return DT->getRootNode();
+ }
+
+ inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ inline MachineBasicBlock *
+ findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
+ return DT->findNearestCommonDominator(A, B);
+ }
+
+ virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
+ DT->print(OS);
+ }
+};
+} //end of namespace llvm
+
+char MachinePostDominatorTree::ID = 0;
+static RegisterPass<MachinePostDominatorTree>
+machinePostDominatorTreePass("machinepostdomtree",
+ "MachinePostDominator Tree Construction",
+ true, true);
+
+//const PassInfo *const llvm::MachinePostDominatorsID
+//= &machinePostDominatorTreePass;
+
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+ //DEBUG(DT->dump());
+ return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+ delete DT;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+template<class PassT>
+struct CFGStructTraits {
+};
+
+template <class InstrT>
+class BlockInformation {
+public:
+ bool isRetired;
+ int sccNum;
+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //Instructions defining the corresponding successor.
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation {
+public:
+ BlockT *landBlk;
+ std::set<RegiT> breakInitRegs; //Registers that need a "reg = 0" init
+ //before the WHILELOOP(thisloop), i.e.
+ //before entering thisloop.
+ std::set<RegiT> contInitRegs; //Registers that need a "reg = 0" init
+ //after the WHILELOOP(thisloop), i.e.
+ //after entering thisloop.
+ std::set<RegiT> endbranchInitRegs; //Registers initialized before entering
+ //this loop, at the loop land block; the
+ //branch condition is on this reg.
+ std::set<RegiT> breakOnRegs; //Registers that need an "if (reg) break
+ //endif" after ENDLOOP(thisloop) to break out
+ //of outerLoopOf(thisLoop).
+ std::set<RegiT> contOnRegs; //Registers that need an "if (reg) continue
+ //endif" after ENDLOOP(thisloop) to continue
+ //in outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT>
+class CFGStructurizer
+{
+public:
+ typedef enum {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+
+public:
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
+
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+private:
+ void reversePredicateSetter(typename BlockT::iterator);
+ void orderBlocks();
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+ int patternMatch(BlockT *CurBlock);
+ int patternMatchGroup(BlockT *CurBlock);
+
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+ DomTreeT *domTree;
+ PostDomTreeT *postDomTree;
+ LoopInfoT *loopInfo;
+ PassT *passRep;
+ FuncT *funcRep;
+
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+ const AMDGPURegisterInfo *TRI;
+
+}; //template class CFGStructurizer
+
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
+}
+
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+ E = blockInfoMap.end(); I != E; ++I) {
+ delete I->second;
+ }
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ bool changed = false;
+ //func.RenumberBlocks();
+
+ //to do, if not reducible flow graph, make it so ???
+
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::prepare\n";
+ //func.viewCFG();
+ //func.viewCFGOnly();
+ //func.dump();
+ }
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ //domTree = CFGTraits::getDominatorTree(pass);
+ //if (DEBUGME) {
+ // domTree->print(errs());
+ //}
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ //postDomTree = CFGTraits::getPostDominatorTree(pass);
+ //if (DEBUGME) {
+ // postDomTree->print(errs());
+ //}
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+ iterEnd = loopInfo->end();
+ iter != iterEnd; ++iter) {
+ LoopT* loopRep = (*iter);
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (exitingBlks.size() == 0) {
+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+ if (dummyExitBlk != NULL)
+ retBlks.push_back(dummyExitBlk);
+ }
+ }
+
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+ iterBlk != iterEndBlk;
+ ++iterBlk) {
+ BlockT *curBlk = *iterBlk;
+ removeUnconditionalBranch(curBlk);
+ removeRedundantConditionalBranch(curBlk);
+ if (CFGTraits::isReturnBlock(curBlk)) {
+ retBlks.push_back(curBlk);
+ }
+ assert(curBlk->succ_size() <= 2);
+ //assert(curBlk->size() > 0);
+ //removeEmptyBlock(curBlk) ??
+ } //for
+
+ if (retBlks.size() >= 2) {
+ addDummyExitBlock(retBlks);
+ changed = true;
+ }
+
+ return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ //func.RenumberBlocks();
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::run\n";
+ //errs() << func.getFunction()->getNameStr() << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ //func.dump();
+ }
+
+#if 1
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+#endif
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+//#define STRESSTEST
+#ifdef STRESSTEST
+ //Use the worse block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0;
+ bool finish = false;
+ BlockT *curBlk;
+ bool makeProgress = false;
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+ << ", numRemaintedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ BlockT *sccBeginBlk = NULL;
+ int sccNumBlk = 0; // Number of active blocks in the current SCC;
+ // reset to the maximum possible number when a new SCC starts.
+ int sccNumIter; // Number of iterations spent on this SCC.
+
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ // Just finished one SCC.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
+ if (DEBUGME) {
+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter;
+ errs() << "doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+ << "sccNumIter = " << sccNumIter << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ }
+ } else {
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue with the next block in the current SCC.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // TODO: consider cloned blocks?
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second;
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ func.viewCFG();
+ //func.dump();
+ }
+
+ if (!finish) {
+ assert(!"IRREDUCIBL_CF");
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Print the ordered Blocks.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
+ size_t i = 0;
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
+ iterBlk != iterBlkEnd;
+ ++iterBlk, ++i) {
+ os << "BB" << (*iterBlk)->getNumber();
+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ os << "\n";
+ } else {
+ os << " ";
+ }
+ }
+} //printOrderedBlocks
+
+/// Compute the reversed DFS post order of Blocks
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
+ int sccNum = 0;
+ BlockT *bb;
+ for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
+ std::vector<BlockT *> &sccNext = *sccIter;
+ for (typename std::vector<BlockT *>::const_iterator
+ blockIter = sccNext.begin(), blockEnd = sccNext.end();
+ blockIter != blockEnd; ++blockIter) {
+ bb = *blockIter;
+ orderedBlks.push_back(bb);
+ recordSccnum(bb, sccNum);
+ }
+ }
+
+ // Walk through all the blocks in the function to check for unreachable blocks.
+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
+ blockEnd1 = FuncGTraits::nodes_end(funcRep);
+ blockIter1 != blockEnd1; ++blockIter1) {
+ BlockT *bb = &(*blockIter1);
+ sccNum = getSCCNum(bb);
+ if (sccNum == INVALIDSCCNUM) {
+ errs() << "unreachable block BB" << bb->getNumber() << "\n";
+ }
+ } //end of for
+} //orderBlocks
+
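+// Repeatedly apply the pattern group to curBlk until a fixed point is
+// reached; return the total number of patterns matched.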
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
+ int numMatch = 0;
+ int curMatch;
+
+ if (DEBUGME) {
+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+ }
+
+ while ((curMatch = patternMatchGroup(curBlk)) > 0) {
+ numMatch += curMatch;
+ }
+
+ if (DEBUGME) {
+ errs() << "End patternMatch BB" << curBlk->getNumber()
+ << ", numMatch = " << numMatch << "\n";
+ }
+
+ return numMatch;
+} //patternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
+ int numMatch = 0;
+ numMatch += serialPatternMatch(curBlk);
+ numMatch += ifPatternMatch(curBlk);
+ //numMatch += switchPatternMatch(curBlk);
+ numMatch += loopendPatternMatch(curBlk);
+ numMatch += loopPatternMatch(curBlk);
+ return numMatch;
+}//patternMatchGroup
+
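+// A serial pattern is a block with exactly one successor whose successor has
+// exactly one predecessor, e.g. BB1 -> BB2 with no other edge into BB2; the
+// two blocks can then be merged into one.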
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 1) {
+ return 0;
+ }
+
+ BlockT *childBlk = *curBlk->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+ return 0;
+ }
+
+ mergeSerialBlock(curBlk, childBlk);
+ ++numSerialPatternMatch;
+ return 1;
+} //serialPatternMatch
+
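+// An if pattern is a block ending in a conditional branch whose two arms
+// rejoin at a common landing block (or both return). The arms are folded into
+// the head block between IF/ELSE/ENDIF markers; arms with side entries are
+// cloned first.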
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+ //two edges
+ if (curBlk->succ_size() != 2) {
+ return 0;
+ }
+
+ if (hasBackEdge(curBlk)) {
+ return 0;
+ }
+
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+ if (branchInstr == NULL) {
+ return 0;
+ }
+
+ assert(CFGTraits::isCondBranch(branchInstr));
+
+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+ BlockT *landBlk;
+ int cloned = 0;
+
+ // TODO: Simplify
+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
+ landBlk = *trueBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
+ landBlk = NULL;
+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
+ landBlk = falseBlk;
+ falseBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && *falseBlk->succ_begin() == trueBlk) {
+ landBlk = trueBlk;
+ trueBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+ landBlk = *falseBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+ landBlk = *trueBlk->succ_begin();
+ } else {
+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ }
+
+ // improveSimpleJumpintoIf can handle the case where landBlk == NULL, but the
+ // new BB created for landBlk == NULL may introduce new challenges to the
+ // reduction process.
+ if (landBlk != NULL &&
+ ((trueBlk && trueBlk->pred_size() > 1)
+ || (falseBlk && falseBlk->pred_size() > 1))) {
+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+ }
+
+ if (trueBlk && trueBlk->pred_size() > 1) {
+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+ ++cloned;
+ }
+
+ if (falseBlk && falseBlk->pred_size() > 1) {
+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+ ++cloned;
+ }
+
+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += cloned;
+
+ return 1 + cloned;
+} //ifPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
+ return 0;
+} //switchPatternMatch
+
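+// Match break/continue patterns for every loop in the nest that contains
+// curBlk, processing the nest from the outermost loop inward.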
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ typename std::vector<LoopT *> nestedLoops;
+ while (loopRep) {
+ nestedLoops.push_back(loopRep);
+ loopRep = loopRep->getParentLoop();
+ }
+
+ if (nestedLoops.size() == 0) {
+ return 0;
+ }
+
+ // Process nested loops from outside to inside, so a "continue" to an outer
+ // loop won't be mistaken for a "break" out of the current loop.
+ int num = 0;
+ for (typename std::vector<LoopT *>::reverse_iterator
+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
+ iter != iterEnd; ++iter) {
+ loopRep = *iter;
+
+ if (getLoopLandBlock(loopRep) != NULL) {
+ continue;
+ }
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
+
+ if (numBreak == -1) {
+ break;
+ }
+
+ int numCont = loopcontPatternMatch(loopRep, loopHeader);
+ num += numBreak + numCont;
+ }
+
+ return num;
+} //loopendPatternMatch
+
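+// Match a fully reduced loop: curBlk must be a loop header with no remaining
+// successors; its landing block is then folded into it between
+// WHILELOOP/ENDLOOP markers.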
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 0) {
+ return 0;
+ }
+
+ int numLoop = 0;
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+ if (loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ mergeLooplandBlock(curBlk, loopLand);
+ ++numLoop;
+ }
+ }
+ loopRep = loopRep->getParentLoop();
+ }
+
+ numLoopPatternMatch += numLoop;
+
+ return numLoop;
+} //loopPatternMatch
+
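+// Turn every exiting edge of loopRep into a structured break: compute a
+// common landing block for all exit blocks (cloning side-entered blocks where
+// needed) and fold a break into each exiting block. Return the number of
+// patterns matched, or -1 if the loop cannot be reduced yet.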
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ if (exitBlkSet.size() == 1) {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1;
+ }
+
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) {
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) {
+#if 1
+
+ // TODO: Simplify, maybe separate function?
+ //funcRep->viewCFG();
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+#else
+ return -1;
+#endif
+ }
+
+ // Handle side entry to exit path.
+ exitBlks.clear();
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold the break into the breaking block. This also handles breaks that
+ // cross loop levels.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
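+// Turn every back edge into loopHeader into a structured continue, then
+// remove the back edge itself.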
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ int numCont = 0;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
+ for (typename InvBlockGTraits::ChildIteratorType iter =
+ InvBlockGTraits::child_begin(loopHeader),
+ iterEnd = InvBlockGTraits::child_end(loopHeader);
+ iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ if (loopRep->contains(curBlk)) {
+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
+ loopHeader, loopRep);
+ contBlk.push_back(curBlk);
+ ++numCont;
+ }
+ }
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
+ iter = contBlk.begin(), iterEnd = contBlk.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->removeSuccessor(loopHeader);
+ }
+
+ numLoopcontPatternMatch += numCont;
+
+ return numCont;
+} //loopcontPatternMatch
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+ BlockT *src2Blk) {
+ // Return true iff src1Blk->succ_size() == 0 and src1Blk and src2Blk are in
+ // the same loop, and that loop has a LoopLandInfo entry. Without explicitly
+ // keeping track of loopContBlks and loopBreakBlks, this is a way to recover
+ // that information.
+ //
+ if (src1Blk->succ_size() == 0) {
+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+ if (theEntry != NULL) {
+ if (DEBUGME) {
+ errs() << "isLoopContBreakBlock yes src1 = BB"
+ << src1Blk->getNumber()
+ << " src2 = BB" << src2Blk->getNumber() << "\n";
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+} //isSameloopDetachedContbreak
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+ if (num == 0) {
+ if (DEBUGME) {
+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ }
+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+ }
+ return num;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = 0;
+ BlockT *downBlk;
+
+ //trueBlk could be the common post dominator
+ downBlk = trueBlk;
+
+ if (DEBUGME) {
+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+ << " true = BB" << trueBlk->getNumber()
+ << ", numSucc=" << trueBlk->succ_size()
+ << " false = BB" << falseBlk->getNumber() << "\n";
+ }
+
+ while (downBlk) {
+ if (DEBUGME) {
+ errs() << "check down = BB" << downBlk->getNumber();
+ }
+
+ if (//postDomTree->dominates(downBlk, falseBlk) &&
+ singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+ if (DEBUGME) {
+ errs() << " working\n";
+ }
+
+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+ numClonedBlock += num;
+ num += serialPatternMatch(*headBlk->succ_begin());
+ num += serialPatternMatch(*(++headBlk->succ_begin()));
+ num += ifPatternMatch(headBlk);
+ assert(num > 0);
+
+ break;
+ }
+ if (DEBUGME) {
+ errs() << " not working\n";
+ }
+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+ } // walk down the postDomTree
+
+ return num;
+} //handleJumpintoIfImp
+
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk,
+ bool detail) {
+ errs() << "head = BB" << headBlk->getNumber()
+ << " size = " << headBlk->size();
+ if (detail) {
+ errs() << "\n";
+ headBlk->print(errs());
+ errs() << "\n";
+ }
+
+ if (trueBlk) {
+ errs() << ", true = BB" << trueBlk->getNumber() << " size = "
+ << trueBlk->size() << " numPred = " << trueBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ trueBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (falseBlk) {
+ errs() << ", false = BB" << falseBlk->getNumber() << " size = "
+ << falseBlk->size() << " numPred = " << falseBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ falseBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (landBlk) {
+ errs() << ", land = BB" << landBlk->getNumber() << " size = "
+ << landBlk->size() << " numPred = " << landBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ landBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+
+ errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+#if 0
+ if (DEBUGME) {
+ errs() << "improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+#endif
+
+ // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
+ // We may want to consider landBlk->pred_size(), as it represents the number
+ // of "initReg = ..." assignments that need to be inserted.
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+ // If we need to migrate either trueBlk or falseBlk, also migrate the other
+ // one if it has more than one predecessor. Without doing this, a predecessor
+ // other than headBlk would leave initReg with an undefined value.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch }
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+ // if landBlk->pred_size() > 2, put the above if-else inside
+ // if (initReg !=2) {...}
+ //
+ // add initReg = initVal to headBlk
+
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ unsigned initReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ if (!migrateTrue || !migrateFalse) {
+ int initVal = migrateTrue ? 0 : 1;
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ ++numNewBlk;
+ }
+
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) {
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDGPU::IF_LOGICALZ_i32, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+ // We need to unconditionally insert the assignment to ensure that a path
+ // from a predecessor other than headBlk has a valid value in initReg if
+ // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+ // We need to unconditionally insert the assignment to ensure that a path
+ // from a predecessor other than headBlk has a valid value in initReg if
+ // (initVal != 0).
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+ //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
+
+ // put initReg = 2 to other predecessors of landBlk
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+ LoopT *exitingLoop,
+ BlockT *exitBlk,
+ LoopT *exitLoop,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+ }
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT initReg = INVALIDREGNUM;
+ if (exitingLoop != exitLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopBreakInitReg(exitLoop, initReg);
+ while (exitingLoop != exitLoop && exitingLoop) {
+ addLoopBreakOnReg(exitingLoop, initReg);
+ exitingLoop = exitingLoop->getParentLoop();
+ }
+ assert(exitingLoop == exitLoop);
+ }
+
+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+
+} //handleLoopbreak
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+ LoopT *contingLoop,
+ BlockT *contBlk,
+ LoopT *contLoop) {
+ if (DEBUGME) {
+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+ << " header = BB" << contBlk->getNumber() << "\n";
+
+ errs() << "Trying to continue loop-depth = "
+ << getLoopDepth(contLoop)
+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (contingLoop != contLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopContInitReg(contLoop, initReg);
+ while (contingLoop && contingLoop->getParentLoop() != contLoop) {
+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
+ contingLoop = contingLoop->getParentLoop();
+ }
+ assert(contingLoop && contingLoop->getParentLoop() == contLoop);
+ addLoopContOnReg(contingLoop, initReg);
+ }
+
+ settleLoopcontBlock(contingBlk, contBlk, initReg);
+ //contingBlk->removeSuccessor(loopHeader);
+} //handleLoopcontBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ //removeUnconditionalBranch(dstBlk);
+ dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
+
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
+
+ //curBlk->remove(branchInstrPos);
+ branchInstr->eraseFromParent();
+
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+ LoopLandInfo *loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+
+ if (DEBUGME) {
+ errs() << "loopPattern header = BB" << dstBlk->getNumber()
+ << " land = BB" << landBlk->getNumber() << "\n";
+ }
+
+ // Loop contInitRegs are init at the beginning of the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->contInitRegs.begin(),
+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* We last inserted the DebugLoc in the BREAK_LOGICALZ_i32 or
+ * AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
+ * Search for the DebugLoc in that statement.
+ * If it is not found, we have to insert the empty/default DebugLoc. */
+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
+ // Loop breakInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakInitRegs.begin(),
+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+ // Loop endbranchInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->endbranchInitRegs.begin(),
+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* We last inserted the DebugLoc in the continue statement in the current
+ * dstBlk. Search for the DebugLoc in that continue statement.
+ * If it is not found, we have to insert the empty/default DebugLoc. */
+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
+ // Loop breakOnRegs are checked after the ENDLOOP: break out of a loop
+ // enclosing this loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakOnRegs.begin(),
+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
+ *iter);
+ }
+
+ // Loop contOnRegs are checked after the ENDLOOP: continue a loop
+ // enclosing this loop.
+ for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
+ passRep, *iter);
+ }
+
+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+ }
+
+ removeSuccessor(landBlk);
+ retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+
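+// Walk backwards from I to the PRED_X instruction that sets the predicate and
+// invert its comparison opcode, effectively negating the branch condition.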
+template<class PassT>
+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
+ while (I--) {
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
+ return;
+ case OPCODE_IS_NOT_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
+ return;
+ case OPCODE_IS_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
+ return;
+ case OPCODE_IS_NOT_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
+ return;
+ default:
+ assert(0 && "PRED_X Opcode invalid!");
+ }
+ }
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstrPos);
+ }
+ int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ } else {
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstr);
+ }
+ int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
+ } //if_logical
+
+ // Now branchInstr can be erased safely.
+ //exitingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+
+ // Now take care of successors and retire blocks.
+ exitingBlk->removeSuccessor(exitBlk);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+ if (!useContinueLogical) {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if any exist, go before the break-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
+ } else {
+ // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ }
+
+ //contingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+ } else {
+ /* If we've arrived here then we've already erased the branch instruction.
+ * Travel back up the basic block to find the last reference to our debug
+ * location; we've just inserted that reference here, so it should be
+ * representative. */
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if any exist, go before the break-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+ // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// The BBs in exitBlkSet have been determined to be on the break-path for
+// loopRep. Before we can treat those BBs as part of the loop body of loopRep,
+// check whether they were determined earlier to be cont-BBs for
+// parentLoopRep.
+// If so, generate a new BB newBlk:
+// (1) make newBlk the common successor of the BBs in exitBlkSet,
+// (2) change the continue-instr in the BBs in exitBlkSet to a break-instr,
+// (3) generate a continue-instr in newBlk.
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
+ std::set<BlockT *> endBlkSet;
+
+// BlockT *parentLoopHead = parentLoopRep->getHeader();
+
+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+ return NULL;
+
+ endBlkSet.insert(endBlk);
+ }
+
+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newBlk); //insert to function
+ CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
+ SHOWNEWBLK(newBlk, "New continue block: ");
+
+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+ iterEnd = endBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *endBlk = *iter;
+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+ if (contInstr) {
+ contInstr->eraseFromParent();
+ }
+ endBlk->addSuccessor(newBlk);
+ if (DEBUGME) {
+ errs() << "Add new continue Block to BB"
+ << endBlk->getNumber() << " successors\n";
+ }
+ }
+
+ return newBlk;
+} //relocateLoopcontBlock
+
+
+// A LoopEndbranchBlock is a BB created by the CFGStructurizer for use as a
+// LoopLandBlock. This BB branches on the loop endBranchInit register to the
+// paths corresponding to the loop's exiting branches.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT endBranchReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(endBranchReg >= 0);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >= 2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+ // It is redundant to add reg = 0 to exitingBlks[0].
+
+ // For the 1..n-th exiting paths (the last iteration handles two paths),
+ // create the branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ curBranchBlk = curExitBlk;
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+ endBranchReg, i);
+
+ // Remove the edge (exitingBlks[i], exitBlks[i]) and add the new edge
+ // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+
+ preBranchBlk->insert(preBranchBlk->begin(),
+ tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
+ i - 1));
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+
+ BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
+
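+// Classify the path from srcBlk: SinglePath_InPath if a chain of
+// single-successor blocks reaches dstBlk, SinglePath_NotInPath if the chain
+// ends without reaching dstBlk, and Not_SinglePath otherwise.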
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return Not_SinglePath;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return SinglePath_NotInPath;
+ }
+
+ return Not_SinglePath;
+} //singlePathTo
+
+// If there is a single path from srcBlk to dstBlk, return the last block
+// before dstBlk. If there is a single path from srcBlk to the function end
+// that does not pass through dstBlk, return the last block on that path.
+// Otherwise, return NULL.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return srcBlk;
+ }
+
+ if (srcBlk->succ_size() == 0) {
+ return srcBlk;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ BlockT *preBlk = srcBlk;
+
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == NULL) {
+ return preBlk;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return NULL;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return srcBlk;
+ }
+
+ return NULL;
+
+} //singlePathEnd
+
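+// Walk the single-successor chain from srcBlk toward dstBlk, cloning any
+// block on the chain that has side entries (multiple predecessors) so the
+// path from preBlk becomes private; return the number of blocks cloned.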
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+ BlockT *dstBlk) {
+ int cloned = 0;
+ assert(preBlk->isSuccessor(srcBlk));
+ while (srcBlk && srcBlk != dstBlk) {
+ assert(srcBlk->succ_size() == 1);
+ if (srcBlk->pred_size() > 1) {
+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
+ ++cloned;
+ }
+
+ preBlk = srcBlk;
+ srcBlk = *srcBlk->succ_begin();
+ }
+
+ return cloned;
+} //cloneOnSideEntryTo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+ BlockT *predBlk) {
+ assert(predBlk->isSuccessor(curBlk) &&
+ "succBlk is not a prececessor of curBlk");
+
+ BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
+ CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+ //srcBlk, oldBlk, newBlk
+
+ predBlk->removeSuccessor(curBlk);
+ predBlk->addSuccessor(cloneBlk);
+
+ // add all successor to cloneBlk
+ CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+ numClonedInstr += curBlk->size();
+
+ if (DEBUGME) {
+ errs() << "Cloned block: " << "BB"
+ << curBlk->getNumber() << "size " << curBlk->size() << "\n";
+ }
+
+ SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+ return cloneBlk;
+} //cloneBlockForPredecessor
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+ BlockT *exitingBlk) {
+ BlockT *exitBlk = NULL;
+
+ for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
+ iterSuccEnd = exitingBlk->succ_end();
+ iterSucc != iterSuccEnd; ++iterSucc) {
+ BlockT *curBlk = *iterSucc;
+ if (!loopRep->contains(curBlk)) {
+ assert(exitBlk == NULL);
+ exitBlk = curBlk;
+ }
+ }
+
+ assert(exitBlk != NULL);
+
+ return exitBlk;
+} //exitingBlock2ExitBlock
+
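+// Move the non-branch instructions of srcBlk into dstBlk just before
+// insertPos, leaving any trailing branch instruction behind in srcBlk.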
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+ BlockT *dstBlk,
+ InstrIterator insertPos) {
+ InstrIterator spliceEnd;
+ // Look for the input branch instruction, not the AMDGPU branch instruction.
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ if (branchInstr == NULL) {
+ if (DEBUGME) {
+ errs() << "migrateInstruction don't see branch instr\n" ;
+ }
+ spliceEnd = srcBlk->end();
+ } else {
+ if (DEBUGME) {
+ errs() << "migrateInstruction see branch instr\n" ;
+ branchInstr->dump();
+ }
+ spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+ }
+ if (DEBUGME) {
+ errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+
+ // splice inserts before insertPos.
+ dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+ if (DEBUGME) {
+ errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit changes
+// B1:
+// uncond_br LoopHeader
+//
+// to
+// B1:
+// cond_br 1 LoopHeader dummyExit
+// and returns the newly added dummy exit block.
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+ BlockT *loopHeader;
+ BlockT *loopLatch;
+ loopHeader = LoopRep->getHeader();
+ loopLatch = LoopRep->getLoopLatch();
+ BlockT *dummyExitBlk = NULL;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (loopHeader!=NULL && loopLatch!=NULL) {
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos(loopLatch, branchInstr);
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+ InstrT *newInstr =
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
+ MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
+
+ SHOWNEWINSTR(newInstr);
+
+ branchInstr->eraseFromParent();
+ loopLatch->addSuccessor(dummyExitBlk);
+ }
+ }
+
+ return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+ InstrT *branchInstr;
+
+ // I saw two unconditional branches in one basic block in the example
+ // test_fc_do_while_or.c. The upstream issue needs to be fixed so this
+ // loop can be removed.
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
+ && CFGTraits::isUncondBranch(branchInstr)) {
+ if (DEBUGME) {
+ errs() << "Removing unconditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ }
+} //removeUnconditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
+ if (srcBlk->succ_size() == 2) {
+ BlockT *blk1 = *srcBlk->succ_begin();
+ BlockT *blk2 = *(++srcBlk->succ_begin());
+
+ if (blk1 == blk2) {
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+ if (DEBUGME) {
+ errs() << "Removing unneeded conditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ SHOWNEWBLK(blk1, "Removing redundant successor");
+ srcBlk->removeSuccessor(blk1);
+ }
+ }
+} //removeRedundantConditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+ DEFAULT_VEC_SLOTS> &retBlks) {
+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+ retBlks.begin(),
+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+ if (curInstr) {
+ curInstr->eraseFromParent();
+ }
+#if 0
+ if (curBlk->size()==0 && curBlk->pred_size() == 1) {
+ if (DEBUGME) {
+ errs() << "Replace empty block BB" << curBlk->getNumber()
+ << " with dummyExitBlock\n";
+ }
+ BlockT *predb = *curBlk->pred_begin();
+ predb->removeSuccessor(curBlk);
+ curBlk = predb;
+ } //handle empty curBlk
+#endif
+ curBlk->addSuccessor(dummyExitBlk);
+ if (DEBUGME) {
+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+ << " successors\n";
+ }
+ } //for
+
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
+ while (srcBlk->succ_size()) {
+ srcBlk->removeSuccessor(*srcBlk->succ_begin());
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+ }
+
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->isRetired = true;
+ //int i = srcBlk->succ_size();
+ //int j = srcBlk->pred_size();
+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+ && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return (srcBlkInfo && srcBlkInfo->isRetired);
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+
+ if(loopLand == NULL)
+ return true;
+
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ return true;
+ }
+
+ loopRep = loopRep->getParentLoop();
+ }
+
+ return false;
+} //isActiveLoophead
+
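+// Decide whether blk is too expensive to clone: it must have multiple
+// predecessors and exceed both the block-size threshold and the estimated
+// cloned-instruction threshold below.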
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
+ const unsigned blockSizeThreshold = 30;
+ const unsigned cloneInstrThreshold = 100;
+
+ bool multiplePreds = blk && (blk->pred_size() > 1);
+
+ if(!multiplePreds)
+ return false;
+
+ unsigned blkSize = blk->size();
+ return ((blkSize > blockSizeThreshold)
+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
+} //needMigrateBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end();
+ predIter != predIterEnd; ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+ inpathBlks.push_back(curBlk);
+ }
+ } //for
+
+ // If landBlk has predecessors that are not in the given loop,
+ // create a new block.
+ BlockT *newLandBlk = landBlk;
+ if (inpathBlks.size() != landBlk->pred_size()) {
+ newLandBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newLandBlk); //insert to function
+ newLandBlk->addSuccessor(landBlk);
+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+ inpathBlks.begin(),
+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+ //srcBlk, oldBlk, newBlk
+ curBlk->removeSuccessor(landBlk);
+ curBlk->addSuccessor(newLandBlk);
+ }
+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+ if (exitBlks[i] == landBlk) {
+ exitBlks[i] = newLandBlk;
+ }
+ }
+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+ }
+
+ setLoopLandBlock(loopRep, newLandBlk);
+
+ return newLandBlk;
+} // recordLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ assert(theEntry->landBlk == NULL);
+
+ if (blk == NULL) {
+ blk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(blk); //insert to function
+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+ }
+
+ theEntry->landBlk = blk;
+
+ if (DEBUGME) {
+ errs() << "setLoopLandBlock loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " landing-block = BB" << blk->getNumber() << "\n";
+ }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+
+ theEntry->breakOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->breakInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+ RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->endbranchInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopEndbranchInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry;
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry ? theEntry->landBlk : NULL;
+} // getLoopLandBlock
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ if (loopRep == NULL)
+ return false;
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ return curBlk->isSuccessor(loopHeader);
+
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
+ return loopRep ? loopRep->getLoopDepth() : 0;
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
+ int count = 0;
+ while (iterStart != iterEnd) {
+ if (!isRetiredBlock(*iterStart)) {
+ ++count;
+ }
+ ++iterStart;
+ }
+
+ return count;
+} //countActiveBlock
+
+// This is a workaround for findNearestCommonDominator not being available
+// for post-dominator trees; a proper fix should go into Dominators.h.
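+// The fallback below walks up blk1's immediate post-dominator chain until it
+// reaches a node that also post-dominates blk2; that node is the nearest
+// common post-dominator.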
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+
+ if (postDomTree->dominates(blk1, blk2)) {
+ return blk1;
+ }
+ if (postDomTree->dominates(blk2, blk1)) {
+ return blk2;
+ }
+
+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+ // Handle newly cloned node.
+ if (node1 == NULL && blk1->succ_size() == 1) {
+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+ }
+ if (node2 == NULL && blk2->succ_size() == 1) {
+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+ }
+
+ if (node1 == NULL || node2 == NULL) {
+ return NULL;
+ }
+
+ node1 = node1->getIDom();
+ while (node1) {
+ if (postDomTree->dominates(node1, node2)) {
+ return node1->getBlock();
+ }
+ node1 = node1->getIDom();
+ }
+
+ return NULL;
+}
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks) {
+ BlockT *commonDom;
+ typename std::set<BlockT *>::const_iterator iter = blks.begin();
+ typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+ for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
+ BlockT *curBlk = *iter;
+ if (curBlk != commonDom) {
+ commonDom = findNearestCommonPostDom(curBlk, commonDom);
+ }
+ }
+
+ if (DEBUGME) {
+ errs() << "Common post dominator for exit blocks is ";
+ if (commonDom) {
+ errs() << "BB" << commonDom->getNumber() << "\n";
+ } else {
+ errs() << "NULL\n";
+ }
+ }
+
+ return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvmCFGStruct
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDGPU
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDGPUCFGStructurizer : public MachineFunctionPass
+{
+public:
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
+
+protected:
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const AMDGPURegisterInfo *TRI;
+
+public:
+ AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
+ const TargetInstrInfo *getTargetInstrInfo() const;
+ //bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDGPUCFGStructurizer
+
+//char AMDGPUCFGStructurizer::ID = 0;
+} //end of namespace llvm
+AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
+ : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
+ TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
+}
+
+const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
+ return TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer
+{
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPrepare(TargetMachine &tm);
+
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDGPUCFGPrepare
+
+char AMDGPUCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
+ : AMDGPUCFGStructurizer(ID, tm) {
+}
+
+const char *AMDGPUCFGPrepare::getPassName() const {
+ return "AMD IL Control Flow Graph Preparation Pass";
+}
+
+void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDGPUCFGPerform : public AMDGPUCFGStructurizer
+{
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPerform(TargetMachine &tm);
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDGPUCFGPerform
+
+char AMDGPUCFGPerform::ID = 0;
+} //end of namespace llvm
+
+AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
+ : AMDGPUCFGStructurizer(ID, tm) {
+}
+
+const char *AMDGPUCFGPerform::getPassName() const {
+ return "AMD IL Control Flow Graph structurizer Pass";
+}
+
+void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDGPUCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// This class is tailored to the AMDGPU backend.
+template<>
+struct CFGStructTraits<AMDGPUCFGStructurizer>
+{
+ typedef int RegiT;
+
+ static int getBreakNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBreakZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBranchNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
+ ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
+ case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBranchZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
+ ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
+ case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueNzeroOpcode(int oldOpcode)
+ {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+// the explicitly represented branch target is the true branch target
+#define getExplicitBranch getTrueBranch
+#define setExplicitBranch setTrueBranch
+
+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+ return instr->getOperand(0).getMBB();
+ }
+
+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+ instr->getOperand(0).setMBB(blk);
+ }
+
+ static MachineBasicBlock *
+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(blk->succ_size() == 2);
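+ // With exactly two successors, the false target is whichever successor is
+ // not the recorded true-branch target.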
+ MachineBasicBlock *trueBranch = getTrueBranch(instr);
+ MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+ MachineBasicBlock::succ_iterator iterNext = iter;
+ ++iterNext;
+
+ return (*iter == trueBranch) ? *iterNext : *iter;
+ }
+
+ static bool isCondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP:
+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
+ ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
+ case AMDGPU::SI_IF_NZ:
+ case AMDGPU::SI_IF_Z:
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isUncondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP:
+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+ // Get the DebugLoc from the last instruction in the block that has one.
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getDebugLoc().isUnknown() == false) {
+ DL = instr->getDebugLoc();
+ }
+ }
+ return DL;
+ }
+
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter == blk->rend()) {
+ return NULL;
+ }
+ MachineInstr *instr = &*iter;
+ if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
+ return instr;
+ }
+ return NULL;
+ }
+
+ // A more accurate name for this would be getPossibleLoopendBlockBranchInstr.
+ //
+ // A BB with a backward edge can have move instructions after the branch
+ // instruction. Such move instructions "belong to" the loop backward edge.
+ //
+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+ const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
+ blk->getParent()->getTarget().getInstrInfo());
+
+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+ iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+ // FIXME: Simplify
+ MachineInstr *instr = &*iter;
+ if (instr) {
+ if (isCondBranch(instr) || isUncondBranch(instr)) {
+ return instr;
+ } else if (!TII->isMov(instr->getOpcode())) {
+ break;
+ }
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::RETURN) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::CONTINUE) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static bool isReturnBlock(MachineBasicBlock *blk) {
+ MachineInstr *instr = getReturnInstr(blk);
+ bool isReturn = (blk->succ_size() == 0);
+ if (instr) {
+ assert(isReturn);
+ } else if (isReturn) {
+ if (DEBUGME) {
+ errs() << "BB" << blk->getNumber()
+ <<" is return block without RETURN instr\n";
+ }
+ }
+
+ return isReturn;
+ }
+
+ static MachineBasicBlock::iterator
+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(instr->getParent() == blk && "instruction doesn't belong to block");
+ MachineBasicBlock::iterator iter = blk->begin();
+ MachineBasicBlock::iterator iterEnd = blk->end();
+ while (iter != iterEnd && &(*iter) != instr) {
+ ++iter;
+ }
+
+ assert(iter != iterEnd);
+ return iter;
+ }//getInstrPos
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrBefore
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrEnd
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr = blk->getParent()
+ ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->push_back(newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ } //insertInstrEnd
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DebugLoc());
+
+ blk->insert(instrPos, newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ DebugLoc DL) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DL);
+
+ blk->insert(instrPos, newInstr);
+ MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
+ false);
+
+ SHOWNEWINSTR(newInstr);
+ // oldInstr is erased later: oldInstr->eraseFromParent();
+ } //insertCondBranchBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator insertPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum,
+ DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ //insert before
+ blk->insert(insertPos, newInstr);
+ MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchBefore
+
+ static void insertCondBranchEnd(MachineBasicBlock *blk,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+ blk->push_back(newInstr);
+ MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchEnd
+
+
+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ blk->insert(instrPos, newInstr);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertAssignInstrBefore
+
+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ } //insertAssignInstrBefore
+
+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT dstReg, RegiT src1Reg,
+ RegiT src2Reg) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
+
+ MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
+ MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
+ MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
+
+ blk->insert(instrPos, newInstr);
+ SHOWNEWINSTR(newInstr);
+
+ } //insertCompareInstrBefore
+
+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+ MachineBasicBlock *srcBlk) {
+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
+ }
+ } //cloneSuccessorList
+
+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+ MachineFunction *func = srcBlk->getParent();
+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+ func->push_back(newBlk); //insert to function
+ //newBlk->setNumber(srcBlk->getNumber());
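+ // Only the instructions are cloned here; successors of the new block are
+ // added separately via cloneSuccessorList.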
+ for (MachineBasicBlock::iterator iter = srcBlk->begin(),
+ iterEnd = srcBlk->end();
+ iter != iterEnd; ++iter) {
+ MachineInstr *instr = func->CloneMachineInstr(iter);
+ newBlk->push_back(instr);
+ }
+ return newBlk;
+ }
+
+ // MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose here
+ // because the AMDGPU branch instruction is not recognized as a terminator.
+ // FIXME: fix that and retire this routine.
+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+ MachineBasicBlock *oldBlk,
+ MachineBasicBlock *newBlk) {
+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+ if (branchInstr && isCondBranch(branchInstr) &&
+ getExplicitBranch(branchInstr) == oldBlk) {
+ setExplicitBranch(branchInstr, newBlk);
+ }
+ }
+
+ static void wrapup(MachineBasicBlock *entryBlk) {
+ assert((!entryBlk->getParent()->getJumpTableInfo()
+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+ //collect continue right before endloop
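+ // e.g. the pair "CONTINUE; ENDLOOP" becomes just "ENDLOOP" once the
+ // redundant CONTINUE is erased below.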
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+ MachineBasicBlock::iterator pre = entryBlk->begin();
+ MachineBasicBlock::iterator iterEnd = entryBlk->end();
+ MachineBasicBlock::iterator iter = pre;
+ while (iter != iterEnd) {
+ if (pre->getOpcode() == AMDGPU::CONTINUE
+ && iter->getOpcode() == AMDGPU::ENDLOOP) {
+ contInstr.push_back(pre);
+ }
+ pre = iter;
+ ++iter;
+ } //end while
+
+ //delete continue right before endloop
+ for (unsigned i = 0; i < contInstr.size(); ++i) {
+ contInstr[i]->eraseFromParent();
+ }
+
+ // TODO: Fix up the jump table so a later phase won't be confused. If
+ // jumpTableInfo->isEmpty() is false, the jump table needs to be cleaned,
+ // but there isn't such an interface yet. Alternatively, replace all the
+ // other blocks in the jump table with the entryBlk.
+
+ } //wrapup
+
+ static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineDominatorTree>();
+ }
+
+ static MachinePostDominatorTree*
+ getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachinePostDominatorTree>();
+ }
+
+ static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineLoopInfo>();
+ }
+}; // template class CFGStructTraits
+} //end of namespace llvmCFGStruct
+
+// createAMDGPUCFGPreparationPass - Returns a pass
+FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) {
+ return new AMDGPUCFGPrepare(tm);
+}
+
+bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
+ *this,
+ TRI);
+}
+
+// createAMDGPUCFGStructurizerPass - Returns a pass
+FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
+ return new AMDGPUCFGPerform(tm);
+}
+
+bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
+ *this,
+ TRI);
+}
+
+//end of file newline goes below
+
diff --git a/lib/Target/AMDGPU/AMDILDevice.cpp b/lib/Target/AMDGPU/AMDILDevice.cpp
new file mode 100644
index 0000000..3955828
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILDevice.cpp
@@ -0,0 +1,137 @@
+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevice.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+// Default implementation for all of the classes.
+AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST)
+{
+ mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ setCaps();
+ mDeviceFlag = OCL_DEVICE_ALL;
+}
+
+AMDGPUDevice::~AMDGPUDevice()
+{
+ mHWBits.clear();
+ mSWBits.clear();
+}
+
+size_t AMDGPUDevice::getMaxGDSSize() const
+{
+ return 0;
+}
+
+uint32_t
+AMDGPUDevice::getDeviceFlag() const
+{
+ return mDeviceFlag;
+}
+
+size_t AMDGPUDevice::getMaxNumCBs() const
+{
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return HW_MAX_NUM_CB;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxCBSize() const
+{
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return MAX_CB_SIZE;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxScratchSize() const
+{
+ return 65536;
+}
+
+uint32_t AMDGPUDevice::getStackAlignment() const
+{
+ return 16;
+}
+
+void AMDGPUDevice::setCaps()
+{
+ mSWBits.set(AMDGPUDeviceInfo::HalfOps);
+ mSWBits.set(AMDGPUDeviceInfo::ByteOps);
+ mSWBits.set(AMDGPUDeviceInfo::ShortOps);
+ mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
+ mSWBits.set(AMDGPUDeviceInfo::NoInline);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
+ mSWBits.set(AMDGPUDeviceInfo::MacroDB);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+}
+
+AMDGPUDeviceInfo::ExecutionMode
+AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const
+{
+ if (mHWBits[Caps]) {
+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Hardware;
+ }
+
+ if (mSWBits[Caps]) {
+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Software;
+ }
+
+ return AMDGPUDeviceInfo::Unsupported;
+
+}
+
+bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
+}
+
+bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
+}
+
+bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
+}
+
+std::string
+AMDGPUDevice::getDataLayout() const
+{
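+ // LLVM datalayout syntax: "e" = little-endian; "p:32:32:32" = 32-bit
+ // pointers with 32-bit ABI and preferred alignment; "iN:A:P", "vN:A:P",
+ // and "fN:A:P" give ABI/preferred alignments for integer, vector, and
+ // float types; "n8:16:32:64" lists the native integer widths.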
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n8:16:32:64");
+}
diff --git a/lib/Target/AMDGPU/AMDILDevice.h b/lib/Target/AMDGPU/AMDILDevice.h
new file mode 100644
index 0000000..05fe153
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILDevice.h
@@ -0,0 +1,116 @@
+//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEIMPL_H_
+#define _AMDILDEVICEIMPL_H_
+#include "AMDIL.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+ class MCStreamer;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDGPUDevice {
+public:
+ AMDGPUDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUDevice();
+
+ // Enum values for the various memory types.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ };
+
+ // Returns the max LDS size that the hardware supports. Size is in
+ // bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ // Returns the max GDS size that the hardware supports if the GDS is
+ // supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ // Returns the max number of hardware constant address spaces that
+ // are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ // Returns the max number of bytes a single hardware constant buffer
+ // can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ // Returns the max number of bytes allowed by the hardware scratch
+ // buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ // Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ // Returns the number of work-items that exist in a single hardware
+ // wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ // Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ // Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ // Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+ // Get the max number of UAV's for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+
+ // API for querying the detailed capabilities of each family of cards.
+ // If a capability is supported, then either usesHardware or usesSoftware
+ // returns true. If usesHardware returns true, then usesSoftware must
+ // return false for the same capability. Hardware execution means that
+ // the feature is done natively by the hardware and is not emulated by
+ // software. Software execution means that the feature could be done in
+ // hardware, but is instead emulated in software, possibly using the
+ // hardware for support, since the hardware does not fully comply with
+ // the OpenCL specs.
+ bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
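+ // Illustrative use (a sketch; the emit* helpers are hypothetical):
+ // if (dev->usesHardware(AMDGPUDeviceInfo::LocalMem))
+ // emitNativeLDS(...); // hypothetical: native LDS access
+ // else if (dev->usesSoftware(AMDGPUDeviceInfo::LocalMem))
+ // emitEmulatedLDS(...); // hypothetical: emulated LDS access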
+ virtual std::string getDataLayout() const;
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ virtual void setCaps();
+ llvm::BitVector mHWBits;
+ llvm::BitVector mSWBits;
+ AMDGPUSubtarget *mSTM;
+ uint32_t mDeviceFlag;
+private:
+ AMDGPUDeviceInfo::ExecutionMode
+ getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
+}; // AMDGPUDevice
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
diff --git a/lib/Target/AMDGPU/AMDILDeviceInfo.cpp b/lib/Target/AMDGPU/AMDILDeviceInfo.cpp
new file mode 100644
index 0000000..b2f7cfb
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILDeviceInfo.cpp
@@ -0,0 +1,94 @@
+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Function that creates DeviceInfo from a device name and other information.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+namespace llvm {
+namespace AMDGPUDeviceInfo {
+AMDGPUDevice *
+getDeviceFromName(const std::string &deviceName, AMDGPUSubtarget *ptr,
+ bool is64bit, bool is64on32bit)
+{
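+ // 7XX-family device names look like "rv710" / "rv770", so the generation
+ // is dispatched on the third and fourth characters of the name.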
+ if (deviceName.size() >= 4 && deviceName.c_str()[2] == '7') {
+ switch (deviceName.c_str()[3]) {
+ case '1':
+ return new AMDGPU710Device(ptr);
+ case '7':
+ return new AMDGPU770Device(ptr);
+ default:
+ return new AMDGPU7XXDevice(ptr);
+ };
+ } else if (deviceName == "cypress") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCypressDevice(ptr);
+ } else if (deviceName == "juniper") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUEvergreenDevice(ptr);
+ } else if (deviceName == "redwood") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPURedwoodDevice(ptr);
+ } else if (deviceName == "cedar") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCedarDevice(ptr);
+ } else if (deviceName == "barts"
+ || deviceName == "turks") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "cayman") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCaymanDevice(ptr);
+ } else if (deviceName == "caicos") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "SI") {
+ return new AMDGPUSIDevice(ptr);
+ } else {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPU7XXDevice(ptr);
+ }
+}
+} // End namespace AMDGPUDeviceInfo
+} // End namespace llvm
diff --git a/lib/Target/AMDGPU/AMDILDeviceInfo.h b/lib/Target/AMDGPU/AMDILDeviceInfo.h
new file mode 100644
index 0000000..4fa021e
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILDeviceInfo.h
@@ -0,0 +1,90 @@
+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEINFO_H_
+#define _AMDILDEVICEINFO_H_
+
+
+#include <string>
+
+namespace llvm
+{
+ class AMDGPUDevice;
+ class AMDGPUSubtarget;
+ namespace AMDGPUDeviceInfo
+ {
+ // Each Capabilities can be executed using a hardware instruction,
+ // emulated with a sequence of software instructions, or not
+ // supported at all.
+ enum ExecutionMode {
+ Unsupported = 0, // Unsupported feature on the card(Default value)
+ Software, // This is the execution mode that is set if the
+ // feature is emulated in software
+ Hardware // This execution mode is set if the feature exists
+ // natively in hardware
+ };
+
+ // Any changes to this need a corresponding update to the
+ // twiki page GPUMetadataABI.
+ enum Caps {
+ HalfOps = 0x1, // Half float is supported or not.
+ DoubleOps = 0x2, // Double is supported or not.
+ ByteOps = 0x3, // Byte(char) is supported or not.
+ ShortOps = 0x4, // Short is supported or not.
+ LongOps = 0x5, // Long is supported or not.
+ Images = 0x6, // Images are supported or not.
+ ByteStores = 0x7, // ByteStores available(!HD4XXX).
+ ConstantMem = 0x8, // Constant/CB memory.
+ LocalMem = 0x9, // Local/LDS memory.
+ PrivateMem = 0xA, // Scratch/Private/Stack memory.
+ RegionMem = 0xB, // OCL GDS Memory Extension.
+ FMA = 0xC, // Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
+ Reserved0 = 0xF, // ReservedFlag
+ NoAlias = 0x10, // Cached loads.
+ Signed24BitOps = 0x11, // Peephole Optimization.
+ // Debug mode implies that no hardware features or optimizations
+ // are performed and that all memory accesses go through a single
+ // UAV (Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12, // Debug mode is enabled.
+ CachedMem = 0x13, // Cached mem is available or not.
+ BarrierDetect = 0x14, // Detect duplicate barriers.
+ Reserved1 = 0x15, // Reserved flag
+ ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, // Flag to specify if Tmr register is supported.
+ NoInline = 0x19, // Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
+ // If more capabilities are required, then
+ // this number needs to be increased.
+ // All capabilities must come before this
+ // number.
+ MaxNumberCapabilities = 0x20
+ };
+ // These must be kept in order, with older generations having
+ // lower enumeration values.
+ enum Generation {
+ HD4XXX = 0, // 7XX based devices.
+ HD5XXX, // Evergreen based devices.
+ HD6XXX, // NI/Evergreen+ based devices.
+ HD7XXX,
+ HDTEST, // Experimental feature testing device.
+ HDNUMGEN
+ };
+
+
+ AMDGPUDevice*
+ getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
+ bool is64bit = false, bool is64on32bit = false);
+ } // namespace AMDGPUDeviceInfo
+} // namespace llvm
+#endif // _AMDILDEVICEINFO_H_
diff --git a/lib/Target/AMDGPU/AMDILDevices.h b/lib/Target/AMDGPU/AMDILDevices.h
new file mode 100644
index 0000000..cfcc330
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILDevices.h
@@ -0,0 +1,19 @@
+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef __AMDIL_DEVICES_H_
+#define __AMDIL_DEVICES_H_
+// Include all of the device specific header files
+// This file is for Internal use only!
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSIDevice.h"
+
+#endif // __AMDIL_DEVICES_H_
diff --git a/lib/Target/AMDGPU/AMDILEvergreenDevice.cpp b/lib/Target/AMDGPU/AMDILEvergreenDevice.cpp
new file mode 100644
index 0000000..3532a28
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILEvergreenDevice.cpp
@@ -0,0 +1,169 @@
+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
+: AMDGPUDevice(ST) {
+ setCaps();
+ std::string name = ST->getDeviceName();
+ if (name == "cedar") {
+ mDeviceFlag = OCL_DEVICE_CEDAR;
+ } else if (name == "redwood") {
+ mDeviceFlag = OCL_DEVICE_REDWOOD;
+ } else if (name == "cypress") {
+ mDeviceFlag = OCL_DEVICE_CYPRESS;
+ } else {
+ mDeviceFlag = OCL_DEVICE_JUNIPER;
+ }
+}
+
+AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
+}
+
+size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
+ return 12;
+}
+
+uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
+ switch(id) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ return GLOBAL_RETURN_RAW_UAV_ID;
+ case GLOBAL_ID:
+ case ARENA_UAV_ID:
+ return DEFAULT_ARENA_UAV_ID;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case GDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ };
+ return 0;
+}
+
+size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
+ return AMDGPUDevice::WavefrontSize;
+}
+
+uint32_t AMDGPUEvergreenDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD5XXX;
+}
+
+void AMDGPUEvergreenDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
+ mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
+ mHWBits.set(AMDGPUDeviceInfo::ByteStores);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mSWBits.set(AMDGPUDeviceInfo::RegionMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mHWBits.set(AMDGPUDeviceInfo::RegionMem);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Images);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
+ mHWBits.set(AMDGPUDeviceInfo::NoAlias);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::CachedMem);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
+ mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
+ mHWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mHWBits.set(AMDGPUDeviceInfo::TmrReg);
+}
+
+AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCypressDevice::~AMDGPUCypressDevice() {
+}
+
+void AMDGPUCypressDevice::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+}
+
+
+AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCedarDevice::~AMDGPUCedarDevice() {
+}
+
+void AMDGPUCedarDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPUCedarDevice::getWavefrontSize() const {
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
+
+AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPURedwoodDevice::~AMDGPURedwoodDevice()
+{
+}
+
+void AMDGPURedwoodDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPURedwoodDevice::getWavefrontSize() const {
+ return AMDGPUDevice::HalfWavefrontSize;
+}
diff --git a/lib/Target/AMDGPU/AMDILEvergreenDevice.h b/lib/Target/AMDGPU/AMDILEvergreenDevice.h
new file mode 100644
index 0000000..5def695
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILEvergreenDevice.h
@@ -0,0 +1,87 @@
+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILEVERGREENDEVICE_H_
+#define _AMDILEVERGREENDEVICE_H_
+#include "AMDILDevice.h"
+#include "AMDGPUSubtarget.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+// The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
+// series of cards. This class contains information required to differentiate
+// the Evergreen device from the generic AMDGPUDevice. This device represents
+// the capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDGPUEvergreenDevice : public AMDGPUDevice {
+public:
+ AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUEvergreenDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getMaxGDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getMaxNumUAVs() const;
+ virtual uint32_t getResourceID(uint32_t) const;
+protected:
+ virtual void setCaps();
+}; // AMDGPUEvergreenDevice
+
+// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it has
+// support for double precision operations. This device is used to represent
+// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+// and HD59XX cards.
+class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCypressDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCypressDevice();
+private:
+ virtual void setCaps();
+}; // AMDGPUCypressDevice
+
+
+// The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
+// devices. This class differs from the base AMDGPUEvergreenDevice in that the
+// device is a ~quarter of the 'Juniper'. These are commercially known as the
+// HD54XX and HD53XX series of cards.
+class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCedarDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCedarDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDGPUCedarDevice
+
+// The AMDGPURedwoodDevice is the class that represents all of the 'Redwood' based
+// devices. This class differs from the base class, in that these devices are
+// considered about half of a 'Juniper' device. These are commercially known as
+// the HD55XX and HD56XX series of cards.
+class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPURedwoodDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDGPURedwoodDevice
+
+} // namespace llvm
+#endif // _AMDILEVERGREENDEVICE_H_
diff --git a/lib/Target/AMDGPU/AMDILFrameLowering.cpp b/lib/Target/AMDGPU/AMDILFrameLowering.cpp
new file mode 100644
index 0000000..f2a0fe5
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILFrameLowering.cpp
@@ -0,0 +1,53 @@
+//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on an AMDIL target machine.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl)
+{
+}
+
+AMDGPUFrameLowering::~AMDGPUFrameLowering()
+{
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index.
+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI);
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
+{
+ NumEntries = 0;
+ return 0;
+}
+void
+AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const
+{
+}
+void
+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+}
+bool
+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const
+{
+ return false;
+}
diff --git a/lib/Target/AMDGPU/AMDILFrameLowering.h b/lib/Target/AMDGPU/AMDILFrameLowering.h
new file mode 100644
index 0000000..934ee46
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILFrameLowering.h
@@ -0,0 +1,46 @@
+//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on an AMDIL target machine.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILFRAME_LOWERING_H_
+#define _AMDILFRAME_LOWERING_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+/// Information about the stack frame layout on the AMDGPU targets. It holds
+/// the direction of the stack growth, the known stack alignment on entry to
+/// each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+
+namespace llvm {
+ class AMDGPUFrameLowering : public TargetFrameLowering {
+ public:
+ AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
+ TransAl = 1);
+ virtual ~AMDGPUFrameLowering();
+ virtual int getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const;
+ virtual const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+ }; // class AMDGPUFrameLowering
+} // namespace llvm
+#endif // _AMDILFRAME_LOWERING_H_
diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
new file mode 100644
index 0000000..63bc21e
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
@@ -0,0 +1,395 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
+#include "AMDGPURegisterInfo.h"
+#include "AMDILDevices.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include <list>
+#include <queue>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDGPUDAGToDAGISel - AMDGPU-specific code to select AMDGPU machine
+// instructions for SelectionDAG operations.
+//
+namespace {
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDGPUSubtarget &Subtarget;
+public:
+ AMDGPUDAGToDAGISel(TargetMachine &TM);
+ virtual ~AMDGPUDAGToDAGISel();
+
+ SDNode *Select(SDNode *N);
+ virtual const char *getPassName() const;
+
+private:
+ inline SDValue getSmallIPtrImm(unsigned Imm);
+
+ // Complex pattern selectors
+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+ static bool checkType(const Value *ptr, unsigned int addrspace);
+ static const Value *getBasePointerValue(const Value *V);
+
+ static bool isGlobalStore(const StoreSDNode *N);
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ static bool isCPLoad(const LoadSDNode *N);
+ static bool isConstantLoad(const LoadSDNode *N, int cbID);
+ static bool isGlobalLoad(const LoadSDNode *N);
+ static bool isPrivateLoad(const LoadSDNode *N);
+ static bool isLocalLoad(const LoadSDNode *N);
+ static bool isRegionLoad(const LoadSDNode *N);
+
+ bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
+ bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+// createAMDGPUISelDag - This pass converts a legalized DAG into an
+// AMDGPU-specific DAG, ready for instruction scheduling.
+//
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
+ return new AMDGPUDAGToDAGISel(TM);
+}
+
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
+}
+
+AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
+}
+
+SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRParam(
+ SDValue Addr, SDValue& R1, SDValue& R2) {
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+ return SelectADDRParam(Addr, R1, R2);
+}
+
+
+bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ return true;
+}
+
+SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ switch (Opc) {
+ default: break;
+ case ISD::FrameIndex:
+ {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
+ unsigned int FI = FIN->getIndex();
+ EVT OpVT = N->getValueType(0);
+ unsigned int NewOpc = AMDGPU::COPY;
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
+ }
+ }
+ break;
+ }
+ return SelectCode(N);
+}
+
+bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
+ if (!ptr) {
+ return false;
+ }
+ const PointerType *ptrType = dyn_cast<PointerType>(ptr->getType());
+ return ptrType && ptrType->getAddressSpace() == addrspace;
+}
+
+const Value *AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) {
+ if (!V) {
+ return NULL;
+ }
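+  // Walk the operands breadth-first until we find a Value that can act as
+  // the base pointer: a pointer Argument, a GlobalVariable, or an
+  // AllocaInst. ConstantExprs and other Instructions are looked through.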
+ const Value *ret = NULL;
+ ValueMap<const Value *, bool> ValueBitMap;
+ std::queue<const Value *, std::list<const Value *> > ValueQueue;
+ ValueQueue.push(V);
+ while (!ValueQueue.empty()) {
+ V = ValueQueue.front();
+ if (ValueBitMap.find(V) == ValueBitMap.end()) {
+ ValueBitMap[V] = true;
+      if (isa<Argument>(V) && isa<PointerType>(V->getType())) {
+        ret = V;
+        break;
+      } else if (isa<GlobalVariable>(V)) {
+        ret = V;
+        break;
+      } else if (isa<Constant>(V)) {
+        const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+ if (CE) {
+ ValueQueue.push(CE->getOperand(0));
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ ret = AI;
+ break;
+ } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ uint32_t numOps = I->getNumOperands();
+ for (uint32_t x = 0; x < numOps; ++x) {
+ ValueQueue.push(I->getOperand(x));
+ }
+ } else {
+ // assert(0 && "Found a Value that we didn't know how to handle!");
+ }
+ }
+ ValueQueue.pop();
+ }
+ return ret;
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+ return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
+}
+
+bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
+ if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
+ return true;
+ }
+  // A private load whose pointer is based on a global value is also
+  // treated as a constant load.
+  MachineMemOperand *MMO = N->getMemOperand();
+  if (!MMO || !MMO->getValue()) {
+    return false;
+  }
+  const Value *V = MMO->getValue();
+  const Value *BV = getBasePointerValue(V);
+  if (isa<GlobalValue>(V) || (BV && isa<GlobalValue>(BV))) {
+    return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
+  }
+  return false;
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
+ MachineMemOperand *MMO = N->getMemOperand();
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ if (MMO) {
+      const Value *V = MMO->getValue();
+      const PseudoSourceValue *PSV = dyn_cast_or_null<PseudoSourceValue>(V);
+      if (PSV == PseudoSourceValue::getConstantPool()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ // Check to make sure we are not a constant pool load or a constant load
+ // that is marked as a private load
+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
+ return false;
+ }
+ }
+  if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
+    return true;
+  }
+ return false;
+}
+
+const char *AMDGPUDAGToDAGISel::getPassName() const {
+ return "AMDGPU DAG->DAG Pattern Instruction Selection";
+}
+
+///==== AMDGPU Functions ====///
+
+bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
+ SDValue& Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ bool Match = false;
+
+ // Find the base ptr and the offset
+ for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
+ SDValue Arg = Addr.getOperand(i);
+      ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(Arg);
+      // This arg isn't a constant, so it must be the base pointer.
+ if (!OffsetNode) {
+ Base = Addr.getOperand(i);
+ continue;
+ }
+ // Check if the constant argument fits in 8-bits. The offset is in bytes
+ // so we need to convert it to dwords.
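+      // For example (illustrative): a byte offset of 0x1FC becomes the
+      // dword offset 0x7F, which still fits in 8 bits.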
+ if (isInt<8>(OffsetNode->getZExtValue() >> 2)) {
+ Match = true;
+ Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
+ MVT::i32);
+ }
+ }
+ return Match;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                                            SDValue &Offset) {
+  ConstantSDNode *IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD
+ && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ CurDAG->getEntryNode().getDebugLoc(),
+ AMDGPU::ZERO, MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
+ SDValue& Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() != ISD::ADD) {
+ return false;
+ }
+
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+
+ return true;
+}
diff --git a/lib/Target/AMDGPU/AMDILISelLowering.cpp b/lib/Target/AMDGPU/AMDILISelLowering.cpp
new file mode 100644
index 0000000..680f0fc
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILISelLowering.cpp
@@ -0,0 +1,746 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains TargetLowering functions borrowed from AMDIL.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDGPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+void AMDGPUTargetLowering::InitAMDILLowering()
+{
+ int types[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] =
+ {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] =
+ {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t numTypes = sizeof(types) / sizeof(*types);
+ size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+  const AMDGPUSubtarget &STM =
+    getTargetMachine().getSubtarget<AMDGPUSubtarget>();
+  // These are the currently supported register classes.
+
+ for (unsigned int x = 0; x < numTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ }
+ }
+ for (unsigned int x = 0; x < numFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < numIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+ // GPU also does not have divrem function for signed or unsigned
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    // setOperationAction(ISD::VSETCC, VT, Expand);
+    setOperationAction(ISD::SELECT_CC, VT, Expand);
+  }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+    setOperationAction(ISD::Constant, MVT::i64, Legal);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
+    // We support loading/storing v2f64 but not operations on the type.
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+  // TODO: Fix the UDIV24 algorithm so it works for these types; it needs
+  // vector comparisons to handle them correctly.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
+
+ // Use the default implementation.
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+  setOperationAction(ISD::Constant, MVT::i32, Legal);
+
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setPrefLoopAlignment(16);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+}
+
+bool
+AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const
+{
+ return false;
+}
+
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
+{
+  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
+  return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
+}
+
+bool
+AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
+{
+  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
+  return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
+}
+
+
+// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
+// be zero or one. Op is expected to be a target specific node. Used by the
+// DAG combiner.
+
+void
+AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const
+{
+ APInt KnownZero2;
+ APInt KnownOne2;
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+ switch (Op.getOpcode()) {
+ default: break;
+  case ISD::SELECT_CC:
+    DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth + 1);
+    DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+ assert((KnownZero & KnownOne) == 0
+ && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0
+ && "Bits known to be one AND zero?");
+ // Only known if known in both the LHS and RHS
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ SDValue Nodes1;
+ SDValue second;
+ SDValue third;
+ SDValue fourth;
+ DebugLoc DL = Op.getDebugLoc();
+ Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
+ DL,
+ VT, Op.getOperand(0));
+#if 0
+ bool allEqual = true;
+ for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
+ if (Op.getOperand(0) != Op.getOperand(x)) {
+ allEqual = false;
+ break;
+ }
+ }
+ if (allEqual) {
+ return Nodes1;
+ }
+#endif
+ switch(Op.getNumOperands()) {
+ default:
+ case 1:
+ break;
+ case 4:
+ fourth = Op.getOperand(3);
+ if (fourth.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ fourth,
+ DAG.getConstant(7, MVT::i32));
+ }
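+    // FALL-THROUGH: the lower elements are inserted by the cases below.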
+ case 3:
+ third = Op.getOperand(2);
+ if (third.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ third,
+ DAG.getConstant(6, MVT::i32));
+ }
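+    // FALL-THROUGH: the remaining element is inserted by case 2.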
+ case 2:
+ second = Op.getOperand(1);
+ if (second.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ second,
+ DAG.getConstant(5, MVT::i32));
+ }
+    break;
+  }
+ return Nodes1;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Data = Op.getOperand(0);
+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+ DebugLoc DL = Op.getDebugLoc();
+ EVT DVT = Data.getValueType();
+ EVT BVT = BaseType->getVT();
+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+ unsigned shiftBits = srcBits - baseBits;
+ if (srcBits < 32) {
+    // If the op is less than 32 bits, extend it to 32 bits so the shifts
+    // below keep the upper bits valid.
+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+ shiftBits = 32 - baseBits;
+ DVT = IVT;
+ }
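+  // Illustrative example: sign extending the low 8 bits of a 32-bit value
+  // uses shiftBits = 24, so the shifts below compute (Data << 24) >> 24
+  // with an arithmetic right shift.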
+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
+ // Shift left by 'Shift' bits.
+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+ // Signed shift Right by 'Shift' bits.
+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+ if (srcBits < 32) {
+ // Once the sign extension is done, the op needs to be converted to
+ // its original type.
+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+ }
+ return Data;
+}
+
+EVT
+AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
+{
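+  // Produces an i32- or i64-based type with the same total bit width as
+  // size * numEle; for example, (size = 16, numEle = 4) yields v2i32.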
+ int iSize = (size * numEle);
+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
+ if (!vEle) {
+ vEle = 1;
+ }
+ if (size == 64) {
+ if (vEle == 1) {
+ return EVT(MVT::i64);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
+ }
+ } else {
+ if (vEle == 1) {
+ return EVT(MVT::i32);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
+ }
+ }
+}
+
+SDValue
+AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+  return DAG.getNode(AMDGPUISD::BRANCH_COND, Op.getDebugLoc(),
+                     Op.getValueType(), Chain, Jump, Cond);
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  } else {
+    // Guard against vector widths the algorithm does not handle, which
+    // would otherwise leave INTTY/FLTTY uninitialized.
+    assert(!"Unhandled vector width in LowerSDIV24");
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  }
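+  // Overview: approximate the quotient in floating point, truncate it, and
+  // add a +/-1 correction (jq) when the remainder computed from the
+  // truncated quotient is still at least as large as the divisor.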
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // int ib = (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+  // int cv = fr >= fb;
+  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
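+  //
+  // In short: take absolute values with the (x + mask) ^ mask idiom, do an
+  // unsigned divide, then restore the sign of the quotient (r10 ^ r11).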
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getSelectCC(DL,
+      r0, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, OVT),
+      DAG.getConstant(0, OVT),
+      ISD::SETLT);
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getSelectCC(DL,
+      r1, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, OVT),
+      DAG.getConstant(0, OVT),
+      ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSREM32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
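+  //
+  // In short: rem = |LHS| - (|LHS| / |RHS|) * |RHS|, with the result taking
+  // the sign of LHS (restored by the final add/xor with r10).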
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+  // ilt r10, r0, 0 (an all-ones mask when r0 is negative, as the add/xor
+  // absolute-value idiom below requires)
+  SDValue r10 = DAG.getSelectCC(DL,
+      r0, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, OVT),
+      DAG.getConstant(0, OVT),
+      ISD::SETLT);
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getSelectCC(DL,
+      r1, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, OVT),
+      DAG.getConstant(0, OVT),
+      ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
diff --git a/lib/Target/AMDGPU/AMDILInstrInfo.td b/lib/Target/AMDGPU/AMDILInstrInfo.td
new file mode 100644
index 0000000..779566d
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILInstrInfo.td
@@ -0,0 +1,276 @@
+//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file describes the AMDIL instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+// AMDIL Instruction Predicate Definitions
+// Predicate that is set to true if the hardware supports double precision
+// divide
+def HasHWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports double precision,
+// but not double precision divide, in hardware
+def HasSWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports 24bit signed
+// math ops. Otherwise a software expansion to 32bit math ops is used instead.
+def HasHWSign24Bit : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicates that indicate whether 64bit operations are supported in
+// hardware or emulated in software
+def HasHW64Bit : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
+def HasSW64Bit : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
+
+// Predicate that is set to true if the timer register is supported
+def HasTmrRegister : Predicate<"Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
+// Predicate that is true if the device is at least the Evergreen series
+def HasDeviceIDInst : Predicate<"Subtarget.device()"
+ "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicate that is true if we have region address space.
+def hasRegionAS : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
+
+// Predicate that is false if we don't have region address space.
+def noRegionAS : Predicate<"!Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::RegionMem)">;
+
+
+// Predicates that indicate whether 64bit Mul is supported in the IL
+def HasHW64Mul : Predicate<"Subtarget.calVersion()"
+ ">= CAL_VERSION_SC_139"
+ "&& Subtarget.device()"
+ "->getGeneration() >="
+ "AMDGPUDeviceInfo::HD5XXX">;
+def HasSW64Mul : Predicate<"Subtarget.calVersion()"
+ "< CAL_VERSION_SC_139">;
+// Predicates that indicate whether 64bit Div/Mod is supported in the IL
+def HasHW64DivMod : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+def HasSW64DivMod : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+
+// Predicate that is set to true if 64bit pointers are used.
+def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
+def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+def brtarget : Operand<OtherVT>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Type Profiles
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Generic Profile Types
+//===----------------------------------------------------------------------===//
+
+def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
+ ]>;
+def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
+ SDTCisEltOfVec<1, 0>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Branch instruction where the first operand is the branch target and the
+// second is the condition
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+ SDTCisVT<0, OtherVT>
+ ]>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Nodes
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//===--------------------------------------------------------------------===//
+// Instructions
+//===--------------------------------------------------------------------===//
+// Floating point math functions
+def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
+def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Integer functions
+//===----------------------------------------------------------------------===//
+def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
+//===----------------------------------------------------------------------===//
+// Vector functions
+//===----------------------------------------------------------------------===//
+def IL_vbuild : SDNode<"AMDGPUISD::VBUILD", SDTIL_GenVecBuild,
+ []>;
+
+//===--------------------------------------------------------------------===//
+// Custom Pattern DAG Nodes
+//===--------------------------------------------------------------------===//
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Load pattern fragments
+//===----------------------------------------------------------------------===//
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Constant address space loads
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Complex addressing mode patterns
+//===----------------------------------------------------------------------===//
+def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
+def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
+def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
+def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
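+// Each ComplexPattern above invokes the matching SelectADDR* function in
+// AMDILISelDAGToDAG.cpp; on a successful match it produces two operands,
+// typically a base register and an offset.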
+
+//===----------------------------------------------------------------------===//
+// Instruction format classes
+//===----------------------------------------------------------------------===//
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// Multiclass Instruction formats
+//===--------------------------------------------------------------------===//
+// Multiclass that handles branch instructions
+multiclass BranchConditional<SDNode Op> {
+ def _i32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRI32:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, GPRI32:$src0)]>;
+ def _f32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRF32:$src0),
+ "; f32 Pseudo branch instruction",
+ [(Op bb:$target, GPRF32:$src0)]>;
+}
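+// For example, the "defm BRANCH_COND : BranchConditional<IL_brcond>" below
+// instantiates this multiclass to create BRANCH_COND_i32 and
+// BRANCH_COND_f32.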
+
+// Only scalar types should generate flow control
+multiclass BranchInstr<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src),
+ !strconcat(name, " $src"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src),
+ !strconcat(name, " $src"), []>;
+}
+// Only scalar types should generate flow control
+multiclass BranchInstr2<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+}
+
+//===--------------------------------------------------------------------===//
+// Intrinsics support
+//===--------------------------------------------------------------------===//
+include "AMDILIntrinsics.td"
+
+//===--------------------------------------------------------------------===//
+// Instructions support
+//===--------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
+// Custom Inserter for Branches and returns, this eventually will be a
+// separate pass
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, usesCustomInserter = 1 in {
+ def BRANCH : ILFormat<(outs), (ins brtarget:$target),
+ "; Pseudo unconditional branch instruction",
+ [(br bb:$target)]>;
+ defm BRANCH_COND : BranchConditional<IL_brcond>;
+}
+
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+let isTerminator = 1 in {
+ def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("SWITCH", " $src"), []>;
+ def CASE : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("CASE", " $src"), []>;
+ def BREAK : ILFormat< (outs), (ins),
+ "BREAK", []>;
+ def CONTINUE : ILFormat< (outs), (ins),
+ "CONTINUE", []>;
+ def DEFAULT : ILFormat< (outs), (ins),
+ "DEFAULT", []>;
+ def ELSE : ILFormat< (outs), (ins),
+ "ELSE", []>;
+ def ENDSWITCH : ILFormat< (outs), (ins),
+ "ENDSWITCH", []>;
+ def ENDMAIN : ILFormat< (outs), (ins),
+ "ENDMAIN", []>;
+ def END : ILFormat< (outs), (ins),
+ "END", []>;
+ def ENDFUNC : ILFormat< (outs), (ins),
+ "ENDFUNC", []>;
+ def ENDIF : ILFormat< (outs), (ins),
+ "ENDIF", []>;
+ def WHILELOOP : ILFormat< (outs), (ins),
+ "WHILE", []>;
+ def ENDLOOP : ILFormat< (outs), (ins),
+ "ENDLOOP", []>;
+ def FUNC : ILFormat< (outs), (ins),
+ "FUNC", []>;
+ def RETDYN : ILFormat< (outs), (ins),
+ "RET_DYN", []>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
+ defm IFC : BranchInstr2<"IFC">;
+ defm BREAKC : BranchInstr2<"BREAKC">;
+ defm CONTINUEC : BranchInstr2<"CONTINUEC">;
+}
diff --git a/lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp b/lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp
new file mode 100644
index 0000000..1b921ba
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp
@@ -0,0 +1,93 @@
+//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the AMDGPU Implementation of the IntrinsicInfo class.
+//
+//===-----------------------------------------------------------------------===//
+
+#include "AMDILIntrinsicInfo.h"
+#include "AMDIL.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
+ : TargetIntrinsicInfo()
+{
+}
+
+std::string
+AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
+ unsigned int numTys) const
+{
+ static const char* const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ //assert(!isOverloaded(IntrID)
+ //&& "AMDGPU Intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics) {
+    return "";
+ }
+ assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
+ && "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
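+// Note: target intrinsic IDs begin at Intrinsic::num_intrinsics, so the
+// first AMDGPU intrinsic maps to names[0] in the table above.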
+
+unsigned int
+AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
+{
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+  AMDGPUIntrinsic::ID IntrinsicID =
+    (AMDGPUIntrinsic::ID)getIntrinsicForGCCBuiltin("AMDGPU", Name);
+
+ if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
+ return IntrinsicID;
+ }
+ return 0;
+}
+
+bool
+AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const
+{
+ // Overload Table
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+Function*
+AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const
+{
+  // Silence an unused variable warning.
+  AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID);
+  (void)List;
+  assert(!"Not implemented");
+  return NULL;
+}
diff --git a/lib/Target/AMDGPU/AMDILIntrinsicInfo.h b/lib/Target/AMDGPU/AMDILIntrinsicInfo.h
new file mode 100644
index 0000000..5d9e845
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILIntrinsicInfo.h
@@ -0,0 +1,47 @@
+//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the AMDGPU Implementation of the Intrinsic Info class.
+//
+//===-----------------------------------------------------------------------===//
+#ifndef _AMDIL_INTRINSICS_H_
+#define _AMDIL_INTRINSICS_H_
+
+#include "llvm/Intrinsics.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+ class TargetMachine;
+ namespace AMDGPUIntrinsic {
+ enum ID {
+ last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_AMDGPU_intrinsics
+ };
+
+ }
+
+ class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ AMDGPUIntrinsicInfo(TargetMachine *tm);
+ std::string getName(unsigned int IntrId, Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ unsigned int lookupName(const char *Name, unsigned int Len) const;
+ bool isOverloaded(unsigned int IID) const;
+ Function *getDeclaration(Module *M, unsigned int ID,
+ Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ }; // AMDGPUIntrinsicInfo
+}
+
+#endif // _AMDIL_INTRINSICS_H_
+
diff --git a/lib/Target/AMDGPU/AMDILIntrinsics.td b/lib/Target/AMDGPU/AMDILIntrinsics.td
new file mode 100644
index 0000000..104b32e
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILIntrinsics.td
@@ -0,0 +1,247 @@
+//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines all of the AMDIL-specific intrinsics
+//
+//===---------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic intrinsic classes, used for target specific intrinsics instead of
+// SDNode patterns.
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ class VoidIntLong :
+ Intrinsic<[llvm_i64_ty], [], []>;
+ class VoidIntInt :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class VoidIntBool :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class UnaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class ConvertIntFTOI :
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+ class ConvertIntITOF :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+ class UnaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
+ class BinaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+ class TernaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class TernaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class QuaternaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ class UnaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
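+// The atomic classes above all take a pointer plus i32 operands; the
+// *NoRet variants perform the same operation but return nothing.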
+
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ def int_AMDIL_fabs : GCCBuiltin<"__amdil_fabs">, UnaryIntFloat;
+ def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
+
+ def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
+ UnaryIntInt;
+ def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
+ UnaryIntInt;
+ def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
+ TernaryIntInt;
+ def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
+ TernaryIntInt;
+ def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
+ QuaternaryIntInt;
+ def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
+ TernaryIntInt;
+ def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
+ BinaryIntInt;
+ def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
+ TernaryIntInt;
+ def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
+ TernaryIntInt;
+ def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
+ TernaryIntFloat;
+ def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
+ TernaryIntInt;
+ def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
+ TernaryIntInt;
+ def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
+ BinaryIntInt;
+ def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
+ BinaryIntInt;
+ def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
+ BinaryIntInt;
+ def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
+ BinaryIntInt;
+ def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
+ BinaryIntFloat;
+ def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
+ BinaryIntInt;
+ def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
+ BinaryIntInt;
+ def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
+ BinaryIntFloat;
+ def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
+ TernaryIntInt;
+ def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
+ UnaryIntFloat;
+ def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
+ TernaryIntFloat;
+ def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
+ UnaryIntFloat;
+ def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
+ UnaryIntFloat;
+ def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
+ UnaryIntFloat;
+ def int_AMDIL_round_posinf : GCCBuiltin<"__amdil_round_posinf">,
+ UnaryIntFloat;
+ def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
+ UnaryIntFloat;
+ def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
+ UnaryIntFloat;
+ def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
+ UnaryIntFloat;
+ def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
+ UnaryIntFloat;
+ def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
+ UnaryIntFloat;
+ def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
+ UnaryIntFloat;
+ def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
+ UnaryIntFloat;
+ def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
+ def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
+ def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
+ def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
+ UnaryIntFloat;
+ def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
+ UnaryIntFloat;
+ def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
+ UnaryIntFloat;
+ def int_AMDIL_log : GCCBuiltin<"__amdil_log">,
+ UnaryIntFloat;
+ def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
+ UnaryIntFloat;
+ def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
+ UnaryIntFloat;
+ def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
+ TernaryIntFloat;
+ def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
+ UnaryIntFloat;
+ def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
+ UnaryIntFloat;
+ def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
+ TernaryIntFloat;
+ def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i32_ty], []>;
+
+ def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
+ def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+ def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+ def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
+ ConvertIntITOF;
+ def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
+ def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
+ ConvertIntITOF;
+ def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty, llvm_float_ty], []>;
+ def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty], []>;
+ def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+ def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+}
diff --git a/lib/Target/AMDGPU/AMDILNIDevice.cpp b/lib/Target/AMDGPU/AMDILNIDevice.cpp
new file mode 100644
index 0000000..0ebbc9d
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILNIDevice.cpp
@@ -0,0 +1,71 @@
+//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILNIDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+
+AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST)
+{
+ std::string name = ST->getDeviceName();
+ if (name == "caicos") {
+ mDeviceFlag = OCL_DEVICE_CAICOS;
+ } else if (name == "turks") {
+ mDeviceFlag = OCL_DEVICE_TURKS;
+ } else if (name == "cayman") {
+ mDeviceFlag = OCL_DEVICE_CAYMAN;
+ } else {
+ mDeviceFlag = OCL_DEVICE_BARTS;
+ }
+}
+
+AMDGPUNIDevice::~AMDGPUNIDevice()
+{
+}
+
+size_t
+AMDGPUNIDevice::getMaxLDSSize() const
+{
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUNIDevice::getGeneration() const
+{
+ return AMDGPUDeviceInfo::HD6XXX;
+}
+
+AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
+ : AMDGPUNIDevice(ST)
+{
+ setCaps();
+}
+
+AMDGPUCaymanDevice::~AMDGPUCaymanDevice()
+{
+}
+
+void
+AMDGPUCaymanDevice::setCaps()
+{
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+}
+
diff --git a/lib/Target/AMDGPU/AMDILNIDevice.h b/lib/Target/AMDGPU/AMDILNIDevice.h
new file mode 100644
index 0000000..387f7d1
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILNIDevice.h
@@ -0,0 +1,59 @@
+//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===---------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef _AMDILNIDEVICE_H_
+#define _AMDILNIDEVICE_H_
+#include "AMDILEvergreenDevice.h"
+#include "AMDGPUSubtarget.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// NI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// The AMDGPUNIDevice is the base class for all Northern Islands series of
+// cards. It is very similar to the AMDGPUEvergreenDevice, with the major
+// exceptions being differences in wavefront size and hardware capabilities.
+// The NI devices all use 64-wide wavefronts and also add support for signed
+// 24-bit integer operations.
+
+ class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
+ public:
+ AMDGPUNIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUNIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+ protected:
+ }; // AMDGPUNIDevice
+
+// Just as the AMDGPUCypressDevice is the double-capable version of the
+// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double-capable version
+// of the AMDGPUNIDevice. The other major difference, though less relevant
+// from a compiler standpoint, is that the Cayman device has 4-wide ALUs,
+// whereas the rest of the NI family is 5-wide.
+
+ class AMDGPUCaymanDevice: public AMDGPUNIDevice {
+ public:
+ AMDGPUCaymanDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUCaymanDevice();
+ private:
+ virtual void setCaps();
+ }; // AMDGPUCaymanDevice
+
+ static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
+} // namespace llvm
+#endif // _AMDILNIDEVICE_H_
diff --git a/lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp b/lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp
new file mode 100644
index 0000000..758ed34
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp
@@ -0,0 +1,1282 @@
+//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PeepholeOpt"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILDevices.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <sstream>
+
+#if 0
+STATISTIC(PointerAssignments, "Number of dynamic pointer "
+    "assignments discovered");
+STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
+#endif
+
+using namespace llvm;
+// The peephole optimization pass is used to do simple last-minute
+// optimizations that are required for correct code or to remove redundant
+// functions.
+namespace {
+
+class OpaqueType;
+
+class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
+public:
+ TargetMachine &TM;
+ static char ID;
+ AMDGPUPeepholeOpt(TargetMachine &tm);
+ ~AMDGPUPeepholeOpt();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+protected:
+private:
+ // Function to initiate all of the instruction level optimizations.
+ bool instLevelOptimizations(BasicBlock::iterator *inst);
+  // Quick check to see if we need to dump all of the pointers into the
+  // arena. If so, we set all pointers to exist in the arena. This is a
+  // workaround for aliasing of pointers in a struct/union.
+  bool dumpAllIntoArena(Function &F);
+  // Because we don't want to invalidate any pointers while inside
+  // safeNestedForEach, atomic conversions are pushed to a vector and handled
+  // later. This function performs the conversions if required.
+  void doAtomicConversionIfNeeded(Function &F);
+  // Because __amdil_is_constant cannot be properly evaluated if
+  // optimizations are disabled, the calls are placed in a vector and
+  // evaluated after the __amdil_image* functions are evaluated, which
+  // should allow the __amdil_is_constant function to be evaluated
+  // correctly.
+  void doIsConstCallConversionIfNeeded();
+ bool mChanged;
+ bool mDebug;
+ bool mConvertAtomics;
+ CodeGenOpt::Level optLevel;
+ // Run a series of tests to see if we can optimize a CALL instruction.
+ bool optimizeCallInst(BasicBlock::iterator *bbb);
+ // A peephole optimization to optimize bit extract sequences.
+ bool optimizeBitExtract(Instruction *inst);
+ // A peephole optimization to optimize bit insert sequences.
+ bool optimizeBitInsert(Instruction *inst);
+ bool setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift);
+ // Expand the bit field insert instruction on versions of OpenCL that
+ // don't support it.
+ bool expandBFI(CallInst *CI);
+  // Expand the bit field mask instruction on versions of OpenCL that
+  // don't support it.
+  bool expandBFM(CallInst *CI);
+  // The 7XX and 8XX devices do not have signed 24-bit operations, so in
+  // that case we need to expand them. These functions check for 24-bit
+  // intrinsic calls and then expand them.
+  bool isSigned24BitOps(CallInst *CI);
+  void expandSigned24BitOps(CallInst *CI);
+  // One optimization that can occur: if the required workgroup size is
+  // specified, then the result of get_local_size is known at compile time
+  // and can be returned accordingly.
+  bool isRWGLocalOpt(CallInst *CI);
+  // On Northern Islands cards, the division is slightly less accurate than
+  // on previous generations, so a more accurate division must be used there.
+  // On all other cards the accurate divide can be translated into a normal
+  // divide.
+  bool convertAccurateDivide(CallInst *CI);
+  void expandAccurateDivide(CallInst *CI);
+ // If the alignment is set incorrectly, it can produce really inefficient
+ // code. This checks for this scenario and fixes it if possible.
+ bool correctMisalignedMemOp(Instruction *inst);
+
+  // If we are in no-opt mode, we need to make sure that local samplers are
+  // properly propagated, because constant propagation doesn't occur and we
+  // need to know the value of kernel-defined samplers at compile time.
+ bool propagateSamplerInst(CallInst *CI);
+
+ // Helper functions
+
+  // Group of functions that recursively calculate the size of a structure
+  // based on its sub-types.
+ size_t getTypeSize(Type * const T, bool dereferencePtr = false);
+ size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
+ size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
+  size_t getTypeSize(FunctionType * const FT, bool dereferencePtr = false);
+ size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
+ size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
+ size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
+ size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
+
+ LLVMContext *mCTX;
+ Function *mF;
+ const AMDGPUSubtarget *mSTM;
+ SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
+ SmallVector<CallInst *, 16> isConstVec;
+}; // class AMDGPUPeepholeOpt
+ char AMDGPUPeepholeOpt::ID = 0;
+
+// A template function that performs two levels of looping before calling the
+// function with a pointer to the current iterator. The functor F returns true
+// when it has already advanced the inner iterator itself (e.g. after erasing
+// the current instruction); otherwise the loop advances it.
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F)
+{
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end();
+ sf != sl; ) {
+ if (!F(&sf)) {
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
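+
+// For example, runOnFunction below invokes
+//   safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+//                     std::bind1st(std::mem_fun(
+//                         &AMDGPUPeepholeOpt::instLevelOptimizations), this));
+// iterating over every instruction in every basic block of the function.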
+
+} // anonymous namespace
+
+namespace llvm {
+ FunctionPass *
+ createAMDGPUPeepholeOpt(TargetMachine &tm)
+ {
+ return new AMDGPUPeepholeOpt(tm);
+ }
+} // llvm namespace
+
+AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
+  : FunctionPass(ID), TM(tm)
+{
+  mDebug = DEBUGME;
+  optLevel = TM.getOptLevel();
+}
+
+AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt()
+{
+}
+
+const char *
+AMDGPUPeepholeOpt::getPassName() const
+{
+ return "AMDGPU PeepHole Optimization Pass";
+}
+
+bool
+containsPointerType(Type *Ty)
+{
+ if (!Ty) {
+ return false;
+ }
+ switch(Ty->getTypeID()) {
+ default:
+ return false;
+ case Type::StructTyID: {
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ for (StructType::element_iterator stb = ST->element_begin(),
+ ste = ST->element_end(); stb != ste; ++stb) {
+ if (!containsPointerType(*stb)) {
+ continue;
+ }
+ return true;
+ }
+ break;
+ }
+ case Type::VectorTyID:
+ case Type::ArrayTyID:
+ return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
+ case Type::PointerTyID:
+ return true;
+ };
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F)
+{
+ bool dumpAll = false;
+ for (Function::const_arg_iterator cab = F.arg_begin(),
+ cae = F.arg_end(); cab != cae; ++cab) {
+ const Argument *arg = cab;
+ const PointerType *PT = dyn_cast<PointerType>(arg->getType());
+ if (!PT) {
+ continue;
+ }
+ Type *DereferencedType = PT->getElementType();
+    if (!dyn_cast<StructType>(DereferencedType)) {
+ continue;
+ }
+ if (!containsPointerType(DereferencedType)) {
+ continue;
+ }
+ // FIXME: Because a pointer inside of a struct/union may be aliased to
+ // another pointer we need to take the conservative approach and place all
+ // pointers into the arena until more advanced detection is implemented.
+ dumpAll = true;
+ }
+ return dumpAll;
+}
+void
+AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded()
+{
+ if (isConstVec.empty()) {
+ return;
+ }
+ for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
+ CallInst *CI = isConstVec[x];
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ CI->eraseFromParent();
+ }
+ isConstVec.clear();
+}
+void
+AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
+{
+ // Don't do anything if we don't have any atomic operations.
+ if (atomicFuncs.empty()) {
+ return;
+ }
+ // Change the function name for the atomic if it is required
+ uint32_t size = atomicFuncs.size();
+ for (uint32_t x = 0; x < size; ++x) {
+    atomicFuncs[x].first->setOperand(
+        atomicFuncs[x].first->getNumOperands()-1,
+        atomicFuncs[x].second);
+  }
+  mChanged = true;
+}
+
+bool
+AMDGPUPeepholeOpt::runOnFunction(Function &MF)
+{
+ mChanged = false;
+ mF = &MF;
+ mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
+ if (mDebug) {
+ MF.dump();
+ }
+ mCTX = &MF.getType()->getContext();
+ mConvertAtomics = true;
+ safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+ std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
+ this));
+
+ doAtomicConversionIfNeeded(MF);
+ doIsConstCallConversionIfNeeded();
+
+ if (mDebug) {
+ MF.dump();
+ }
+ return mChanged;
+}
+
+bool
+AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
+{
+ Instruction *inst = (*bbb);
+ CallInst *CI = dyn_cast<CallInst>(inst);
+ if (!CI) {
+ return false;
+ }
+ if (isSigned24BitOps(CI)) {
+ expandSigned24BitOps(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (propagateSamplerInst(CI)) {
+ return false;
+ }
+ if (expandBFI(CI) || expandBFM(CI)) {
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (convertAccurateDivide(CI)) {
+ expandAccurateDivide(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+
+ StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
+ if (calleeName.startswith("__amdil_is_constant")) {
+ // If we do not have optimizations, then this
+ // cannot be properly evaluated, so we add the
+ // call instruction to a vector and process
+ // them at the end of processing after the
+ // samplers have been correctly handled.
+ if (optLevel == CodeGenOpt::None) {
+ isConstVec.push_back(CI);
+ return false;
+ } else {
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ }
+
+ if (calleeName.equals("__amdil_is_asic_id_i32")) {
+ ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = CV;
+ if (Val) {
+ Val = ConstantInt::get(aType,
+ mSTM->device()->getDeviceFlag() & CV->getZExtValue());
+ } else {
+ Val = ConstantInt::get(aType, 0);
+ }
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
+ if (!F) {
+ return false;
+ }
+ if (F->getName().startswith("__atom") && !CI->getNumUses()
+ && F->getName().find("_xchg") == StringRef::npos) {
+ std::string buffer(F->getName().str() + "_noret");
+ F = dyn_cast<Function>(
+ F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
+    atomicFuncs.push_back(std::make_pair(CI, F));
+ }
+
+ if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
+ && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
+ return false;
+ }
+ if (!mConvertAtomics) {
+ return false;
+ }
+ StringRef name = F->getName();
+ if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
+ mConvertAtomics = false;
+ }
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift)
+{
+ if (!base) {
+ if (mDebug) {
+ dbgs() << "Null pointer passed into function.\n";
+ }
+ return false;
+ }
+ bool andOp = false;
+ if (base->getOpcode() == Instruction::Shl) {
+ shift = dyn_cast<Constant>(base->getOperand(1));
+ } else if (base->getOpcode() == Instruction::And) {
+ mask = dyn_cast<Constant>(base->getOperand(1));
+ andOp = true;
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
+ }
+    // If the base is neither a Shl nor an And, it doesn't fit any of the patterns above.
+ return false;
+ }
+ src = dyn_cast<Instruction>(base->getOperand(0));
+ if (!src) {
+ if (mDebug) {
+ dbgs() << "Failed setup since the base operand is not an instruction!\n";
+ }
+ return false;
+ }
+ // If we find an 'and' operation, then we don't need to
+ // find the next operation as we already know the
+ // bits that are valid at this point.
+ if (andOp) {
+ return true;
+ }
+ if (src->getOpcode() == Instruction::Shl && !shift) {
+ shift = dyn_cast<Constant>(src->getOperand(1));
+ src = dyn_cast<Instruction>(src->getOperand(0));
+ } else if (src->getOpcode() == Instruction::And && !mask) {
+ mask = dyn_cast<Constant>(src->getOperand(1));
+ }
+ if (!mask && !shift) {
+ if (mDebug) {
+ dbgs() << "Failed setup since both mask and shift are NULL!\n";
+ }
+ // Did not find a constant mask or a shift.
+ return false;
+ }
+ return true;
+}
+bool
+AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst)
+{
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::Or) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+ // We want to do an optimization on a sequence of ops that in the end equals a
+ // single ISA instruction.
+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
+ // Some simplified versions of this pattern are as follows:
+ // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
+ // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
+ // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
+ // (A & B) | (D << F) when (1 << F) >= B
+ // (A << C) | (D & E) when (1 << C) >= E
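+  // Illustrative walk-through (not part of the original comment): for
+  //   inst = (A & 0xFF00) | (D & 0x00FF)
+  // B = 0xFF00 is a shifted mask with width 8 and offset 8, and E = 0x00FF
+  // has width 8 and offset 0, so offset(B) >= width(E) + offset(E) holds and
+  // the sequence can collapse into a single
+  //   __amdil_ubit_insert(width = 8, offset = 8, A >> 8, D & 0x00FF)
+  // call, subject to the constraint checks performed below.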
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // The HD4XXX hardware doesn't support the ubit_insert instruction.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = 1;
+ // This optimization only works on 32bit integers.
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in an intrinsic, and
+    // we cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ // TODO: Handle vectors.
+ if (isVector) {
+ if (mDebug) {
+ dbgs() << "!!! Vectors are not supported yet!\n";
+ }
+ return false;
+ }
+ Instruction *LHSSrc = NULL, *RHSSrc = NULL;
+ Constant *LHSMask = NULL, *RHSMask = NULL;
+ Constant *LHSShift = NULL, *RHSShift = NULL;
+ Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
+ Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
+ if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (LHS) { LHS->dump(); }
+ if (LHSSrc) { LHSSrc->dump(); }
+ if (LHSMask) { LHSMask->dump(); }
+ if (LHSShift) { LHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (RHS) { RHS->dump(); }
+ if (RHSSrc) { RHSSrc->dump(); }
+ if (RHSMask) { RHSMask->dump(); }
+ if (RHSShift) { RHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
+ dbgs() << "Op: "; inst->dump();
+ dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ }
+ Constant *offset = NULL;
+ Constant *width = NULL;
+ uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
+ uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
+ uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
+ uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
+ lhsMaskVal = (LHSMask
+ ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
+ rhsMaskVal = (RHSMask
+ ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
+ lhsShiftVal = (LHSShift
+ ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
+ rhsShiftVal = (RHSShift
+ ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
+ lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
+ rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
+ lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
+ rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
+  // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
+ if (mDebug) {
+ dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
+ dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
+ dbgs() << (RHSMask ? " & E)" : ")");
+ dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
+ dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
+ dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
+ dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
+ dbgs() << "width(B) = " << lhsMaskWidth;
+ dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
+ dbgs() << "offset(B) = " << lhsMaskOffset;
+ dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
+ dbgs() << "Constraints: \n";
+ dbgs() << "\t(1) B ^ E == 0\n";
+ dbgs() << "\t(2-LHS) B is a mask\n";
+ dbgs() << "\t(2-LHS) E is a mask\n";
+ dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
+ dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
+ }
+ if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
+ if (mDebug) {
+ dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
+ dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
+ dbgs() << "Failed constraint 1!\n";
+ }
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "LHS = " << lhsMaskOffset << "";
+ dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
+ dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
+ dbgs() << "\nRHS = " << rhsMaskOffset << "";
+ dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
+ dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
+ dbgs() << "\n";
+ }
+ if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
+ offset = ConstantInt::get(aType, lhsMaskOffset, false);
+ width = ConstantInt::get(aType, lhsMaskWidth, false);
+ RHSSrc = RHS;
+ if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
+ if (mDebug) {
+ dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
+ dbgs() << "Failed constraint 2!\n";
+ }
+ return false;
+ }
+ if (!LHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ } else if (lhsShiftVal != lhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing LHS!\n";
+ }
+ } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
+ offset = ConstantInt::get(aType, rhsMaskOffset, false);
+ width = ConstantInt::get(aType, rhsMaskWidth, false);
+ LHSSrc = RHSSrc;
+ RHSSrc = LHS;
+ if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
+ if (mDebug) {
+ dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
+ dbgs() << "Failed constraint 2!\n";
+ }
+ return false;
+ }
+ if (!RHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ } else if (rhsShiftVal != rhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing RHS!\n";
+ }
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed constraint 3!\n";
+ }
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ }
+ if (!offset || !width) {
+ if (mDebug) {
+ dbgs() << "Either width or offset are NULL, failed detection!\n";
+ }
+ return false;
+ }
+  // Let's create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "__amdil_ubit_insert";
+ if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(llvm::StringRef(name), funcType));
+ Value *Operands[4] = {
+ width,
+ offset,
+ LHSSrc,
+ RHSSrc
+ };
+ CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
+ if (mDebug) {
+ dbgs() << "Old Inst: ";
+ inst->dump();
+ dbgs() << "New Inst: ";
+ CI->dump();
+ dbgs() << "\n\n";
+ }
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst)
+{
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::And) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+  // We want to do some simple optimizations on shift-right/and patterns. The
+  // basic optimization is to turn (A >> B) & C, where A is a 32-bit type, B
+  // is a value smaller than 32, and C is a mask, into a single instruction.
+  // If C is a constant value, then the following transformation can occur.
+  // For signed integers, it turns into the function call
+  //   dst = __amdil_ibit_extract(log2(C), B, A)
+  // and for unsigned integers, it turns into the function call
+  //   dst = __amdil_ubit_extract(log2(C), B, A)
+  // The __amdil_[u|i]bit_extract functions can be found in Section 7.9 of
+  // the ATI IL spec of the stream SDK for Evergreen hardware.
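+  // Illustrative walk-through (not part of the original comment): for
+  //   inst = (A >> 3) & 0xFF
+  // the mask 0xFF passes isMask_32 and CountTrailingOnes_32 gives a width of
+  // 8, so the shift/and pair is replaced below with a single
+  //   llvm.AMDGPU.bit.extract.u32(A, 3, 8)
+  // call that extracts an 8-bit field starting at bit 3.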
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // This does not work on HD4XXX hardware.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+
+ // XXX Support vector types
+ if (isVector) {
+ return false;
+ }
+ int numEle = 1;
+ // This only works on 32bit integers
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in an intrinsic, and
+    // we cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
+ // If the first operand is not a shift instruction, then we can return as it
+ // doesn't match this pattern.
+ if (!ShiftInst || !ShiftInst->isShift()) {
+ return false;
+ }
+  // If it is a shift left, then it doesn't match this pattern.
+ if (ShiftInst->getOpcode() == Instruction::Shl) {
+ return false;
+ }
+ bool isSigned = ShiftInst->isArithmeticShift();
+ Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
+ Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
+  // Let's make sure that the shift value and the and-mask are constant
+  // integers.
+ if (!AndMask || !ShrVal) {
+ return false;
+ }
+ Constant *newMaskConst;
+ Constant *shiftValConst;
+ if (isVector) {
+ // Handle the vector case
+ std::vector<Constant *> maskVals;
+ std::vector<Constant *> shiftVals;
+ ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
+ ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
+ Type *scalarType = AndMaskVec->getType()->getScalarType();
+ assert(AndMaskVec->getNumOperands() ==
+ ShrValVec->getNumOperands() && "cannot have a "
+ "combination where the number of elements to a "
+ "shift and an and are different!");
+ for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
+ ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
+ ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
+ if (!AndCI || !ShiftIC) {
+ return false;
+ }
+ uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
+ // If the mask or shiftval is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+      // If the mask val is greater than the number of original bits left,
+      // then this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
+ shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
+ }
+ newMaskConst = ConstantVector::get(maskVals);
+ shiftValConst = ConstantVector::get(shiftVals);
+ } else {
+ // Handle the scalar case
+ uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
+ // This must be a mask value where all lower bits are set to 1 and then any
+ // bit higher is set to 0.
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ // Count the number of bits set in the mask, this is the width of the
+ // resulting bit set that is extracted from the source value.
+ uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
+ // If the mask or shift val is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+    // If the mask val is greater than the number of original bits left, then
+    // this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
+ shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
+ }
+  // Let's create the function signature.
+  std::vector<Type *> callTypes;
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+  std::string name = "llvm.AMDGPU.bit.extract.u32";
+  if (isVector) {
+    name += ".v" + itostr(numEle) + "i32";
+  }
+  // Let's create the function.
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(llvm::StringRef(name), funcType));
+ Value *Operands[3] = {
+ ShiftInst->getOperand(0),
+ shiftValConst,
+ newMaskConst
+ };
+  // Let's create the call with the operands.
+ CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+ CI->setDoesNotAccessMemory();
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFI(CallInst *CI)
+{
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfi")) {
+ return false;
+ }
+ Type* type = CI->getOperand(0)->getType();
+ Constant *negOneConst = NULL;
+ if (type->isVectorTy()) {
+ std::vector<Constant *> negOneVals;
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ negOneVals.push_back(negOneConst);
+ }
+ negOneConst = ConstantVector::get(negOneVals);
+ } else {
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ }
+ // __amdil_bfi => (A & B) | (~A & C)
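+  // For example (illustrative): __amdil_bfi(0xF0, X, Y) selects bits 4-7
+  // from X and all remaining bits from Y: (0xF0 & X) | (~0xF0 & Y).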
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ CI->getOperand(1), "bfi_and", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
+ "bfi_not", CI);
+ rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
+ "bfi_and", CI);
+ lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFM(CallInst *CI)
+{
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfm")) {
+ return false;
+ }
+ // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
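+  // For example (illustrative): __amdil_bfm(8, 16) evaluates to
+  // ((1 << 8) - 1) << 16 = 0x00FF0000, an 8-bit mask at bit offset 16.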
+ Constant *newMaskConst = NULL;
+ Constant *newShiftConst = NULL;
+ Type* type = CI->getOperand(0)->getType();
+ if (type->isVectorTy()) {
+ std::vector<Constant*> newMaskVals, newShiftVals;
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ newMaskVals.push_back(newMaskConst);
+ newShiftVals.push_back(newShiftConst);
+ }
+ newMaskConst = ConstantVector::get(newMaskVals);
+ newShiftConst = ConstantVector::get(newShiftVals);
+ } else {
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ }
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
+ lhs, "bfm_shl", CI);
+ lhs = BinaryOperator::Create(Instruction::Sub, lhs,
+ newShiftConst, "bfm_sub", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(1),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
+{
+ Instruction *inst = (*bbb);
+ if (optimizeCallInst(bbb)) {
+ return true;
+ }
+ if (optimizeBitExtract(inst)) {
+ return false;
+ }
+ if (optimizeBitInsert(inst)) {
+ return false;
+ }
+ if (correctMisalignedMemOp(inst)) {
+ return false;
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
+{
+ LoadInst *linst = dyn_cast<LoadInst>(inst);
+ StoreInst *sinst = dyn_cast<StoreInst>(inst);
+ unsigned alignment;
+ Type* Ty = inst->getType();
+ if (linst) {
+ alignment = linst->getAlignment();
+ Ty = inst->getType();
+ } else if (sinst) {
+ alignment = sinst->getAlignment();
+ Ty = sinst->getValueOperand()->getType();
+ } else {
+ return false;
+ }
+ unsigned size = getTypeSize(Ty);
+  if (size <= alignment) {
+ return false;
+ }
+ if (!Ty->isStructTy()) {
+ return false;
+ }
+ if (alignment < 4) {
+ if (linst) {
+ linst->setAlignment(0);
+ return true;
+ } else if (sinst) {
+ sinst->setAlignment(0);
+ return true;
+ }
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI)
+{
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+  // The 14-character prefix "__amdil_imul24" also matches
+  // __amdil_imul24_high.
+  std::string namePrefix = LHS->getName().substr(0, 14);
+  if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24") {
+    return false;
+  }
+ if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
+ return false;
+ }
+ return true;
+}
+
+void
+AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI)
+{
+ assert(isSigned24BitOps(CI) && "Must be a "
+ "signed 24 bit operation to call this function!");
+ Value *LHS = CI->getOperand(CI->getNumOperands()-1);
+  // On 7XX and 8XX we do not have signed 24-bit operations, so we need to
+  // expand them to the following:
+  //   imul24      turns into a 32-bit imul
+  //   imad24      turns into a 32-bit imad
+  //   imul24_high turns into a 32-bit imul_high
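+  // For example (illustrative): a __amdil_imad24(a, b, c) call is rewritten
+  // below as a call to the scalar __amdil_imad_i32(a, b, c), while
+  // __amdil_imul24(a, b) becomes a plain 32-bit IR multiply.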
+ if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
+ Type *aType = CI->getOperand(0)->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ callTypes.push_back(CI->getOperand(2)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imad";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(llvm::StringRef(name), funcType));
+ Value *Operands[3] = {
+ CI->getOperand(0),
+ CI->getOperand(1),
+ CI->getOperand(2)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+  } else if (LHS->getName().substr(0, 14) == "__amdil_imul24"
+             && LHS->getName().substr(0, 19) != "__amdil_imul24_high") {
+ BinaryOperator *mulOp =
+ BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
+ CI->getOperand(1), "imul24", CI);
+ CI->replaceAllUsesWith(mulOp);
+ } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
+ Type *aType = CI->getOperand(0)->getType();
+
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imul_high";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(llvm::StringRef(name), funcType));
+ Value *Operands[2] = {
+ CI->getOperand(0),
+ CI->getOperand(1)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ }
+}
+
+bool
+AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI)
+{
+ return (CI != NULL
+ && CI->getOperand(CI->getNumOperands() - 1)->getName()
+ == "__amdil_get_local_size_int");
+}
+
+bool
+AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI)
+{
+ if (!CI) {
+ return false;
+ }
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
+ && (mSTM->getDeviceName() == "cayman")) {
+ return false;
+ }
+ return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
+ == "__amdil_improved_div";
+}
+
+void
+AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI)
+{
+ assert(convertAccurateDivide(CI)
+ && "expanding accurate divide can only happen if it is expandable!");
+ BinaryOperator *divOp =
+ BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
+ CI->getOperand(1), "fdiv32", CI);
+ CI->replaceAllUsesWith(divOp);
+}
+
+bool
+AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI)
+{
+ if (optLevel != CodeGenOpt::None) {
+ return false;
+ }
+
+ if (!CI) {
+ return false;
+ }
+
+  unsigned funcNameIdx = CI->getNumOperands() - 1;
+ StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
+ if (calleeName != "__amdil_image2d_read_norm"
+ && calleeName != "__amdil_image2d_read_unnorm"
+ && calleeName != "__amdil_image3d_read_norm"
+ && calleeName != "__amdil_image3d_read_unnorm") {
+ return false;
+ }
+
+  unsigned samplerIdx = 1;
+ Value *sampler = CI->getOperand(samplerIdx);
+ LoadInst *lInst = dyn_cast<LoadInst>(sampler);
+ if (!lInst) {
+ return false;
+ }
+
+ if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return false;
+ }
+
+ GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
+ // If we are loading from what is not a global value, then we
+ // fail and return.
+ if (!gv) {
+ return false;
+ }
+
+  // If we don't have an initializer, or the initializer is not a 32-bit
+  // integer, we fail.
+ if (!gv->hasInitializer()
+ || !gv->getInitializer()->getType()->isIntegerTy(32)) {
+ return false;
+ }
+
+  // Now that we have the global variable initializer, let's replace all
+  // uses of the load instruction with samplerVal and reparse the
+  // __amdil_is_constant() function.
+ Constant *samplerVal = gv->getInitializer();
+ lInst->replaceAllUsesWith(samplerVal);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::doInitialization(Module &M)
+{
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::doFinalization(Module &M)
+{
+ return false;
+}
+
+void
+AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
+{
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
+ size_t size = 0;
+ if (!T) {
+ return size;
+ }
+ switch (T->getTypeID()) {
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ assert(0 && "These types are not supported by this backend");
+ default:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ size = T->getPrimitiveSizeInBits() >> 3;
+ break;
+ case Type::PointerTyID:
+ size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+ break;
+ case Type::IntegerTyID:
+ size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+ break;
+ case Type::StructTyID:
+ size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+ break;
+ case Type::ArrayTyID:
+ size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+ break;
+ case Type::FunctionTyID:
+ size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+ break;
+ case Type::VectorTyID:
+ size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+ break;
+ };
+ return size;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
+ bool dereferencePtr) {
+ size_t size = 0;
+ if (!ST) {
+ return size;
+ }
+ Type *curType;
+ StructType::element_iterator eib;
+ StructType::element_iterator eie;
+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+ curType = *eib;
+ size += getTypeSize(curType, dereferencePtr);
+ }
+ return size;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
+ bool dereferencePtr) {
+ return IT ? (IT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
+ bool dereferencePtr) {
+ assert(0 && "Should not be able to calculate the size of an function type");
+ return 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
+ bool dereferencePtr) {
+ return (size_t)(AT ? (getTypeSize(AT->getElementType(),
+ dereferencePtr) * AT->getNumElements())
+ : 0);
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
+ bool dereferencePtr) {
+ return VT ? (VT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
+ bool dereferencePtr) {
+ if (!PT) {
+ return 0;
+ }
+ Type *CT = PT->getElementType();
+ if (CT->getTypeID() == Type::StructTyID &&
+ PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ return getTypeSize(dyn_cast<StructType>(CT));
+ } else if (dereferencePtr) {
+ size_t size = 0;
+ for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
+ size += getTypeSize(PT->getContainedType(x), dereferencePtr);
+ }
+ return size;
+ } else {
+ return 4;
+ }
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
+ bool dereferencePtr) {
+ //assert(0 && "Should not be able to calculate the size of an opaque type");
+ return 4;
+}
diff --git a/lib/Target/AMDGPU/AMDILRegisterInfo.td b/lib/Target/AMDGPU/AMDILRegisterInfo.td
new file mode 100644
index 0000000..42235ff
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILRegisterInfo.td
@@ -0,0 +1,110 @@
+//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Declarations that describe the AMDIL register file
+//
+//===----------------------------------------------------------------------===//
+
+class AMDILReg<bits<16> num, string n> : Register<n> {
+ field bits<16> Value;
+ let Value = num;
+ let Namespace = "AMDGPU";
+}
+
+// We will start with a small pool of registers for each class before
+// expanding to more. Since the swizzle is added based on the register class,
+// we can leave it off here and just specify different registers for
+// different register classes.
+def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
+def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
+def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
+def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
+def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
+def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
+def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
+def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
+def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
+def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
+
+// All registers between 1000 and 1024 are reserved and cannot be used
+// unless commented in this section
+// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local IDs
+// r1020 is used to hold the frame index for local arrays
+// r1019 is used to hold the dynamic stack allocation pointer
+// r1018 is used as a temporary register for handwritten code
+// r1017 is used as a temporary register for handwritten code
+// r1016 is used as a temporary register for load/store code
+// r1015 is used as a temporary register for data segment offset
+// r1014 is used as a temporary register for store code
+// r1013 is used as the section data pointer register
+// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
+// r1009 is used as the frame pointer register
+// r999 is used as the mem register.
+// r998 is used as the return address register.
+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
+//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
+//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
+//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
+//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
+//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
+def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
+def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
+def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
+def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
+def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
+def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
+def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
+def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
+def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
+def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
+def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
+def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
+def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
+def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
+def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
+def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
+def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
+def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
+def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
+def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
+def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
+def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
+def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
diff --git a/lib/Target/AMDGPU/AMDILSIDevice.cpp b/lib/Target/AMDGPU/AMDILSIDevice.cpp
new file mode 100644
index 0000000..856b00f
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILSIDevice.cpp
@@ -0,0 +1,49 @@
+//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILSIDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+
+AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST)
+{
+}
+AMDGPUSIDevice::~AMDGPUSIDevice()
+{
+}
+
+size_t
+AMDGPUSIDevice::getMaxLDSSize() const
+{
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUSIDevice::getGeneration() const
+{
+ return AMDGPUDeviceInfo::HD7XXX;
+}
+
+std::string
+AMDGPUSIDevice::getDataLayout() const
+{
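+  // How to read this string (explanatory note, not in the original): "e"
+  // selects little-endian, "p:64:64:64" gives 64-bit pointers with 64-bit
+  // ABI and preferred alignment, the "iN:A:P"/"vN:A:P"/"fN:A:P" entries give
+  // per-type ABI/preferred alignments, and "n8:16:32:64" lists the native
+  // integer widths.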
+ return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n8:16:32:64");
+}
diff --git a/lib/Target/AMDGPU/AMDILSIDevice.h b/lib/Target/AMDGPU/AMDILSIDevice.h
new file mode 100644
index 0000000..6a684cb
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILSIDevice.h
@@ -0,0 +1,45 @@
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===---------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef _AMDILSIDEVICE_H_
+#define _AMDILSIDEVICE_H_
+#include "AMDILEvergreenDevice.h"
+#include "AMDGPUSubtarget.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// SI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// The AMDGPUSIDevice is the base class for all Southern Islands series of
+// cards. It is very similar to the AMDGPUEvergreenDevice, with the major
+// exceptions being differences in wavefront size and hardware capabilities.
+// The SI devices all use 64-wide wavefronts and also add support for signed
+// 24-bit integer operations.
+
+ class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
+ public:
+ AMDGPUSIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUSIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual std::string getDataLayout() const;
+ protected:
+ }; // AMDGPUSIDevice
+
+} // namespace llvm
+#endif // _AMDILSIDEVICE_H_
diff --git a/lib/Target/AMDGPU/AMDILUtilityFunctions.h b/lib/Target/AMDGPU/AMDILUtilityFunctions.h
new file mode 100644
index 0000000..e6666f9
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILUtilityFunctions.h
@@ -0,0 +1,75 @@
+//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file provides helper macros for expanding case statements.
+//
+//===----------------------------------------------------------------------===//
+#ifndef AMDILUTILITYFUNCTIONS_H_
+#define AMDILUTILITYFUNCTIONS_H_
+
+// Macros that are used to help with switch statements for various data types.
+// Unlike the second set below, these macros do not return anything.
+#define ExpandCaseTo32bitIntTypes(Instr) \
+case Instr##_i32:
+
+#define ExpandCaseTo32bitIntTruncTypes(Instr) \
+case Instr##_i32i8: \
+case Instr##_i32i16:
+
+#define ExpandCaseToIntTypes(Instr) \
+ ExpandCaseTo32bitIntTypes(Instr)
+
+#define ExpandCaseToIntTruncTypes(Instr) \
+ ExpandCaseTo32bitIntTruncTypes(Instr)
+
+#define ExpandCaseToFloatTypes(Instr) \
+ case Instr##_f32:
+
+#define ExpandCaseTo32bitScalarTypes(Instr) \
+ ExpandCaseTo32bitIntTypes(Instr) \
+case Instr##_f32:
+
+#define ExpandCaseToAllScalarTypes(Instr) \
+ ExpandCaseToFloatTypes(Instr) \
+ExpandCaseToIntTypes(Instr)
+
+#define ExpandCaseToAllScalarTruncTypes(Instr) \
+ ExpandCaseToFloatTruncTypes(Instr) \
+ExpandCaseToIntTruncTypes(Instr)
+
+#define ExpandCaseToAllTypes(Instr) \
+ExpandCaseToAllScalarTypes(Instr)
+
+#define ExpandCaseToAllTruncTypes(Instr) \
+ExpandCaseToAllScalarTruncTypes(Instr)
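+
+// For example (illustrative, with a hypothetical opcode Foo):
+//   switch (Opcode) {
+//   ExpandCaseToAllScalarTypes(Foo)
+//     return true;
+//   }
+// expands the macro into 'case Foo_f32: case Foo_i32:'.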
+
+// Macros that expand into statements with return values
+#define ExpandCaseTo32bitIntReturn(Instr, Return) \
+case Instr##_i32: return Return##_i32;
+
+#define ExpandCaseToIntReturn(Instr, Return) \
+ ExpandCaseTo32bitIntReturn(Instr, Return)
+
+#define ExpandCaseToFloatReturn(Instr, Return) \
+  case Instr##_f32: return Return##_f32;
+
+#define ExpandCaseToAllScalarReturn(Instr, Return) \
+ ExpandCaseToFloatReturn(Instr, Return) \
+ExpandCaseToIntReturn(Instr, Return)
+
+// These macros expand to common groupings of RegClass ID's
+#define ExpandCaseTo1CompRegID \
+case AMDGPU::GPRI32RegClassID: \
+case AMDGPU::GPRF32RegClassID:
+
+#define ExpandCaseTo32BitType(Instr) \
+case Instr##_i32: \
+case Instr##_f32:
+
+#endif // AMDILUTILITYFUNCTIONS_H_
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
new file mode 100644
index 0000000..4d6f8d0
--- /dev/null
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -0,0 +1,52 @@
+set(LLVM_TARGET_DEFINITIONS AMDGPU.td)
+
+tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
+tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
+tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
+add_public_tablegen_target(AMDGPUCommonTableGen)
+
+add_llvm_target(AMDGPUCodeGen
+ AMDIL7XXDevice.cpp
+ AMDILCFGStructurizer.cpp
+ AMDILDevice.cpp
+ AMDILDeviceInfo.cpp
+ AMDILEvergreenDevice.cpp
+ AMDILFrameLowering.cpp
+ AMDILIntrinsicInfo.cpp
+ AMDILISelDAGToDAG.cpp
+ AMDILISelLowering.cpp
+ AMDILNIDevice.cpp
+ AMDILPeepholeOptimizer.cpp
+ AMDILSIDevice.cpp
+ AMDGPUAsmPrinter.cpp
+ AMDGPUMCInstLower.cpp
+ AMDGPUSubtarget.cpp
+ AMDGPUTargetMachine.cpp
+ AMDGPUISelLowering.cpp
+ AMDGPUConvertToISA.cpp
+ AMDGPUInstrInfo.cpp
+ AMDGPURegisterInfo.cpp
+ R600ExpandSpecialInstrs.cpp
+ R600InstrInfo.cpp
+ R600ISelLowering.cpp
+ R600KernelParameters.cpp
+ R600MachineFunctionInfo.cpp
+ R600RegisterInfo.cpp
+ SIAssignInterpRegs.cpp
+ SIInstrInfo.cpp
+ SIISelLowering.cpp
+ SIMachineFunctionInfo.cpp
+ SIRegisterInfo.cpp
+ )
+
+add_dependencies(LLVMAMDGPUCodeGen intrinsics_gen)
+
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/AMDGPU/GENERATED_FILES b/lib/Target/AMDGPU/GENERATED_FILES
new file mode 100644
index 0000000..9fa63b9
--- /dev/null
+++ b/lib/Target/AMDGPU/GENERATED_FILES
@@ -0,0 +1,13 @@
+There are 3 files used by this backend that are generated by perl scripts:
+
+- R600RegisterInfo.td
+ + Generated with:
+ perl R600GenRegisterInfo.pl > R600RegisterInfo.td
+
+- R600HwRegInfo.include
+ + Generated with:
+ perl R600GenRegisterInfo.pl
+
+- SIRegisterInfo.td
+ + Generated with:
+ perl SIGenRegisterInfo.pl > SIRegisterInfo.td
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
new file mode 100644
index 0000000..b6ab9b2
--- /dev/null
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -0,0 +1,34 @@
+
+#include "AMDGPUInstPrinter.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
+ printInstruction(MI, OS);
+
+ printAnnotation(OS, Annot);
+}
+
+void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ O << Op.getFPImm();
+ } else {
+ assert(!"unknown operand type in printOperand");
+ }
+}
+
+void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+}
+
+#include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
new file mode 100644
index 0000000..62c1a5e
--- /dev/null
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -0,0 +1,34 @@
+
+#ifndef AMDGPUINSTPRINTER_H
+#define AMDGPUINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class AMDGPUInstPrinter : public MCInstPrinter {
+public:
+ AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+// virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+// void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUINSTPRINTER_H
diff --git a/lib/Target/AMDGPU/InstPrinter/CMakeLists.txt b/lib/Target/AMDGPU/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..511c409
--- /dev/null
+++ b/lib/Target/AMDGPU/InstPrinter/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAMDGPUAsmPrinter
+ AMDGPUInstPrinter.cpp
+ )
+
+add_dependencies(LLVMAMDGPUAsmPrinter AMDGPUCommonTableGen)
diff --git a/lib/Target/AMDGPU/InstPrinter/LLVMBuild.txt b/lib/Target/AMDGPU/InstPrinter/LLVMBuild.txt
new file mode 100644
index 0000000..fdb4384
--- /dev/null
+++ b/lib/Target/AMDGPU/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AMDGPU/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AMDGPUAsmPrinter
+parent = AMDGPU
+required_libraries = MC Support
+add_to_library_groups = AMDGPU
+
diff --git a/lib/Target/AMDGPU/InstPrinter/Makefile b/lib/Target/AMDGPU/InstPrinter/Makefile
new file mode 100644
index 0000000..9ff8bbd
--- /dev/null
+++ b/lib/Target/AMDGPU/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AMDGPU/InstPrinter/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAMDGPUAsmPrinter
+
+# Hack: we need to include the 'main' AMDGPU target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AMDGPU/LLVMBuild.txt b/lib/Target/AMDGPU/LLVMBuild.txt
new file mode 100644
index 0000000..84e44b4
--- /dev/null
+++ b/lib/Target/AMDGPU/LLVMBuild.txt
@@ -0,0 +1,32 @@
;===- ./lib/Target/AMDGPU/LLVMBuild.txt ------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = AMDGPU
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = AMDGPUCodeGen
+parent = AMDGPU
+required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo
+add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
new file mode 100644
index 0000000..75a4756
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -0,0 +1,80 @@
+//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUMCObjectWriter : public MCObjectWriter {
+public:
+ AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ //XXX: Implement if necessary.
+ }
+ virtual void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ assert(!"Not implemented");
+ }
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+};
+
+class AMDGPUAsmBackend : public MCAsmBackend {
+public:
+ AMDGPUAsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+ virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+  virtual unsigned getNumFixupKinds() const { return 0; }
+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const { assert(!"Not implemented"); }
+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return false;
+ }
+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(!"Not implemented");
+ }
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
+ virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return true;
+ }
+};
+
+} //End anonymous namespace
+
+void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
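+  // Dump the raw contents of every section back to back; no object-file
+  // headers, symbol tables, or relocations are produced.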
+ for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
+ Asm.writeSectionData(I, Layout);
+ }
+}
+
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT) {
+ return new AMDGPUAsmBackend(T);
+}
+
+AMDGPUMCObjectWriter *AMDGPUAsmBackend::createObjectWriter(
+    raw_ostream &OS) const {
+ return new AMDGPUMCObjectWriter(OS);
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
new file mode 100644
index 0000000..2de4554
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -0,0 +1,88 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCAsmInfo.h"
+
+using namespace llvm;
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT)
+    : MCAsmInfo() {
+ HasSingleParameterDotFile = false;
+ WeakDefDirective = 0;
+ //===------------------------------------------------------------------===//
+ HasSubsectionsViaSymbols = true;
+ HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
+ HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = true;
+ MaxInstLength = 16;
+ PCSymbol = "$";
+ SeparatorString = "\n";
+ CommentColumn = 40;
+ CommentString = ";";
+ LabelSuffix = ":";
+ GlobalPrefix = "@";
+ PrivateGlobalPrefix = ";.";
+ LinkerPrivateGlobalPrefix = "!";
+ InlineAsmStart = ";#ASMSTART";
+ InlineAsmEnd = ";#ASMEND";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = false;
+
+ //===--- Data Emission Directives -------------------------------------===//
+ ZeroDirective = ".zero";
+ AsciiDirective = ".ascii\t";
+ AscizDirective = ".asciz\t";
+ Data8bitsDirective = ".byte\t";
+ Data16bitsDirective = ".short\t";
+ Data32bitsDirective = ".long\t";
+ Data64bitsDirective = ".quad\t";
+ GPRel32Directive = 0;
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+ HasMicrosoftFastStdCallMangling = false;
+
+ //===--- Alignment Information ----------------------------------------===//
+ AlignDirective = ".align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+
+ //===--- Global Variable Emission Directives --------------------------===//
+ GlobalDirective = ".global";
+ ExternDirective = ".extern";
+ HasSetDirective = false;
+ HasAggressiveSymbolFolding = true;
+ LCOMMDirectiveType = LCOMM::None;
+ COMMDirectiveAlignmentIsInBytes = false;
+ HasDotTypeDotSizeDirective = false;
+ HasNoDeadStrip = true;
+ HasSymbolResolver = false;
+ WeakRefDirective = ".weakref\t";
+ LinkOnceDirective = 0;
+ //===--- Dwarf Emission Directives -----------------------------------===//
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::None;
+ DwarfUsesInlineInfoSection = false;
+ DwarfSectionOffsetDirective = ".offset";
+
+}
+
+const char*
+AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const {
+ return 0;
+}
+
+const MCSection*
+AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
+ return 0;
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
new file mode 100644
index 0000000..0ca264b
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU Asm Info --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AMDGPUMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMCASMINFO_H_
+#define AMDGPUMCASMINFO_H_
+
+#include "llvm/MC/MCAsmInfo.h"
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ class AMDGPUMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
+ const char*
+ getDataASDirective(unsigned int Size, unsigned int AS) const;
+ const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
+ };
+} // namespace llvm
+#endif // AMDGPUMCASMINFO_H_
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
new file mode 100644
index 0000000..a75a841
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -0,0 +1,59 @@
+//===-- AMDGPUMCCodeEmitter.h - AMDGPU Code Emitter interface -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUCODEEMITTER_H
+#define AMDGPUCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+ class MCInst;
+ class MCOperand;
+
+ class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+ public:
+
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+
+ virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+ virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+ virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
+ return Value;
+ }
+ virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+ virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+ };
+
+} // End namespace llvm
+
+#endif // AMDGPUCODEEMITTER_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
new file mode 100644
index 0000000..87f6372
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -0,0 +1,112 @@
+//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "AMDGPUMCAsmInfo.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AMDGPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createAMDGPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAMDGPUMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAMDGPUMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo * X = new MCSubtargetInfo();
+ InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new AMDGPUInstPrinter(MAI, MII, MRI);
+}
+
+static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
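+  // SI is the only processor family with 64-bit pointers (see Processors.td),
+  // so that feature bit is enough to pick between the two emitters.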
+ if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
+ return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
+ } else {
+ return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
+ }
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ return createPureStreamer(Ctx, MAB, _OS, _Emitter);
+}
+
+extern "C" void LLVMInitializeAMDGPUTargetMC() {
+
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
+
+ TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
+
+ TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
+
+ TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
+
+ TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
+
+ TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
+
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
+
+ TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
+
+ TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
new file mode 100644
index 0000000..e067ed5
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -0,0 +1,53 @@
+//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef AMDGPUMCTARGETDESC_H
+#define AMDGPUMCTARGETDESC_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+
+extern Target TheAMDGPUTarget;
+
+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT);
+} // End llvm namespace
+
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#endif // AMDGPUMCTARGETDESC_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..9d921df
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+add_llvm_library(LLVMAMDGPUDesc
+ AMDGPUAsmBackend.cpp
+ AMDGPUMCTargetDesc.cpp
+ AMDGPUMCAsmInfo.cpp
+ R600MCCodeEmitter.cpp
+ SIMCCodeEmitter.cpp
+ )
+
+add_dependencies(LLVMAMDGPUDesc AMDGPUCommonTableGen)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000..8f46a98
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AMDGPUDesc
+parent = AMDGPU
+required_libraries = AMDGPUAsmPrinter AMDGPUInfo MC
+add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/MCTargetDesc/Makefile b/lib/Target/AMDGPU/MCTargetDesc/Makefile
new file mode 100644
index 0000000..5ad6866
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AMDGPU/MCTargetDesc/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAMDGPUDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
new file mode 100644
index 0000000..6f0d7f5
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -0,0 +1,669 @@
+//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This code emitter outputs bytecode that is understood by the r600g driver
+// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
+// except that the sizes of the instruction fields are rounded up to the
+// nearest byte.
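+// For example, the two-bit source-channel field is emitted as one full byte
+// (see EmitSrc() below).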
+//
+// [1] http://www.mesa3d.org/
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600Defines.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <stdio.h>
+
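+// Byte counts of the serialized source and destination operand records
+// emitted by EmitSrc() and EmitDst() below.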
+#define SRC_BYTE_COUNT 11
+#define DST_BYTE_COUNT 5
+
+using namespace llvm;
+
+namespace {
+
+class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
+ R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+
+public:
+
+ R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+
+ /// EncodeInstruction - Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getMachineOpValue - Return the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+private:
+
+ void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const;
+ void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
+ void EmitDst(const MCInst &MI, raw_ostream &OS) const;
+ void EmitALU(const MCInst &MI, unsigned numSrc,
+ SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const;
+ void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const;
+ void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
+
+ void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
+
+ void EmitByte(unsigned int byte, raw_ostream &OS) const;
+
+ void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
+
+ void Emit(uint32_t value, raw_ostream &OS) const;
+ void Emit(uint64_t value, raw_ostream &OS) const;
+
+ unsigned getHWRegIndex(unsigned reg) const;
+ unsigned getHWRegChan(unsigned reg) const;
+ unsigned getHWReg(unsigned regNo) const;
+
+ bool isFCOp(unsigned opcode) const;
+ bool isTexOp(unsigned opcode) const;
+ bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
+
+ /// getHWRegChanGen - Get the register's channel. Implemented in
+ /// R600HwRegInfo.include.
+ unsigned getHWRegChanGen(unsigned int Reg) const;
+};
+
+} // End anonymous namespace
+
+enum RegElement {
+ ELEMENT_X = 0,
+ ELEMENT_Y,
+ ELEMENT_Z,
+ ELEMENT_W
+};
+
+enum InstrTypes {
+ INSTR_ALU = 0,
+ INSTR_TEX,
+ INSTR_FC,
+ INSTR_NATIVE,
+ INSTR_VTX
+};
+
+enum FCInstr {
+ FC_IF = 0,
+ FC_IF_INT,
+ FC_ELSE,
+ FC_ENDIF,
+ FC_BGNLOOP,
+ FC_ENDLOOP,
+ FC_BREAK,
+ FC_BREAK_NZ_INT,
+ FC_CONTINUE,
+ FC_BREAK_Z_INT,
+ FC_BREAK_NZ
+};
+
+enum TextureTypes {
+ TEXTURE_1D = 1,
+ TEXTURE_2D,
+ TEXTURE_3D,
+ TEXTURE_CUBE,
+ TEXTURE_RECT,
+ TEXTURE_SHADOW1D,
+ TEXTURE_SHADOW2D,
+ TEXTURE_SHADOWRECT,
+ TEXTURE_1D_ARRAY,
+ TEXTURE_2D_ARRAY,
+ TEXTURE_SHADOW1D_ARRAY,
+ TEXTURE_SHADOW2D_ARRAY
+};
+
+MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (isTexOp(MI.getOpcode())) {
+ EmitTexInstr(MI, Fixups, OS);
+ } else if (isFCOp(MI.getOpcode())){
+ EmitFCInstr(MI, OS);
+ } else if (MI.getOpcode() == AMDGPU::RETURN ||
+ MI.getOpcode() == AMDGPU::BUNDLE ||
+ MI.getOpcode() == AMDGPU::KILL) {
+ return;
+ } else {
+ switch(MI.getOpcode()) {
+ case AMDGPU::RAT_WRITE_CACHELESS_eg:
+ {
+ uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(inst, OS);
+ break;
+ }
+ case AMDGPU::VTX_READ_PARAM_i32_eg:
+ case AMDGPU::VTX_READ_PARAM_f32_eg:
+ case AMDGPU::VTX_READ_GLOBAL_i32_eg:
+ case AMDGPU::VTX_READ_GLOBAL_f32_eg:
+ case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
+ case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
+ {
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+ uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
+
+ EmitByte(INSTR_VTX, OS);
+ Emit(InstWord01, OS);
+ Emit(InstWord2, OS);
+ break;
+ }
+
+ default:
+ EmitALUInstr(MI, Fixups, OS);
+ break;
+ }
+ }
+}
+
+void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+ unsigned NumOperands = MI.getNumOperands();
+
+ if(MCDesc.findFirstPredOperandIdx() > -1)
+ NumOperands--;
+
+ if (GET_FLAG_OPERAND_IDX(MCDesc.TSFlags) != 0)
+ NumOperands--;
+
+ if(MI.getOpcode() == AMDGPU::PRED_X)
+ NumOperands = 2;
+
+ // XXX Check if instruction writes a result
+ if (NumOperands < 1) {
+ return;
+ }
+
+  // Emit the instruction type tag (INSTR_ALU)
+  EmitByte(INSTR_ALU, OS);
+
+ unsigned int OpIndex;
+ for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) {
+ // Literal constants are always stored as the last operand.
+ if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) {
+ break;
+ }
+ EmitSrc(MI, OpIndex, OS);
+ }
+
+ // Emit zeros for unused sources
+ for ( ; OpIndex < 4; OpIndex++) {
+ EmitNullBytes(SRC_BYTE_COUNT, OS);
+ }
+
+ EmitDst(MI, OS);
+
+ EmitALU(MI, NumOperands - 1, Fixups, OS);
+}
+
+void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
+ raw_ostream &OS) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
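+  // A union lets us extract the raw IEEE bit pattern of an FP literal.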
+ union {
+ float f;
+ uint32_t i;
+ } Value;
+ Value.i = 0;
+ // Emit the source select (2 bytes). For GPRs, this is the register index.
+ // For other potential instruction operands, (e.g. constant registers) the
+ // value of the source select is defined in the r600isa docs.
+ if (MO.isReg()) {
+ unsigned reg = MO.getReg();
+ EmitTwoBytes(getHWReg(reg), OS);
+ if (reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ Value.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ Value.i = ImmOp.getImm();
+ }
+ }
+ } else {
+ // XXX: Handle other operand types.
+ EmitTwoBytes(0, OS);
+ }
+
+ // Emit the source channel (1 byte)
+ if (MO.isReg()) {
+ EmitByte(getHWRegChan(MO.getReg()), OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit isNegated (1 byte)
+ if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
+ && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
+ (MO.isReg() &&
+ (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // Emit isAbsolute (1 byte)
+ if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit relative addressing mode (1 byte)
+ EmitByte(0, OS);
+
+  // Emit kc_bank. This will be adjusted later by r600_asm.
+ EmitByte(0, OS);
+
+ // Emit the literal value, if applicable (4 bytes).
+ Emit(Value.i, OS);
+
+}
+
+void R600MCCodeEmitter::EmitDst(const MCInst &MI, raw_ostream &OS) const {
+
+ const MCOperand &MO = MI.getOperand(0);
+ if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) {
+ // Emit the destination register index (1 byte)
+ EmitByte(getHWReg(MO.getReg()), OS);
+
+ // Emit the element of the destination register (1 byte)
+ EmitByte(getHWRegChan(MO.getReg()), OS);
+
+ // Emit isClamped (1 byte)
+ if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // Emit writemask (1 byte).
+ if (isFlagSet(MI, 0, MO_FLAG_MASK)) {
+ EmitByte(0, OS);
+ } else {
+ EmitByte(1, OS);
+ }
+
+ // XXX: Emit relative addressing mode
+ EmitByte(0, OS);
+ } else {
+ // XXX: Handle other operand types. Are there any for destination regs?
+ EmitNullBytes(DST_BYTE_COUNT, OS);
+ }
+}
+
+void R600MCCodeEmitter::EmitALU(const MCInst &MI, unsigned numSrc,
+ SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+
+ // Emit the instruction (2 bytes)
+ EmitTwoBytes(getBinaryCodeForInstr(MI, Fixups), OS);
+
+ // Emit IsLast (for this instruction group) (1 byte)
+ if (isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) {
+ EmitByte(0, OS);
+ } else {
+ EmitByte(1, OS);
+ }
+
+ // Emit isOp3 (1 byte)
+ if (numSrc == 3) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit push modifier
+ if(isFlagSet(MI, 1, MO_FLAG_PUSH)) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit predicate (1 byte)
+ int PredIdx = MCDesc.findFirstPredOperandIdx();
+ if (PredIdx > -1)
+ switch(MI.getOperand(PredIdx).getReg()) {
+ case AMDGPU::PRED_SEL_ZERO:
+ EmitByte(2, OS);
+ break;
+ case AMDGPU::PRED_SEL_ONE:
+ EmitByte(3, OS);
+ break;
+ default:
+ EmitByte(0, OS);
+ break;
+ }
+ else {
+ EmitByte(0, OS);
+ }
+
+
+ // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
+ // r600_asm.c sets it.
+ EmitByte(0, OS);
+
+ // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for.
+ EmitByte(0, OS);
+
+ // XXX: Emit OMOD (1 byte) Not implemented.
+ EmitByte(0, OS);
+
+ // XXX: Emit index_mode. I think this is for indirect addressing, so we
+ // don't need to worry about it.
+ EmitByte(0, OS);
+}
+
+void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const {
+
+ unsigned opcode = MI.getOpcode();
+ bool hasOffsets = (opcode == AMDGPU::TEX_LD);
+ unsigned op_offset = hasOffsets ? 3 : 0;
+ int64_t sampler = MI.getOperand(op_offset+2).getImm();
+ int64_t textureType = MI.getOperand(op_offset+3).getImm();
+ unsigned srcSelect[4] = {0, 1, 2, 3};
+
+  // Emit the instruction type tag (INSTR_TEX)
+  EmitByte(INSTR_TEX, OS);
+
+ // Emit instruction
+ EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
+
+  // XXX: Emit resource id. r600_shader.c uses sampler + 1. Why?
+ EmitByte(sampler + 1 + 1, OS);
+
+ // Emit source register
+ EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
+
+ // XXX: Emit src isRelativeAddress
+ EmitByte(0, OS);
+
+ // Emit destination register
+ EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
+
+  // XXX: Emit dst isRelativeAddress
+ EmitByte(0, OS);
+
+ // XXX: Emit dst select
+ EmitByte(0, OS); // X
+ EmitByte(1, OS); // Y
+ EmitByte(2, OS); // Z
+ EmitByte(3, OS); // W
+
+ // XXX: Emit lod bias
+ EmitByte(0, OS);
+
+ // XXX: Emit coord types
+ unsigned coordType[4] = {1, 1, 1, 1};
+
+ if (textureType == TEXTURE_RECT
+ || textureType == TEXTURE_SHADOWRECT) {
+ coordType[ELEMENT_X] = 0;
+ coordType[ELEMENT_Y] = 0;
+ }
+
+ if (textureType == TEXTURE_1D_ARRAY
+ || textureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+ coordType[ELEMENT_Y] = 0;
+ } else {
+ coordType[ELEMENT_Z] = 0;
+ srcSelect[ELEMENT_Z] = ELEMENT_Y;
+ }
+ } else if (textureType == TEXTURE_2D_ARRAY
+ || textureType == TEXTURE_SHADOW2D_ARRAY) {
+ coordType[ELEMENT_Z] = 0;
+ }
+
+ for (unsigned i = 0; i < 4; i++) {
+ EmitByte(coordType[i], OS);
+ }
+
+ // XXX: Emit offsets
+ if (hasOffsets)
+ for (unsigned i = 2; i < 5; i++)
+ EmitByte(MI.getOperand(i).getImm()<<1, OS);
+ else
+ EmitNullBytes(3, OS);
+
+ // Emit sampler id
+ EmitByte(sampler, OS);
+
+ // XXX:Emit source select
+ if ((textureType == TEXTURE_SHADOW1D
+ || textureType == TEXTURE_SHADOW2D
+ || textureType == TEXTURE_SHADOWRECT
+ || textureType == TEXTURE_SHADOW1D_ARRAY)
+ && opcode != AMDGPU::TEX_SAMPLE_C_L
+ && opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+ srcSelect[ELEMENT_W] = ELEMENT_Z;
+ }
+
+ for (unsigned i = 0; i < 4; i++) {
+ EmitByte(srcSelect[i], OS);
+ }
+}
+
+void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
+
+ // Emit instruction type
+ EmitByte(INSTR_FC, OS);
+
+ // Emit SRC
+ unsigned NumOperands = MI.getNumOperands();
+ if (NumOperands > 0) {
+ assert(NumOperands == 1);
+ EmitSrc(MI, 0, OS);
+ } else {
+ EmitNullBytes(SRC_BYTE_COUNT, OS);
+ }
+
+ // Emit FC Instruction
+ enum FCInstr instr;
+ switch (MI.getOpcode()) {
+ case AMDGPU::BREAK_LOGICALZ_f32:
+ instr = FC_BREAK;
+ break;
+ case AMDGPU::BREAK_LOGICALNZ_f32:
+ instr = FC_BREAK_NZ;
+ break;
+ case AMDGPU::BREAK_LOGICALNZ_i32:
+ instr = FC_BREAK_NZ_INT;
+ break;
+ case AMDGPU::BREAK_LOGICALZ_i32:
+ instr = FC_BREAK_Z_INT;
+ break;
+ case AMDGPU::CONTINUE_LOGICALNZ_f32:
+ case AMDGPU::CONTINUE_LOGICALNZ_i32:
+ instr = FC_CONTINUE;
+ break;
+ case AMDGPU::IF_LOGICALNZ_f32:
+    instr = FC_IF;
+    break;
+ case AMDGPU::IF_LOGICALNZ_i32:
+ instr = FC_IF_INT;
+ break;
+ case AMDGPU::IF_LOGICALZ_f32:
+ abort();
+ break;
+ case AMDGPU::ELSE:
+ instr = FC_ELSE;
+ break;
+ case AMDGPU::ENDIF:
+ instr = FC_ENDIF;
+ break;
+ case AMDGPU::ENDLOOP:
+ instr = FC_ENDLOOP;
+ break;
+ case AMDGPU::WHILELOOP:
+ instr = FC_BGNLOOP;
+ break;
+ default:
+ abort();
+ break;
+ }
+ EmitByte(instr, OS);
+}
+
+void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
+ raw_ostream &OS) const {
+
+ for (unsigned int i = 0; i < ByteCount; i++) {
+ EmitByte(0, OS);
+ }
+}
+
+void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
+ OS.write((uint8_t) Byte & 0xff);
+}
+
+void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
+ raw_ostream &OS) const {
+ OS.write((uint8_t) (Bytes & 0xff));
+ OS.write((uint8_t) ((Bytes >> 8) & 0xff));
+}
+
+void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 4; i++) {
+ OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
+ }
+}
+
+void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 8; i++) {
+ EmitByte((Value >> (8 * i)) & 0xff, OS);
+ }
+}
+
+unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
+ switch(reg) {
+ case AMDGPU::ZERO:
+ case AMDGPU::ONE:
+ case AMDGPU::ONE_INT:
+ case AMDGPU::NEG_ONE:
+ case AMDGPU::HALF:
+ case AMDGPU::NEG_HALF:
+ case AMDGPU::ALU_LITERAL_X:
+ case AMDGPU::PREDICATE_BIT:
+ case AMDGPU::PRED_SEL_OFF:
+ case AMDGPU::PRED_SEL_ZERO:
+ case AMDGPU::PRED_SEL_ONE:
+ return 0;
+ default: return getHWRegChanGen(reg);
+ }
+}
+unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
+ unsigned HWReg;
+
+ HWReg = MRI.getEncodingValue(RegNo);
+ if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(RegNo)) {
+ HWReg += 512;
+ }
+ return HWReg;
+}
+
+uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ if (MO.isReg()) {
+ return getHWReg(MO.getReg());
+ } else {
+ return MO.getImm();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Encoding helper functions
+//===----------------------------------------------------------------------===//
+
+bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::BREAK_LOGICALZ_f32:
+ case AMDGPU::BREAK_LOGICALNZ_i32:
+ case AMDGPU::BREAK_LOGICALZ_i32:
+ case AMDGPU::BREAK_LOGICALNZ_f32:
+ case AMDGPU::CONTINUE_LOGICALNZ_f32:
+ case AMDGPU::IF_LOGICALNZ_i32:
+ case AMDGPU::IF_LOGICALZ_f32:
+ case AMDGPU::ELSE:
+ case AMDGPU::ENDIF:
+ case AMDGPU::ENDLOOP:
+ case AMDGPU::IF_LOGICALNZ_f32:
+ case AMDGPU::WHILELOOP:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
+ unsigned Flag) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+ unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
+ if (FlagIndex == 0) {
+ return false;
+ }
+ assert(MI.getOperand(FlagIndex).isImm());
+ return !!((MI.getOperand(FlagIndex).getImm() >>
+ (NUM_MO_FLAGS * Operand)) & Flag);
+}
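+
+// R600HwRegInfo.include defines its helpers as members of R600RegisterInfo;
+// temporarily alias that name so they compile as members of this emitter.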
+#define R600RegisterInfo R600MCCodeEmitter
+#include "R600HwRegInfo.include"
+#undef R600RegisterInfo
+
+#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
new file mode 100644
index 0000000..c06afd3
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -0,0 +1,297 @@
+//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The SI code emitter produces machine code that can be executed directly on
+// the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define LITERAL_REG 255
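+// Marks source operand src_idx (1-based) as a VGPR in the VOP encodings;
+// applied by VOPPostEncode() below.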
+#define VGPR_BIT(src_idx) (1ULL << (9 * (src_idx) - 1))
+#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
+
+
+// These must be kept in sync with SIInstructions.td and also the
+// InstrEncodingInfo array in SIInstrInfo.cpp.
+//
+// NOTE: This enum is only used to identify the encoding type within LLVM;
+// the actual encoding type that is part of the instruction format is
+// different.
+namespace SIInstrEncodingType {
+ enum Encoding {
+ EXP = 0,
+ LDS = 1,
+ MIMG = 2,
+ MTBUF = 3,
+ MUBUF = 4,
+ SMRD = 5,
+ SOP1 = 6,
+ SOP2 = 7,
+ SOPC = 8,
+ SOPK = 9,
+ SOPP = 10,
+ VINTRP = 11,
+ VOP1 = 12,
+ VOP2 = 13,
+ VOP3 = 14,
+ VOPC = 15
+ };
+}
+
+using namespace llvm;
+
+namespace {
+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
+ SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+
+public:
+ SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+
+ ~SIMCCodeEmitter() { }
+
+ /// EncodeInstruction - Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getMachineOpValue - Return the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+public:
+
+ /// GPRAlign - Encode a sequence of registers with the correct alignment.
+ unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
+
+ /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
+ virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const;
+
+  /// GPR4AlignEncode - Encoding for when 4 consecutive registers are used
+ virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const;
+
+  /// i32LiteralEncode - Encode an i32 literal that is used as an operand
+  /// for an instruction in place of a register.
+ virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const;
+
+ /// SMRDmemriEncode - Encoding for SMRD indexed loads
+ virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const;
+
+ /// VOPPostEncode - Post-Encoder method for VOP instructions
+ virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
+
+private:
+
+  /// getEncodingType - Return the SIInstrEncodingType for this instruction.
+ unsigned getEncodingType(const MCInst &MI) const;
+
+  /// getEncodingBytes - Get the size in bytes of this instruction's encoding.
+ unsigned getEncodingBytes(const MCInst &MI) const;
+
+ /// getRegBinaryCode - Returns the hardware encoding for a register
+ unsigned getRegBinaryCode(unsigned reg) const;
+
+ /// getHWRegNum - Generated function that returns the hardware encoding for
+ /// a register
+ unsigned getHWRegNum(unsigned reg) const;
+
+};
+
+} // End anonymous namespace
+
+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
+ unsigned bytes = getEncodingBytes(MI);
+ for (unsigned i = 0; i < bytes; i++) {
+ OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
+ }
+}
+
+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ return getRegBinaryCode(MO.getReg());
+ } else if (MO.isImm()) {
+ return MO.getImm();
+ } else if (MO.isFPImm()) {
+ // XXX: Not all instructions can use inline literals
+ // XXX: We should make sure this is a 32-bit constant
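+    // For VOP instructions, VOPPostEncode() places the literal's value in
+    // bits [63:32] of the encoding.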
+ return LITERAL_REG;
+ } else{
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Operand Encodings
+//===----------------------------------------------------------------------===//
+
+unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
+ unsigned shift) const {
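+  // Register groups are allocated with natural alignment, so the hardware
+  // encoding can drop the low bits: one for pairs, two for quads.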
+ unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
+  return regCode >> shift;
+}
+unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
+ unsigned OpNo ,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ return GPRAlign(MI, OpNo, 1);
+}
+
+unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
+ unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ return GPRAlign(MI, OpNo, 2);
+}
+
+uint64_t SIMCCodeEmitter::i32LiteralEncode(const MCInst &MI,
+ unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const {
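+  // The low dword selects the literal source (LITERAL_REG); the value itself
+  // lands in bits [63:32], which getEncodingBytes() accounts for.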
+ return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
+}
+
+#define SMRD_OFFSET_MASK 0xff
+#define SMRD_IMM_SHIFT 8
+#define SMRD_SBASE_MASK 0x3f
+#define SMRD_SBASE_SHIFT 9
+/// SMRDmemriEncode - This function is responsible for encoding the offset
+/// and the base ptr for SMRD instructions. It returns a bit string in
+/// this format:
+///
+/// OFFSET = bits{7-0}
+/// IMM = bits{8}
+/// SBASE = bits{14-9}
+///
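+/// For example, an immediate offset of 4 with an SBASE encoding of 2 yields
+/// 0x504 (0x004 | 0x100 | (2 << 9)).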
+uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ uint32_t Encoding;
+
+ const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
+
+ //XXX: Use this function for SMRD loads with register offsets
+ assert(OffsetOp.isImm());
+
+ Encoding =
+ (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
+ | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
+ | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
+ ;
+
+ return Encoding;
+}
+
+//===----------------------------------------------------------------------===//
+// Post Encoder Callbacks
+//===----------------------------------------------------------------------===//
+
+uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const {
+ unsigned encodingType = getEncodingType(MI);
+ unsigned numSrcOps;
+ unsigned vgprBitOffset;
+
+ if (encodingType == SIInstrEncodingType::VOP3) {
+ numSrcOps = 3;
+ vgprBitOffset = 32;
+ } else {
+ numSrcOps = 1;
+ vgprBitOffset = 0;
+ }
+
+ // Add one to skip over the destination reg operand.
+ for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
+ const MCOperand &MO = MI.getOperand(opIdx);
+ if (MO.isReg()) {
+ unsigned reg = MI.getOperand(opIdx).getReg();
+ if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
+ AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
+ Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
+ }
+    } else if (MO.isFPImm()) {
+      // XXX: Not all instructions can use inline literals
+      // XXX: We should make sure this is a 32-bit constant
+      // Emit the IEEE bit pattern of the f32 literal, not its integer value.
+      union { float F; uint32_t I; } Imm;
+      Imm.F = MO.getFPImm();
+      Value |= ((uint64_t)Imm.I) << 32;
+    }
+ }
+ return Value;
+}
+
+//===----------------------------------------------------------------------===//
+// Encoding helper functions
+//===----------------------------------------------------------------------===//
+
+unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
+ return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
+}
+
+unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
+
+ // Instructions with literal constants are expanded to 64-bits, and
+ // the constant is stored in bits [63:32]
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ if (MI.getOperand(i).isFPImm()) {
+ return 8;
+ }
+ }
+
+ // This instruction always has a literal
+ if (MI.getOpcode() == AMDGPU::S_MOV_IMM_I32) {
+ return 8;
+ }
+
+ unsigned encoding_type = getEncodingType(MI);
+ switch (encoding_type) {
+ case SIInstrEncodingType::EXP:
+ case SIInstrEncodingType::LDS:
+ case SIInstrEncodingType::MUBUF:
+ case SIInstrEncodingType::MTBUF:
+ case SIInstrEncodingType::MIMG:
+ case SIInstrEncodingType::VOP3:
+ return 8;
+ default:
+ return 4;
+ }
+}
+
+
+unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
+ switch (reg) {
+ case AMDGPU::M0: return 124;
+ case AMDGPU::SREG_LIT_0: return 128;
+ default: return MRI.getEncodingValue(reg);
+ }
+}
+
diff --git a/lib/Target/AMDGPU/Makefile b/lib/Target/AMDGPU/Makefile
new file mode 100644
index 0000000..7303f57
--- /dev/null
+++ b/lib/Target/AMDGPU/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/AMDGPU/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMAMDGPUCodeGen
+TARGET = AMDGPU
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
+ AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
+ AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \
+ AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
+ AMDGPUGenAsmWriter.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
new file mode 100644
index 0000000..2f13fe1
--- /dev/null
+++ b/lib/Target/AMDGPU/Processors.td
@@ -0,0 +1,28 @@
+//===-- Processors.td - Supported R600 and SI processors ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDGPU processors supported by this backend.
+//
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
+: Processor<Name, itin, Features>;
+def : Proc<"rv710", R600_EG_Itin, []>;
+def : Proc<"rv730", R600_EG_Itin, []>;
+def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
+def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
+
diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
new file mode 100644
index 0000000..4db199a
--- /dev/null
+++ b/lib/Target/AMDGPU/R600Defines.h
@@ -0,0 +1,22 @@
+//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Operand Flags
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+#define MO_FLAG_PUSH (1 << 4)
+#define MO_FLAG_NOT_LAST (1 << 5)
+#define NUM_MO_FLAGS 6
+
+// Helper for getting the operand index of the instruction flags operand
+// (bits 8-7 of TSFlags; an index of 0 means no flags operand).
+#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
new file mode 100644
index 0000000..6ab7793
--- /dev/null
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -0,0 +1,163 @@
+//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Vector, Reduction, and Cube instructions need to fill the entire instruction
+// group to work correctly. This pass expands these individual instructions
+// into several instructions that will completely fill the instruction group.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+public:
+ R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "R600 Expand special instructions pass";
+ }
+};
+
+} // End anonymous namespace
+
+char R600ExpandSpecialInstrsPass::ID = 0;
+
+FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
+ return new R600ExpandSpecialInstrsPass(TM);
+}
+
+bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
+
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ while (I != MBB.end()) {
+ MachineInstr &MI = *I;
+ I = llvm::next(I);
+
+ bool IsReduction = TII->isReductionOp(MI.getOpcode());
+ bool IsVector = TII->isVector(MI);
+ bool IsCube = TII->isCubeOp(MI.getOpcode());
+ if (!IsReduction && !IsVector && !IsCube) {
+ continue;
+ }
+
+ // Expand the instruction
+ //
+ // Reduction instructions:
+ // T0_X = DP4 T1_XYZW, T2_XYZW
+ // becomes:
+      // T0_X = DP4 T1_X, T2_X
+      // T0_Y (write masked) = DP4 T1_Y, T2_Y
+      // T0_Z (write masked) = DP4 T1_Z, T2_Z
+      // T0_W (write masked) = DP4 T1_W, T2_W
+ //
+ // Vector instructions:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // becomes:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // T0_Y (write masked) = MULLO_INT T1_X, T2_X
+ // T0_Z (write masked) = MULLO_INT T1_X, T2_X
+ // T0_W (write masked) = MULLO_INT T1_X, T2_X
+ //
+ // Cube instructions:
+ // T0_XYZW = CUBE T1_XYZW
+ // becomes:
+      // T0_X = CUBE T1_Z, T1_Y
+ // T0_Y = CUBE T1_Z, T1_X
+ // T0_Z = CUBE T1_X, T1_Z
+ // T0_W = CUBE T1_Y, T1_Z
+ for (unsigned Chan = 0; Chan < 4; Chan++) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned Src0 = MI.getOperand(1).getReg();
+ unsigned Src1 = 0;
+
+ // Determine the correct source registers
+ if (!IsCube) {
+ Src1 = MI.getOperand(2).getReg();
+ }
+ if (IsReduction) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex);
+ Src1 = TRI.getSubReg(Src1, SubRegIndex);
+ } else if (IsCube) {
+ static const int CubeSrcSwz[] = {2, 2, 0, 1};
+ unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
+ unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
+ Src1 = TRI.getSubReg(Src0, SubRegIndex1);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex0);
+ }
+
+      // Determine the correct destination register.
+ unsigned Flags = 0;
+ if (IsCube) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ DstReg = TRI.getSubReg(DstReg, SubRegIndex);
+ } else {
+ // Mask the write if the original instruction does not write to
+ // the current Channel.
+ Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
+ unsigned DstBase = TRI.getEncodingValue(DstReg);
+ DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+ }
+
+ // Set the IsLast bit
+ Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
+
+ // Add the new instruction
+ unsigned Opcode;
+ if (IsCube) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::CUBE_r600_pseudo:
+ Opcode = AMDGPU::CUBE_r600_real;
+ break;
+ case AMDGPU::CUBE_eg_pseudo:
+ Opcode = AMDGPU::CUBE_eg_real;
+ break;
+ default:
+ assert(!"Unknown CUBE instruction");
+ Opcode = 0;
+ break;
+ }
+ } else {
+ Opcode = MI.getOpcode();
+ }
+ MachineInstr *NewMI =
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
+ .addReg(Src0)
+ .addReg(Src1)
+ .addImm(0); // Flag
+
+ NewMI->setIsInsideBundle(Chan != 0);
+ TII->addFlag(NewMI, 0, Flags);
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
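
As a standalone sanity check of the per-channel index math the expansion
loop above relies on (the swizzle table is copied verbatim from the pass;
lane() and chanName() are illustrative helpers, not patch code):

  #include <cstdio>

  // A 128-bit TReg is four consecutive 32-bit registers, so the lane for
  // (Base, Chan) sits at index Base * 4 + Chan in R600_TReg32, matching
  // the DstBase * 4 + Chan computation in the pass.
  static unsigned lane(unsigned Base, unsigned Chan) { return Base * 4 + Chan; }

  // Printing helper (illustrative only).
  static const char *chanName(unsigned Chan) {
    static const char *Names[] = {"X", "Y", "Z", "W"};
    return Names[Chan];
  }

  int main() {
    // Cube source swizzle from the pass: Src0 reads channel CubeSrcSwz[Chan],
    // Src1 reads channel CubeSrcSwz[3 - Chan].
    static const int CubeSrcSwz[] = {2, 2, 0, 1};
    for (unsigned Chan = 0; Chan < 4; ++Chan)
      std::printf("T0_%s = CUBE T1_%s, T1_%s   (dst lane %u)\n",
                  chanName(Chan), chanName(CubeSrcSwz[Chan]),
                  chanName(CubeSrcSwz[3 - Chan]), lane(0, Chan));
    // Prints exactly the four-instruction cube expansion documented above.
    return 0;
  }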
diff --git a/lib/Target/AMDGPU/R600GenRegisterInfo.pl b/lib/Target/AMDGPU/R600GenRegisterInfo.pl
new file mode 100644
index 0000000..0100560
--- /dev/null
+++ b/lib/Target/AMDGPU/R600GenRegisterInfo.pl
@@ -0,0 +1,200 @@
+#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+#
+# This Perl script prints to stdout .td code to be used as R600RegisterInfo.td.
+# It also generates a file called R600HwRegInfo.include, which contains helper
+# functions for determining the hardware encoding of registers.
+#
+#===------------------------------------------------------------------------===#
+
+use strict;
+use warnings;
+
+use constant CONST_REG_COUNT => 100;
+use constant TEMP_REG_COUNT => 128;
+
+my $CREG_MAX = CONST_REG_COUNT - 1;
+my $TREG_MAX = TEMP_REG_COUNT - 1;
+
+print <<STRING;
+
+class R600Reg <string name, bits<16> encoding> : Register<name> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
+ let HWEncoding = encoding;
+}
+
+STRING
+
+my $i;
+
+### REG DEFS ###
+
+my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C");
+my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T");
+
+my @t128reg;
+my @treg_x;
+for (my $i = 0; $i < TEMP_REG_COUNT; $i++) {
+ my $name = "T$i\_XYZW";
+ print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W], $i >;\n};
+ $t128reg[$i] = $name;
+ $treg_x[$i] = "T$i\_X";
+ if ($i % 10 == 0) {
+ $t128reg[$i] .= "\n";
+ $treg_x[$i] .= "\n";
+ }
+}
+
+my $treg_string = join(",", @treg_list);
+my $creg_list = join(",", @creg_list);
+my $t128_string = join(",", @t128reg);
+my $treg_x_string = join(",", @treg_x);
+print <<STRING;
+
+class RegSet <dag s> {
+ dag set = s;
+}
+
+def ZERO : R600Reg<"0.0", 248>;
+def ONE : R600Reg<"1.0", 249>;
+def NEG_ONE : R600Reg<"-1.0", 249>;
+def ONE_INT : R600Reg<"1", 250>;
+def HALF : R600Reg<"0.5", 252>;
+def NEG_HALF : R600Reg<"-0.5", 252>;
+def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
+def PV_X : R600Reg<"pv.x", 254>;
+def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
+def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 0>;
+def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 0>;
+
+def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ $creg_list)>;
+
+def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ $treg_string)>;
+
+def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ $treg_x_string)>;
+
+def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ R600_TReg32,
+ R600_CReg32,
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
+
+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
+ PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
+
+def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
+ PREDICATE_BIT)>;
+
+def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
+ $t128_string)>
+{
+ let CopyCost = -1;
+}
+
+STRING
+
+my %index_map;
+my %chan_map;
+
+for ($i = 0; $i <= $#creg_list; $i++) {
+ push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]);
+ push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]);
+}
+
+for ($i = 0; $i <= $#treg_list; $i++) {
+ push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]);
+ push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]);
+}
+
+for ($i = 0; $i <= $#t128reg; $i++) {
+ push(@{$index_map{$i}}, $t128reg[$i]);
+ push(@{$chan_map{'X'}}, $t128reg[$i]);
+}
+
+open(OUTFILE, ">", "R600HwRegInfo.include") or die "Cannot open R600HwRegInfo.include: $!";
+
+print OUTFILE <<STRING;
+
+unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
+{
+ switch(reg) {
+ default: assert(!"Unknown register"); return 0;
+STRING
+
+foreach my $key (keys(%chan_map)) {
+ foreach my $reg (@{$chan_map{$key}}) {
+ chomp($reg);
+ print OUTFILE " case AMDGPU::$reg:\n";
+ }
+ my $val;
+ if ($key eq 'X') {
+ $val = 0;
+ } elsif ($key eq 'Y') {
+ $val = 1;
+ } elsif ($key eq 'Z') {
+ $val = 2;
+ } elsif ($key eq 'W') {
+ $val = 3;
+ } else {
+ die("Unknown chan value; $key");
+ }
+ print OUTFILE " return $val;\n\n";
+}
+
+print OUTFILE " }\n}\n\n";
+
+sub print_reg_defs {
+ my ($count, $prefix) = @_;
+
+ my @reg_list;
+
+ for ($i = 0; $i < $count; $i++) {
+ my $hw_index = get_hw_index($i);
+ my $chan= get_chan_str($i);
+ my $name = "$prefix$hw_index\_$chan";
+ print qq{def $name : R600Reg <"$prefix$hw_index.$chan", $hw_index>;\n};
+ $reg_list[$i] = $name;
+ if ($i % 10 == 0) {
+ $reg_list[$i] .= "\n";
+ }
+ }
+ return @reg_list;
+}
+
+#Helper functions
+sub get_hw_index {
+ my ($index) = @_;
+ return int($index / 4);
+}
+
+sub get_chan_str {
+ my ($index) = @_;
+ my $chan = $index % 4;
+ if ($chan == 0 ) {
+ return 'X';
+ } elsif ($chan == 1) {
+ return 'Y';
+ } elsif ($chan == 2) {
+ return 'Z';
+ } elsif ($chan == 3) {
+ return 'W';
+ } else {
+ die("Unknown chan value: $chan");
+ }
+}
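
For reference, the helper math above works out as follows; this is a C++
restatement of get_hw_index() / get_chan_str() for checking, not code from
the patch:

  #include <cassert>

  // Register list index i maps to hardware index i / 4 and channel i % 4
  // (0 = X, 1 = Y, 2 = Z, 3 = W).
  static unsigned hwIndex(unsigned I) { return I / 4; }
  static char chanOf(unsigned I) { return "XYZW"[I % 4]; }

  int main() {
    // List index 5 is hardware index 1, channel Y, so the script emits:
    //   def T1_Y : R600Reg <"T1.Y", 1>;
    assert(hwIndex(5) == 1 && chanOf(5) == 'Y');
    // First and last of the 128 * 4 temp register defs.
    assert(hwIndex(0) == 0 && chanOf(0) == 'X');
    assert(hwIndex(511) == 127 && chanOf(511) == 'W');
    return 0;
  }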
diff --git a/lib/Target/AMDGPU/R600HwRegInfo.include b/lib/Target/AMDGPU/R600HwRegInfo.include
new file mode 100644
index 0000000..e830a4c
--- /dev/null
+++ b/lib/Target/AMDGPU/R600HwRegInfo.include
@@ -0,0 +1,1056 @@
+
+unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
+{
+ switch(reg) {
+ default: assert(!"Unknown register"); return 0;
+ case AMDGPU::C0_Z:
+ case AMDGPU::C1_Z:
+ case AMDGPU::C2_Z:
+ case AMDGPU::C3_Z:
+ case AMDGPU::C4_Z:
+ case AMDGPU::C5_Z:
+ case AMDGPU::C6_Z:
+ case AMDGPU::C7_Z:
+ case AMDGPU::C8_Z:
+ case AMDGPU::C9_Z:
+ case AMDGPU::C10_Z:
+ case AMDGPU::C11_Z:
+ case AMDGPU::C12_Z:
+ case AMDGPU::C13_Z:
+ case AMDGPU::C14_Z:
+ case AMDGPU::C15_Z:
+ case AMDGPU::C16_Z:
+ case AMDGPU::C17_Z:
+ case AMDGPU::C18_Z:
+ case AMDGPU::C19_Z:
+ case AMDGPU::C20_Z:
+ case AMDGPU::C21_Z:
+ case AMDGPU::C22_Z:
+ case AMDGPU::C23_Z:
+ case AMDGPU::C24_Z:
+ case AMDGPU::C25_Z:
+ case AMDGPU::C26_Z:
+ case AMDGPU::C27_Z:
+ case AMDGPU::C28_Z:
+ case AMDGPU::C29_Z:
+ case AMDGPU::C30_Z:
+ case AMDGPU::C31_Z:
+ case AMDGPU::C32_Z:
+ case AMDGPU::C33_Z:
+ case AMDGPU::C34_Z:
+ case AMDGPU::C35_Z:
+ case AMDGPU::C36_Z:
+ case AMDGPU::C37_Z:
+ case AMDGPU::C38_Z:
+ case AMDGPU::C39_Z:
+ case AMDGPU::C40_Z:
+ case AMDGPU::C41_Z:
+ case AMDGPU::C42_Z:
+ case AMDGPU::C43_Z:
+ case AMDGPU::C44_Z:
+ case AMDGPU::C45_Z:
+ case AMDGPU::C46_Z:
+ case AMDGPU::C47_Z:
+ case AMDGPU::C48_Z:
+ case AMDGPU::C49_Z:
+ case AMDGPU::C50_Z:
+ case AMDGPU::C51_Z:
+ case AMDGPU::C52_Z:
+ case AMDGPU::C53_Z:
+ case AMDGPU::C54_Z:
+ case AMDGPU::C55_Z:
+ case AMDGPU::C56_Z:
+ case AMDGPU::C57_Z:
+ case AMDGPU::C58_Z:
+ case AMDGPU::C59_Z:
+ case AMDGPU::C60_Z:
+ case AMDGPU::C61_Z:
+ case AMDGPU::C62_Z:
+ case AMDGPU::C63_Z:
+ case AMDGPU::C64_Z:
+ case AMDGPU::C65_Z:
+ case AMDGPU::C66_Z:
+ case AMDGPU::C67_Z:
+ case AMDGPU::C68_Z:
+ case AMDGPU::C69_Z:
+ case AMDGPU::C70_Z:
+ case AMDGPU::C71_Z:
+ case AMDGPU::C72_Z:
+ case AMDGPU::C73_Z:
+ case AMDGPU::C74_Z:
+ case AMDGPU::C75_Z:
+ case AMDGPU::C76_Z:
+ case AMDGPU::C77_Z:
+ case AMDGPU::C78_Z:
+ case AMDGPU::C79_Z:
+ case AMDGPU::C80_Z:
+ case AMDGPU::C81_Z:
+ case AMDGPU::C82_Z:
+ case AMDGPU::C83_Z:
+ case AMDGPU::C84_Z:
+ case AMDGPU::C85_Z:
+ case AMDGPU::C86_Z:
+ case AMDGPU::C87_Z:
+ case AMDGPU::C88_Z:
+ case AMDGPU::C89_Z:
+ case AMDGPU::C90_Z:
+ case AMDGPU::C91_Z:
+ case AMDGPU::C92_Z:
+ case AMDGPU::C93_Z:
+ case AMDGPU::C94_Z:
+ case AMDGPU::C95_Z:
+ case AMDGPU::C96_Z:
+ case AMDGPU::C97_Z:
+ case AMDGPU::C98_Z:
+ case AMDGPU::C99_Z:
+ case AMDGPU::T0_Z:
+ case AMDGPU::T1_Z:
+ case AMDGPU::T2_Z:
+ case AMDGPU::T3_Z:
+ case AMDGPU::T4_Z:
+ case AMDGPU::T5_Z:
+ case AMDGPU::T6_Z:
+ case AMDGPU::T7_Z:
+ case AMDGPU::T8_Z:
+ case AMDGPU::T9_Z:
+ case AMDGPU::T10_Z:
+ case AMDGPU::T11_Z:
+ case AMDGPU::T12_Z:
+ case AMDGPU::T13_Z:
+ case AMDGPU::T14_Z:
+ case AMDGPU::T15_Z:
+ case AMDGPU::T16_Z:
+ case AMDGPU::T17_Z:
+ case AMDGPU::T18_Z:
+ case AMDGPU::T19_Z:
+ case AMDGPU::T20_Z:
+ case AMDGPU::T21_Z:
+ case AMDGPU::T22_Z:
+ case AMDGPU::T23_Z:
+ case AMDGPU::T24_Z:
+ case AMDGPU::T25_Z:
+ case AMDGPU::T26_Z:
+ case AMDGPU::T27_Z:
+ case AMDGPU::T28_Z:
+ case AMDGPU::T29_Z:
+ case AMDGPU::T30_Z:
+ case AMDGPU::T31_Z:
+ case AMDGPU::T32_Z:
+ case AMDGPU::T33_Z:
+ case AMDGPU::T34_Z:
+ case AMDGPU::T35_Z:
+ case AMDGPU::T36_Z:
+ case AMDGPU::T37_Z:
+ case AMDGPU::T38_Z:
+ case AMDGPU::T39_Z:
+ case AMDGPU::T40_Z:
+ case AMDGPU::T41_Z:
+ case AMDGPU::T42_Z:
+ case AMDGPU::T43_Z:
+ case AMDGPU::T44_Z:
+ case AMDGPU::T45_Z:
+ case AMDGPU::T46_Z:
+ case AMDGPU::T47_Z:
+ case AMDGPU::T48_Z:
+ case AMDGPU::T49_Z:
+ case AMDGPU::T50_Z:
+ case AMDGPU::T51_Z:
+ case AMDGPU::T52_Z:
+ case AMDGPU::T53_Z:
+ case AMDGPU::T54_Z:
+ case AMDGPU::T55_Z:
+ case AMDGPU::T56_Z:
+ case AMDGPU::T57_Z:
+ case AMDGPU::T58_Z:
+ case AMDGPU::T59_Z:
+ case AMDGPU::T60_Z:
+ case AMDGPU::T61_Z:
+ case AMDGPU::T62_Z:
+ case AMDGPU::T63_Z:
+ case AMDGPU::T64_Z:
+ case AMDGPU::T65_Z:
+ case AMDGPU::T66_Z:
+ case AMDGPU::T67_Z:
+ case AMDGPU::T68_Z:
+ case AMDGPU::T69_Z:
+ case AMDGPU::T70_Z:
+ case AMDGPU::T71_Z:
+ case AMDGPU::T72_Z:
+ case AMDGPU::T73_Z:
+ case AMDGPU::T74_Z:
+ case AMDGPU::T75_Z:
+ case AMDGPU::T76_Z:
+ case AMDGPU::T77_Z:
+ case AMDGPU::T78_Z:
+ case AMDGPU::T79_Z:
+ case AMDGPU::T80_Z:
+ case AMDGPU::T81_Z:
+ case AMDGPU::T82_Z:
+ case AMDGPU::T83_Z:
+ case AMDGPU::T84_Z:
+ case AMDGPU::T85_Z:
+ case AMDGPU::T86_Z:
+ case AMDGPU::T87_Z:
+ case AMDGPU::T88_Z:
+ case AMDGPU::T89_Z:
+ case AMDGPU::T90_Z:
+ case AMDGPU::T91_Z:
+ case AMDGPU::T92_Z:
+ case AMDGPU::T93_Z:
+ case AMDGPU::T94_Z:
+ case AMDGPU::T95_Z:
+ case AMDGPU::T96_Z:
+ case AMDGPU::T97_Z:
+ case AMDGPU::T98_Z:
+ case AMDGPU::T99_Z:
+ case AMDGPU::T100_Z:
+ case AMDGPU::T101_Z:
+ case AMDGPU::T102_Z:
+ case AMDGPU::T103_Z:
+ case AMDGPU::T104_Z:
+ case AMDGPU::T105_Z:
+ case AMDGPU::T106_Z:
+ case AMDGPU::T107_Z:
+ case AMDGPU::T108_Z:
+ case AMDGPU::T109_Z:
+ case AMDGPU::T110_Z:
+ case AMDGPU::T111_Z:
+ case AMDGPU::T112_Z:
+ case AMDGPU::T113_Z:
+ case AMDGPU::T114_Z:
+ case AMDGPU::T115_Z:
+ case AMDGPU::T116_Z:
+ case AMDGPU::T117_Z:
+ case AMDGPU::T118_Z:
+ case AMDGPU::T119_Z:
+ case AMDGPU::T120_Z:
+ case AMDGPU::T121_Z:
+ case AMDGPU::T122_Z:
+ case AMDGPU::T123_Z:
+ case AMDGPU::T124_Z:
+ case AMDGPU::T125_Z:
+ case AMDGPU::T126_Z:
+ case AMDGPU::T127_Z:
+ return 2;
+
+ case AMDGPU::C0_W:
+ case AMDGPU::C1_W:
+ case AMDGPU::C2_W:
+ case AMDGPU::C3_W:
+ case AMDGPU::C4_W:
+ case AMDGPU::C5_W:
+ case AMDGPU::C6_W:
+ case AMDGPU::C7_W:
+ case AMDGPU::C8_W:
+ case AMDGPU::C9_W:
+ case AMDGPU::C10_W:
+ case AMDGPU::C11_W:
+ case AMDGPU::C12_W:
+ case AMDGPU::C13_W:
+ case AMDGPU::C14_W:
+ case AMDGPU::C15_W:
+ case AMDGPU::C16_W:
+ case AMDGPU::C17_W:
+ case AMDGPU::C18_W:
+ case AMDGPU::C19_W:
+ case AMDGPU::C20_W:
+ case AMDGPU::C21_W:
+ case AMDGPU::C22_W:
+ case AMDGPU::C23_W:
+ case AMDGPU::C24_W:
+ case AMDGPU::C25_W:
+ case AMDGPU::C26_W:
+ case AMDGPU::C27_W:
+ case AMDGPU::C28_W:
+ case AMDGPU::C29_W:
+ case AMDGPU::C30_W:
+ case AMDGPU::C31_W:
+ case AMDGPU::C32_W:
+ case AMDGPU::C33_W:
+ case AMDGPU::C34_W:
+ case AMDGPU::C35_W:
+ case AMDGPU::C36_W:
+ case AMDGPU::C37_W:
+ case AMDGPU::C38_W:
+ case AMDGPU::C39_W:
+ case AMDGPU::C40_W:
+ case AMDGPU::C41_W:
+ case AMDGPU::C42_W:
+ case AMDGPU::C43_W:
+ case AMDGPU::C44_W:
+ case AMDGPU::C45_W:
+ case AMDGPU::C46_W:
+ case AMDGPU::C47_W:
+ case AMDGPU::C48_W:
+ case AMDGPU::C49_W:
+ case AMDGPU::C50_W:
+ case AMDGPU::C51_W:
+ case AMDGPU::C52_W:
+ case AMDGPU::C53_W:
+ case AMDGPU::C54_W:
+ case AMDGPU::C55_W:
+ case AMDGPU::C56_W:
+ case AMDGPU::C57_W:
+ case AMDGPU::C58_W:
+ case AMDGPU::C59_W:
+ case AMDGPU::C60_W:
+ case AMDGPU::C61_W:
+ case AMDGPU::C62_W:
+ case AMDGPU::C63_W:
+ case AMDGPU::C64_W:
+ case AMDGPU::C65_W:
+ case AMDGPU::C66_W:
+ case AMDGPU::C67_W:
+ case AMDGPU::C68_W:
+ case AMDGPU::C69_W:
+ case AMDGPU::C70_W:
+ case AMDGPU::C71_W:
+ case AMDGPU::C72_W:
+ case AMDGPU::C73_W:
+ case AMDGPU::C74_W:
+ case AMDGPU::C75_W:
+ case AMDGPU::C76_W:
+ case AMDGPU::C77_W:
+ case AMDGPU::C78_W:
+ case AMDGPU::C79_W:
+ case AMDGPU::C80_W:
+ case AMDGPU::C81_W:
+ case AMDGPU::C82_W:
+ case AMDGPU::C83_W:
+ case AMDGPU::C84_W:
+ case AMDGPU::C85_W:
+ case AMDGPU::C86_W:
+ case AMDGPU::C87_W:
+ case AMDGPU::C88_W:
+ case AMDGPU::C89_W:
+ case AMDGPU::C90_W:
+ case AMDGPU::C91_W:
+ case AMDGPU::C92_W:
+ case AMDGPU::C93_W:
+ case AMDGPU::C94_W:
+ case AMDGPU::C95_W:
+ case AMDGPU::C96_W:
+ case AMDGPU::C97_W:
+ case AMDGPU::C98_W:
+ case AMDGPU::C99_W:
+ case AMDGPU::T0_W:
+ case AMDGPU::T1_W:
+ case AMDGPU::T2_W:
+ case AMDGPU::T3_W:
+ case AMDGPU::T4_W:
+ case AMDGPU::T5_W:
+ case AMDGPU::T6_W:
+ case AMDGPU::T7_W:
+ case AMDGPU::T8_W:
+ case AMDGPU::T9_W:
+ case AMDGPU::T10_W:
+ case AMDGPU::T11_W:
+ case AMDGPU::T12_W:
+ case AMDGPU::T13_W:
+ case AMDGPU::T14_W:
+ case AMDGPU::T15_W:
+ case AMDGPU::T16_W:
+ case AMDGPU::T17_W:
+ case AMDGPU::T18_W:
+ case AMDGPU::T19_W:
+ case AMDGPU::T20_W:
+ case AMDGPU::T21_W:
+ case AMDGPU::T22_W:
+ case AMDGPU::T23_W:
+ case AMDGPU::T24_W:
+ case AMDGPU::T25_W:
+ case AMDGPU::T26_W:
+ case AMDGPU::T27_W:
+ case AMDGPU::T28_W:
+ case AMDGPU::T29_W:
+ case AMDGPU::T30_W:
+ case AMDGPU::T31_W:
+ case AMDGPU::T32_W:
+ case AMDGPU::T33_W:
+ case AMDGPU::T34_W:
+ case AMDGPU::T35_W:
+ case AMDGPU::T36_W:
+ case AMDGPU::T37_W:
+ case AMDGPU::T38_W:
+ case AMDGPU::T39_W:
+ case AMDGPU::T40_W:
+ case AMDGPU::T41_W:
+ case AMDGPU::T42_W:
+ case AMDGPU::T43_W:
+ case AMDGPU::T44_W:
+ case AMDGPU::T45_W:
+ case AMDGPU::T46_W:
+ case AMDGPU::T47_W:
+ case AMDGPU::T48_W:
+ case AMDGPU::T49_W:
+ case AMDGPU::T50_W:
+ case AMDGPU::T51_W:
+ case AMDGPU::T52_W:
+ case AMDGPU::T53_W:
+ case AMDGPU::T54_W:
+ case AMDGPU::T55_W:
+ case AMDGPU::T56_W:
+ case AMDGPU::T57_W:
+ case AMDGPU::T58_W:
+ case AMDGPU::T59_W:
+ case AMDGPU::T60_W:
+ case AMDGPU::T61_W:
+ case AMDGPU::T62_W:
+ case AMDGPU::T63_W:
+ case AMDGPU::T64_W:
+ case AMDGPU::T65_W:
+ case AMDGPU::T66_W:
+ case AMDGPU::T67_W:
+ case AMDGPU::T68_W:
+ case AMDGPU::T69_W:
+ case AMDGPU::T70_W:
+ case AMDGPU::T71_W:
+ case AMDGPU::T72_W:
+ case AMDGPU::T73_W:
+ case AMDGPU::T74_W:
+ case AMDGPU::T75_W:
+ case AMDGPU::T76_W:
+ case AMDGPU::T77_W:
+ case AMDGPU::T78_W:
+ case AMDGPU::T79_W:
+ case AMDGPU::T80_W:
+ case AMDGPU::T81_W:
+ case AMDGPU::T82_W:
+ case AMDGPU::T83_W:
+ case AMDGPU::T84_W:
+ case AMDGPU::T85_W:
+ case AMDGPU::T86_W:
+ case AMDGPU::T87_W:
+ case AMDGPU::T88_W:
+ case AMDGPU::T89_W:
+ case AMDGPU::T90_W:
+ case AMDGPU::T91_W:
+ case AMDGPU::T92_W:
+ case AMDGPU::T93_W:
+ case AMDGPU::T94_W:
+ case AMDGPU::T95_W:
+ case AMDGPU::T96_W:
+ case AMDGPU::T97_W:
+ case AMDGPU::T98_W:
+ case AMDGPU::T99_W:
+ case AMDGPU::T100_W:
+ case AMDGPU::T101_W:
+ case AMDGPU::T102_W:
+ case AMDGPU::T103_W:
+ case AMDGPU::T104_W:
+ case AMDGPU::T105_W:
+ case AMDGPU::T106_W:
+ case AMDGPU::T107_W:
+ case AMDGPU::T108_W:
+ case AMDGPU::T109_W:
+ case AMDGPU::T110_W:
+ case AMDGPU::T111_W:
+ case AMDGPU::T112_W:
+ case AMDGPU::T113_W:
+ case AMDGPU::T114_W:
+ case AMDGPU::T115_W:
+ case AMDGPU::T116_W:
+ case AMDGPU::T117_W:
+ case AMDGPU::T118_W:
+ case AMDGPU::T119_W:
+ case AMDGPU::T120_W:
+ case AMDGPU::T121_W:
+ case AMDGPU::T122_W:
+ case AMDGPU::T123_W:
+ case AMDGPU::T124_W:
+ case AMDGPU::T125_W:
+ case AMDGPU::T126_W:
+ case AMDGPU::T127_W:
+ return 3;
+
+ case AMDGPU::C0_X:
+ case AMDGPU::C1_X:
+ case AMDGPU::C2_X:
+ case AMDGPU::C3_X:
+ case AMDGPU::C4_X:
+ case AMDGPU::C5_X:
+ case AMDGPU::C6_X:
+ case AMDGPU::C7_X:
+ case AMDGPU::C8_X:
+ case AMDGPU::C9_X:
+ case AMDGPU::C10_X:
+ case AMDGPU::C11_X:
+ case AMDGPU::C12_X:
+ case AMDGPU::C13_X:
+ case AMDGPU::C14_X:
+ case AMDGPU::C15_X:
+ case AMDGPU::C16_X:
+ case AMDGPU::C17_X:
+ case AMDGPU::C18_X:
+ case AMDGPU::C19_X:
+ case AMDGPU::C20_X:
+ case AMDGPU::C21_X:
+ case AMDGPU::C22_X:
+ case AMDGPU::C23_X:
+ case AMDGPU::C24_X:
+ case AMDGPU::C25_X:
+ case AMDGPU::C26_X:
+ case AMDGPU::C27_X:
+ case AMDGPU::C28_X:
+ case AMDGPU::C29_X:
+ case AMDGPU::C30_X:
+ case AMDGPU::C31_X:
+ case AMDGPU::C32_X:
+ case AMDGPU::C33_X:
+ case AMDGPU::C34_X:
+ case AMDGPU::C35_X:
+ case AMDGPU::C36_X:
+ case AMDGPU::C37_X:
+ case AMDGPU::C38_X:
+ case AMDGPU::C39_X:
+ case AMDGPU::C40_X:
+ case AMDGPU::C41_X:
+ case AMDGPU::C42_X:
+ case AMDGPU::C43_X:
+ case AMDGPU::C44_X:
+ case AMDGPU::C45_X:
+ case AMDGPU::C46_X:
+ case AMDGPU::C47_X:
+ case AMDGPU::C48_X:
+ case AMDGPU::C49_X:
+ case AMDGPU::C50_X:
+ case AMDGPU::C51_X:
+ case AMDGPU::C52_X:
+ case AMDGPU::C53_X:
+ case AMDGPU::C54_X:
+ case AMDGPU::C55_X:
+ case AMDGPU::C56_X:
+ case AMDGPU::C57_X:
+ case AMDGPU::C58_X:
+ case AMDGPU::C59_X:
+ case AMDGPU::C60_X:
+ case AMDGPU::C61_X:
+ case AMDGPU::C62_X:
+ case AMDGPU::C63_X:
+ case AMDGPU::C64_X:
+ case AMDGPU::C65_X:
+ case AMDGPU::C66_X:
+ case AMDGPU::C67_X:
+ case AMDGPU::C68_X:
+ case AMDGPU::C69_X:
+ case AMDGPU::C70_X:
+ case AMDGPU::C71_X:
+ case AMDGPU::C72_X:
+ case AMDGPU::C73_X:
+ case AMDGPU::C74_X:
+ case AMDGPU::C75_X:
+ case AMDGPU::C76_X:
+ case AMDGPU::C77_X:
+ case AMDGPU::C78_X:
+ case AMDGPU::C79_X:
+ case AMDGPU::C80_X:
+ case AMDGPU::C81_X:
+ case AMDGPU::C82_X:
+ case AMDGPU::C83_X:
+ case AMDGPU::C84_X:
+ case AMDGPU::C85_X:
+ case AMDGPU::C86_X:
+ case AMDGPU::C87_X:
+ case AMDGPU::C88_X:
+ case AMDGPU::C89_X:
+ case AMDGPU::C90_X:
+ case AMDGPU::C91_X:
+ case AMDGPU::C92_X:
+ case AMDGPU::C93_X:
+ case AMDGPU::C94_X:
+ case AMDGPU::C95_X:
+ case AMDGPU::C96_X:
+ case AMDGPU::C97_X:
+ case AMDGPU::C98_X:
+ case AMDGPU::C99_X:
+ case AMDGPU::T0_X:
+ case AMDGPU::T1_X:
+ case AMDGPU::T2_X:
+ case AMDGPU::T3_X:
+ case AMDGPU::T4_X:
+ case AMDGPU::T5_X:
+ case AMDGPU::T6_X:
+ case AMDGPU::T7_X:
+ case AMDGPU::T8_X:
+ case AMDGPU::T9_X:
+ case AMDGPU::T10_X:
+ case AMDGPU::T11_X:
+ case AMDGPU::T12_X:
+ case AMDGPU::T13_X:
+ case AMDGPU::T14_X:
+ case AMDGPU::T15_X:
+ case AMDGPU::T16_X:
+ case AMDGPU::T17_X:
+ case AMDGPU::T18_X:
+ case AMDGPU::T19_X:
+ case AMDGPU::T20_X:
+ case AMDGPU::T21_X:
+ case AMDGPU::T22_X:
+ case AMDGPU::T23_X:
+ case AMDGPU::T24_X:
+ case AMDGPU::T25_X:
+ case AMDGPU::T26_X:
+ case AMDGPU::T27_X:
+ case AMDGPU::T28_X:
+ case AMDGPU::T29_X:
+ case AMDGPU::T30_X:
+ case AMDGPU::T31_X:
+ case AMDGPU::T32_X:
+ case AMDGPU::T33_X:
+ case AMDGPU::T34_X:
+ case AMDGPU::T35_X:
+ case AMDGPU::T36_X:
+ case AMDGPU::T37_X:
+ case AMDGPU::T38_X:
+ case AMDGPU::T39_X:
+ case AMDGPU::T40_X:
+ case AMDGPU::T41_X:
+ case AMDGPU::T42_X:
+ case AMDGPU::T43_X:
+ case AMDGPU::T44_X:
+ case AMDGPU::T45_X:
+ case AMDGPU::T46_X:
+ case AMDGPU::T47_X:
+ case AMDGPU::T48_X:
+ case AMDGPU::T49_X:
+ case AMDGPU::T50_X:
+ case AMDGPU::T51_X:
+ case AMDGPU::T52_X:
+ case AMDGPU::T53_X:
+ case AMDGPU::T54_X:
+ case AMDGPU::T55_X:
+ case AMDGPU::T56_X:
+ case AMDGPU::T57_X:
+ case AMDGPU::T58_X:
+ case AMDGPU::T59_X:
+ case AMDGPU::T60_X:
+ case AMDGPU::T61_X:
+ case AMDGPU::T62_X:
+ case AMDGPU::T63_X:
+ case AMDGPU::T64_X:
+ case AMDGPU::T65_X:
+ case AMDGPU::T66_X:
+ case AMDGPU::T67_X:
+ case AMDGPU::T68_X:
+ case AMDGPU::T69_X:
+ case AMDGPU::T70_X:
+ case AMDGPU::T71_X:
+ case AMDGPU::T72_X:
+ case AMDGPU::T73_X:
+ case AMDGPU::T74_X:
+ case AMDGPU::T75_X:
+ case AMDGPU::T76_X:
+ case AMDGPU::T77_X:
+ case AMDGPU::T78_X:
+ case AMDGPU::T79_X:
+ case AMDGPU::T80_X:
+ case AMDGPU::T81_X:
+ case AMDGPU::T82_X:
+ case AMDGPU::T83_X:
+ case AMDGPU::T84_X:
+ case AMDGPU::T85_X:
+ case AMDGPU::T86_X:
+ case AMDGPU::T87_X:
+ case AMDGPU::T88_X:
+ case AMDGPU::T89_X:
+ case AMDGPU::T90_X:
+ case AMDGPU::T91_X:
+ case AMDGPU::T92_X:
+ case AMDGPU::T93_X:
+ case AMDGPU::T94_X:
+ case AMDGPU::T95_X:
+ case AMDGPU::T96_X:
+ case AMDGPU::T97_X:
+ case AMDGPU::T98_X:
+ case AMDGPU::T99_X:
+ case AMDGPU::T100_X:
+ case AMDGPU::T101_X:
+ case AMDGPU::T102_X:
+ case AMDGPU::T103_X:
+ case AMDGPU::T104_X:
+ case AMDGPU::T105_X:
+ case AMDGPU::T106_X:
+ case AMDGPU::T107_X:
+ case AMDGPU::T108_X:
+ case AMDGPU::T109_X:
+ case AMDGPU::T110_X:
+ case AMDGPU::T111_X:
+ case AMDGPU::T112_X:
+ case AMDGPU::T113_X:
+ case AMDGPU::T114_X:
+ case AMDGPU::T115_X:
+ case AMDGPU::T116_X:
+ case AMDGPU::T117_X:
+ case AMDGPU::T118_X:
+ case AMDGPU::T119_X:
+ case AMDGPU::T120_X:
+ case AMDGPU::T121_X:
+ case AMDGPU::T122_X:
+ case AMDGPU::T123_X:
+ case AMDGPU::T124_X:
+ case AMDGPU::T125_X:
+ case AMDGPU::T126_X:
+ case AMDGPU::T127_X:
+ case AMDGPU::T0_XYZW:
+ case AMDGPU::T1_XYZW:
+ case AMDGPU::T2_XYZW:
+ case AMDGPU::T3_XYZW:
+ case AMDGPU::T4_XYZW:
+ case AMDGPU::T5_XYZW:
+ case AMDGPU::T6_XYZW:
+ case AMDGPU::T7_XYZW:
+ case AMDGPU::T8_XYZW:
+ case AMDGPU::T9_XYZW:
+ case AMDGPU::T10_XYZW:
+ case AMDGPU::T11_XYZW:
+ case AMDGPU::T12_XYZW:
+ case AMDGPU::T13_XYZW:
+ case AMDGPU::T14_XYZW:
+ case AMDGPU::T15_XYZW:
+ case AMDGPU::T16_XYZW:
+ case AMDGPU::T17_XYZW:
+ case AMDGPU::T18_XYZW:
+ case AMDGPU::T19_XYZW:
+ case AMDGPU::T20_XYZW:
+ case AMDGPU::T21_XYZW:
+ case AMDGPU::T22_XYZW:
+ case AMDGPU::T23_XYZW:
+ case AMDGPU::T24_XYZW:
+ case AMDGPU::T25_XYZW:
+ case AMDGPU::T26_XYZW:
+ case AMDGPU::T27_XYZW:
+ case AMDGPU::T28_XYZW:
+ case AMDGPU::T29_XYZW:
+ case AMDGPU::T30_XYZW:
+ case AMDGPU::T31_XYZW:
+ case AMDGPU::T32_XYZW:
+ case AMDGPU::T33_XYZW:
+ case AMDGPU::T34_XYZW:
+ case AMDGPU::T35_XYZW:
+ case AMDGPU::T36_XYZW:
+ case AMDGPU::T37_XYZW:
+ case AMDGPU::T38_XYZW:
+ case AMDGPU::T39_XYZW:
+ case AMDGPU::T40_XYZW:
+ case AMDGPU::T41_XYZW:
+ case AMDGPU::T42_XYZW:
+ case AMDGPU::T43_XYZW:
+ case AMDGPU::T44_XYZW:
+ case AMDGPU::T45_XYZW:
+ case AMDGPU::T46_XYZW:
+ case AMDGPU::T47_XYZW:
+ case AMDGPU::T48_XYZW:
+ case AMDGPU::T49_XYZW:
+ case AMDGPU::T50_XYZW:
+ case AMDGPU::T51_XYZW:
+ case AMDGPU::T52_XYZW:
+ case AMDGPU::T53_XYZW:
+ case AMDGPU::T54_XYZW:
+ case AMDGPU::T55_XYZW:
+ case AMDGPU::T56_XYZW:
+ case AMDGPU::T57_XYZW:
+ case AMDGPU::T58_XYZW:
+ case AMDGPU::T59_XYZW:
+ case AMDGPU::T60_XYZW:
+ case AMDGPU::T61_XYZW:
+ case AMDGPU::T62_XYZW:
+ case AMDGPU::T63_XYZW:
+ case AMDGPU::T64_XYZW:
+ case AMDGPU::T65_XYZW:
+ case AMDGPU::T66_XYZW:
+ case AMDGPU::T67_XYZW:
+ case AMDGPU::T68_XYZW:
+ case AMDGPU::T69_XYZW:
+ case AMDGPU::T70_XYZW:
+ case AMDGPU::T71_XYZW:
+ case AMDGPU::T72_XYZW:
+ case AMDGPU::T73_XYZW:
+ case AMDGPU::T74_XYZW:
+ case AMDGPU::T75_XYZW:
+ case AMDGPU::T76_XYZW:
+ case AMDGPU::T77_XYZW:
+ case AMDGPU::T78_XYZW:
+ case AMDGPU::T79_XYZW:
+ case AMDGPU::T80_XYZW:
+ case AMDGPU::T81_XYZW:
+ case AMDGPU::T82_XYZW:
+ case AMDGPU::T83_XYZW:
+ case AMDGPU::T84_XYZW:
+ case AMDGPU::T85_XYZW:
+ case AMDGPU::T86_XYZW:
+ case AMDGPU::T87_XYZW:
+ case AMDGPU::T88_XYZW:
+ case AMDGPU::T89_XYZW:
+ case AMDGPU::T90_XYZW:
+ case AMDGPU::T91_XYZW:
+ case AMDGPU::T92_XYZW:
+ case AMDGPU::T93_XYZW:
+ case AMDGPU::T94_XYZW:
+ case AMDGPU::T95_XYZW:
+ case AMDGPU::T96_XYZW:
+ case AMDGPU::T97_XYZW:
+ case AMDGPU::T98_XYZW:
+ case AMDGPU::T99_XYZW:
+ case AMDGPU::T100_XYZW:
+ case AMDGPU::T101_XYZW:
+ case AMDGPU::T102_XYZW:
+ case AMDGPU::T103_XYZW:
+ case AMDGPU::T104_XYZW:
+ case AMDGPU::T105_XYZW:
+ case AMDGPU::T106_XYZW:
+ case AMDGPU::T107_XYZW:
+ case AMDGPU::T108_XYZW:
+ case AMDGPU::T109_XYZW:
+ case AMDGPU::T110_XYZW:
+ case AMDGPU::T111_XYZW:
+ case AMDGPU::T112_XYZW:
+ case AMDGPU::T113_XYZW:
+ case AMDGPU::T114_XYZW:
+ case AMDGPU::T115_XYZW:
+ case AMDGPU::T116_XYZW:
+ case AMDGPU::T117_XYZW:
+ case AMDGPU::T118_XYZW:
+ case AMDGPU::T119_XYZW:
+ case AMDGPU::T120_XYZW:
+ case AMDGPU::T121_XYZW:
+ case AMDGPU::T122_XYZW:
+ case AMDGPU::T123_XYZW:
+ case AMDGPU::T124_XYZW:
+ case AMDGPU::T125_XYZW:
+ case AMDGPU::T126_XYZW:
+ case AMDGPU::T127_XYZW:
+ return 0;
+
+ case AMDGPU::C0_Y:
+ case AMDGPU::C1_Y:
+ case AMDGPU::C2_Y:
+ case AMDGPU::C3_Y:
+ case AMDGPU::C4_Y:
+ case AMDGPU::C5_Y:
+ case AMDGPU::C6_Y:
+ case AMDGPU::C7_Y:
+ case AMDGPU::C8_Y:
+ case AMDGPU::C9_Y:
+ case AMDGPU::C10_Y:
+ case AMDGPU::C11_Y:
+ case AMDGPU::C12_Y:
+ case AMDGPU::C13_Y:
+ case AMDGPU::C14_Y:
+ case AMDGPU::C15_Y:
+ case AMDGPU::C16_Y:
+ case AMDGPU::C17_Y:
+ case AMDGPU::C18_Y:
+ case AMDGPU::C19_Y:
+ case AMDGPU::C20_Y:
+ case AMDGPU::C21_Y:
+ case AMDGPU::C22_Y:
+ case AMDGPU::C23_Y:
+ case AMDGPU::C24_Y:
+ case AMDGPU::C25_Y:
+ case AMDGPU::C26_Y:
+ case AMDGPU::C27_Y:
+ case AMDGPU::C28_Y:
+ case AMDGPU::C29_Y:
+ case AMDGPU::C30_Y:
+ case AMDGPU::C31_Y:
+ case AMDGPU::C32_Y:
+ case AMDGPU::C33_Y:
+ case AMDGPU::C34_Y:
+ case AMDGPU::C35_Y:
+ case AMDGPU::C36_Y:
+ case AMDGPU::C37_Y:
+ case AMDGPU::C38_Y:
+ case AMDGPU::C39_Y:
+ case AMDGPU::C40_Y:
+ case AMDGPU::C41_Y:
+ case AMDGPU::C42_Y:
+ case AMDGPU::C43_Y:
+ case AMDGPU::C44_Y:
+ case AMDGPU::C45_Y:
+ case AMDGPU::C46_Y:
+ case AMDGPU::C47_Y:
+ case AMDGPU::C48_Y:
+ case AMDGPU::C49_Y:
+ case AMDGPU::C50_Y:
+ case AMDGPU::C51_Y:
+ case AMDGPU::C52_Y:
+ case AMDGPU::C53_Y:
+ case AMDGPU::C54_Y:
+ case AMDGPU::C55_Y:
+ case AMDGPU::C56_Y:
+ case AMDGPU::C57_Y:
+ case AMDGPU::C58_Y:
+ case AMDGPU::C59_Y:
+ case AMDGPU::C60_Y:
+ case AMDGPU::C61_Y:
+ case AMDGPU::C62_Y:
+ case AMDGPU::C63_Y:
+ case AMDGPU::C64_Y:
+ case AMDGPU::C65_Y:
+ case AMDGPU::C66_Y:
+ case AMDGPU::C67_Y:
+ case AMDGPU::C68_Y:
+ case AMDGPU::C69_Y:
+ case AMDGPU::C70_Y:
+ case AMDGPU::C71_Y:
+ case AMDGPU::C72_Y:
+ case AMDGPU::C73_Y:
+ case AMDGPU::C74_Y:
+ case AMDGPU::C75_Y:
+ case AMDGPU::C76_Y:
+ case AMDGPU::C77_Y:
+ case AMDGPU::C78_Y:
+ case AMDGPU::C79_Y:
+ case AMDGPU::C80_Y:
+ case AMDGPU::C81_Y:
+ case AMDGPU::C82_Y:
+ case AMDGPU::C83_Y:
+ case AMDGPU::C84_Y:
+ case AMDGPU::C85_Y:
+ case AMDGPU::C86_Y:
+ case AMDGPU::C87_Y:
+ case AMDGPU::C88_Y:
+ case AMDGPU::C89_Y:
+ case AMDGPU::C90_Y:
+ case AMDGPU::C91_Y:
+ case AMDGPU::C92_Y:
+ case AMDGPU::C93_Y:
+ case AMDGPU::C94_Y:
+ case AMDGPU::C95_Y:
+ case AMDGPU::C96_Y:
+ case AMDGPU::C97_Y:
+ case AMDGPU::C98_Y:
+ case AMDGPU::C99_Y:
+ case AMDGPU::T0_Y:
+ case AMDGPU::T1_Y:
+ case AMDGPU::T2_Y:
+ case AMDGPU::T3_Y:
+ case AMDGPU::T4_Y:
+ case AMDGPU::T5_Y:
+ case AMDGPU::T6_Y:
+ case AMDGPU::T7_Y:
+ case AMDGPU::T8_Y:
+ case AMDGPU::T9_Y:
+ case AMDGPU::T10_Y:
+ case AMDGPU::T11_Y:
+ case AMDGPU::T12_Y:
+ case AMDGPU::T13_Y:
+ case AMDGPU::T14_Y:
+ case AMDGPU::T15_Y:
+ case AMDGPU::T16_Y:
+ case AMDGPU::T17_Y:
+ case AMDGPU::T18_Y:
+ case AMDGPU::T19_Y:
+ case AMDGPU::T20_Y:
+ case AMDGPU::T21_Y:
+ case AMDGPU::T22_Y:
+ case AMDGPU::T23_Y:
+ case AMDGPU::T24_Y:
+ case AMDGPU::T25_Y:
+ case AMDGPU::T26_Y:
+ case AMDGPU::T27_Y:
+ case AMDGPU::T28_Y:
+ case AMDGPU::T29_Y:
+ case AMDGPU::T30_Y:
+ case AMDGPU::T31_Y:
+ case AMDGPU::T32_Y:
+ case AMDGPU::T33_Y:
+ case AMDGPU::T34_Y:
+ case AMDGPU::T35_Y:
+ case AMDGPU::T36_Y:
+ case AMDGPU::T37_Y:
+ case AMDGPU::T38_Y:
+ case AMDGPU::T39_Y:
+ case AMDGPU::T40_Y:
+ case AMDGPU::T41_Y:
+ case AMDGPU::T42_Y:
+ case AMDGPU::T43_Y:
+ case AMDGPU::T44_Y:
+ case AMDGPU::T45_Y:
+ case AMDGPU::T46_Y:
+ case AMDGPU::T47_Y:
+ case AMDGPU::T48_Y:
+ case AMDGPU::T49_Y:
+ case AMDGPU::T50_Y:
+ case AMDGPU::T51_Y:
+ case AMDGPU::T52_Y:
+ case AMDGPU::T53_Y:
+ case AMDGPU::T54_Y:
+ case AMDGPU::T55_Y:
+ case AMDGPU::T56_Y:
+ case AMDGPU::T57_Y:
+ case AMDGPU::T58_Y:
+ case AMDGPU::T59_Y:
+ case AMDGPU::T60_Y:
+ case AMDGPU::T61_Y:
+ case AMDGPU::T62_Y:
+ case AMDGPU::T63_Y:
+ case AMDGPU::T64_Y:
+ case AMDGPU::T65_Y:
+ case AMDGPU::T66_Y:
+ case AMDGPU::T67_Y:
+ case AMDGPU::T68_Y:
+ case AMDGPU::T69_Y:
+ case AMDGPU::T70_Y:
+ case AMDGPU::T71_Y:
+ case AMDGPU::T72_Y:
+ case AMDGPU::T73_Y:
+ case AMDGPU::T74_Y:
+ case AMDGPU::T75_Y:
+ case AMDGPU::T76_Y:
+ case AMDGPU::T77_Y:
+ case AMDGPU::T78_Y:
+ case AMDGPU::T79_Y:
+ case AMDGPU::T80_Y:
+ case AMDGPU::T81_Y:
+ case AMDGPU::T82_Y:
+ case AMDGPU::T83_Y:
+ case AMDGPU::T84_Y:
+ case AMDGPU::T85_Y:
+ case AMDGPU::T86_Y:
+ case AMDGPU::T87_Y:
+ case AMDGPU::T88_Y:
+ case AMDGPU::T89_Y:
+ case AMDGPU::T90_Y:
+ case AMDGPU::T91_Y:
+ case AMDGPU::T92_Y:
+ case AMDGPU::T93_Y:
+ case AMDGPU::T94_Y:
+ case AMDGPU::T95_Y:
+ case AMDGPU::T96_Y:
+ case AMDGPU::T97_Y:
+ case AMDGPU::T98_Y:
+ case AMDGPU::T99_Y:
+ case AMDGPU::T100_Y:
+ case AMDGPU::T101_Y:
+ case AMDGPU::T102_Y:
+ case AMDGPU::T103_Y:
+ case AMDGPU::T104_Y:
+ case AMDGPU::T105_Y:
+ case AMDGPU::T106_Y:
+ case AMDGPU::T107_Y:
+ case AMDGPU::T108_Y:
+ case AMDGPU::T109_Y:
+ case AMDGPU::T110_Y:
+ case AMDGPU::T111_Y:
+ case AMDGPU::T112_Y:
+ case AMDGPU::T113_Y:
+ case AMDGPU::T114_Y:
+ case AMDGPU::T115_Y:
+ case AMDGPU::T116_Y:
+ case AMDGPU::T117_Y:
+ case AMDGPU::T118_Y:
+ case AMDGPU::T119_Y:
+ case AMDGPU::T120_Y:
+ case AMDGPU::T121_Y:
+ case AMDGPU::T122_Y:
+ case AMDGPU::T123_Y:
+ case AMDGPU::T124_Y:
+ case AMDGPU::T125_Y:
+ case AMDGPU::T126_Y:
+ case AMDGPU::T127_Y:
+ return 1;
+
+ }
+}
+
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
new file mode 100644
index 0000000..7f80cec
--- /dev/null
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -0,0 +1,522 @@
+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
+// mostly implements EmitInstrWithCustomInserter().
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600ISelLowering.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
+{
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
+ computeRegisterProperties();
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ setSchedulingPreference(Sched::VLIW);
+}
+
+MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const
+{
+ MachineFunction * MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock::iterator I = *MI;
+
+ switch (MI->getOpcode()) {
+ default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::CLAMP_R600:
+ {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // Flags
+ .addReg(AMDGPU::PRED_SEL_OFF);
+ TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
+ break;
+ }
+ case AMDGPU::FABS_R600:
+ {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // Flags
+ .addReg(AMDGPU::PRED_SEL_OFF);
+ TII->addFlag(NewMI, 1, MO_FLAG_ABS);
+ break;
+ }
+
+ case AMDGPU::FNEG_R600:
+ {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // Flags
+ .addReg(AMDGPU::PRED_SEL_OFF);
+ TII->addFlag(NewMI, 1, MO_FLAG_NEG);
+ break;
+ }
+
+ case AMDGPU::R600_LOAD_CONST:
+ {
+ int64_t RegIndex = MI->getOperand(1).getImm();
+ unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
+ .addOperand(MI->getOperand(0))
+ .addReg(ConstantReg);
+ break;
+ }
+
+ case AMDGPU::MASK_WRITE:
+ {
+ unsigned maskedRegister = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+ MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
+ TII->addFlag(defInstr, 0, MO_FLAG_MASK);
+ // Return early so the instruction is not erased
+ return BB;
+ }
+
+ case AMDGPU::RAT_WRITE_CACHELESS_eg:
+ {
+ // Convert to DWORD address
+ unsigned NewAddr = MRI.createVirtualRegister(
+ &AMDGPU::R600_TReg32_XRegClass);
+ unsigned ShiftValue = MRI.createVirtualRegister(
+ &AMDGPU::R600_TReg32RegClass);
+ unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+
+ // XXX In theory, we should be able to pass ShiftValue directly to
+ // the LSHR_eg instruction as an inline literal, but I tried doing it
+ // this way and it didn't produce the correct results.
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
+ ShiftValue)
+ .addReg(AMDGPU::ALU_LITERAL_X)
+ .addReg(AMDGPU::PRED_SEL_OFF)
+ .addImm(2);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
+ .addOperand(MI->getOperand(1))
+ .addReg(ShiftValue)
+ .addReg(AMDGPU::PRED_SEL_OFF);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addReg(NewAddr)
+ .addImm(EOP); // Set End of program bit
+ break;
+ }
+
+ case AMDGPU::RESERVE_REG:
+ {
+ R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
+ int64_t ReservedIndex = MI->getOperand(0).getImm();
+ unsigned ReservedReg =
+ AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
+ MFI->ReservedRegs.push_back(ReservedReg);
+ break;
+ }
+
+ case AMDGPU::TXD:
+ {
+ unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addReg(t0, RegState::Implicit)
+ .addReg(t1, RegState::Implicit);
+ break;
+ }
+ case AMDGPU::TXD_SHADOW:
+ {
+ unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addReg(t0, RegState::Implicit)
+ .addReg(t1, RegState::Implicit);
+ break;
+ }
+ case AMDGPU::BRANCH:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(0);
+ break;
+ case AMDGPU::BRANCH_COND_f32:
+ {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+ .addReg(AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_ZERO)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+ case AMDGPU::BRANCH_COND_i32:
+ {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+ .addReg(AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_ZERO_INT)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+ }
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+using namespace llvm::Intrinsic;
+using namespace llvm::AMDGPUIntrinsic;
+
+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::ROTL: return LowerROTL(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::INTRINSIC_VOID: {
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::AMDGPU_store_output: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ if (!MRI.isLiveOut(Reg)) {
+ MRI.addLiveOut(Reg);
+ }
+ return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
+ }
+ // default for switch(IntrinsicID)
+ default: break;
+ }
+ // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ switch(IntrinsicID) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case AMDGPUIntrinsic::R600_load_input: {
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
+ }
+
+ case r600_read_ngroups_x:
+ return LowerImplicitParameter(DAG, VT, DL, 0);
+ case r600_read_ngroups_y:
+ return LowerImplicitParameter(DAG, VT, DL, 1);
+ case r600_read_ngroups_z:
+ return LowerImplicitParameter(DAG, VT, DL, 2);
+ case r600_read_global_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 3);
+ case r600_read_global_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 4);
+ case r600_read_global_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 5);
+ case r600_read_local_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 6);
+ case r600_read_local_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 7);
+ case r600_read_local_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 8);
+
+ case r600_read_tgid_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_X, VT);
+ case r600_read_tgid_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Y, VT);
+ case r600_read_tgid_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Z, VT);
+ case r600_read_tidig_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_X, VT);
+ case r600_read_tidig_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Y, VT);
+ case r600_read_tidig_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Z, VT);
+ }
+ // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
+ break;
+ }
+ } // end switch(Op.getOpcode())
+ return SDValue();
+}
+
+SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue CC = Op.getOperand(1);
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue JumpT = Op.getOperand(4);
+ SDValue CmpValue;
+ SDValue Result;
+ CmpValue = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ MVT::i32,
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ Result = DAG.getNode(
+ AMDGPUISD::BRANCH_COND,
+ CmpValue.getDebugLoc(),
+ MVT::Other, Chain,
+ JumpT, CmpValue);
+ return Result;
+}
+
+SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL,
+ unsigned DwordOffset) const
+{
+ unsigned ByteOffset = DwordOffset * 4;
+ PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::PARAM_I_ADDRESS);
+
+ // We shouldn't be using an offset wider than 16 bits for implicit parameters.
+ assert(isInt<16>(ByteOffset));
+
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+ DAG.getConstant(ByteOffset, MVT::i32), // PTR
+ MachinePointerInfo(ConstantPointerNull::get(PtrType)),
+ false, false, false, 0);
+}
+
+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
+ Op.getOperand(0),
+ Op.getOperand(0),
+ DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(32, MVT::i32),
+ Op.getOperand(1)));
+}
+
+SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ SDValue Temp;
+
+ // LHS and RHS are guaranteed to be the same value type
+ EVT CompareVT = LHS.getValueType();
+
+ // We need all the operands of SELECT_CC to have the same value type, so if
+ // necessary we need to convert LHS and RHS to the same type as True and
+ // False. True and False are guaranteed to have the same type as this
+ // SELECT_CC node.
+
+ if (CompareVT != VT) {
+ ISD::NodeType ConversionOp = ISD::DELETED_NODE;
+ if (VT == MVT::f32 && CompareVT == MVT::i32) {
+ if (isUnsignedIntSetCC(CCOpcode)) {
+ ConversionOp = ISD::UINT_TO_FP;
+ } else {
+ ConversionOp = ISD::SINT_TO_FP;
+ }
+ } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
+ ConversionOp = ISD::FP_TO_SINT;
+ } else {
+ // I don't think there will be any other type pairings.
+ assert(!"Unhandled operand type parings in SELECT_CC");
+ }
+ // XXX Check the value of LHS and RHS and avoid creating sequences like
+ // (FTOI (ITOF))
+ LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
+ RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
+ }
+
+ // If True is a hardware TRUE value and False is a hardware FALSE value we
+ // can handle this with a native instruction (SET* instructions).
+ if ((isHWTrueValue(True) && isHWFalseValue(False))) {
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+
+ // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
+ // we can handle this with a native instruction, but we need to swap true
+ // and false and change the conditional.
+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ }
+
+ // XXX Check if we can lower this to a SELECT or if it is supported by a
+ // native operation. (The code below does this, but we don't have the
+ // instruction selection patterns to do it yet.)
+#if 0
+ if (isZero(LHS) || isZero(RHS)) {
+ SDValue Cond = (isZero(LHS) ? RHS : LHS);
+ bool SwapTF = false;
+ switch (CCOpcode) {
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ case ISD::SETEQ:
+ SwapTF = true;
+ // Fall through
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ // We can lower to select
+ if (SwapTF) {
+ Temp = True;
+ True = False;
+ False = Temp;
+ }
+ // CNDE
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+ default:
+ // Supported by a native operation (CNDGE, CNDGT)
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+ }
+#endif
+
+ // If we make it this far, it means we have no native instructions to handle
+ // this SELECT_CC, so we must lower it.
+ SDValue HWTrue, HWFalse;
+
+ if (VT == MVT::f32) {
+ HWTrue = DAG.getConstantFP(1.0f, VT);
+ HWFalse = DAG.getConstantFP(0.0f, VT);
+ } else if (VT == MVT::i32) {
+ HWTrue = DAG.getConstant(-1, VT);
+ HWFalse = DAG.getConstant(0, VT);
+ } else {
+ assert(!"Unhandled value type in LowerSELECT_CC");
+ }
+
+ // Lower this unsupported SELECT_CC into a combination of a supported
+ // SELECT_CC and a SELECT.
+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
+
+ // Convert floating point condition to i1
+ if (VT == MVT::f32) {
+ Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
+ DAG.getNode(ISD::FNEG, DL, VT, Cond));
+ }
+
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+}
+
+SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ assert(Op.getValueType() == MVT::i32);
+ Cond = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ MVT::i32,
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ Cond = DAG.getNode(
+ ISD::AND,
+ DL,
+ MVT::i32,
+ DAG.getConstant(1, MVT::i32),
+ Cond);
+ return Cond;
+}
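
LowerROTL above rewrites a 32-bit rotate-left as BITALIGN(x, x, 32 - n).
Assuming AMDGPUISD::BITALIGN selects the hardware alignbit operation, i.e.
a funnel shift right over the 64-bit concatenation of its first two
operands with the shift amount masked to 5 bits (an assumption about the
selected instruction, not something spelled out in this patch), the
identity can be checked on the host:

  #include <cassert>
  #include <cstdint>

  // Host model of alignbit: take 32 bits out of {Hi,Lo}, starting
  // (Shift & 31) bits up from the bottom.
  static uint32_t bitalign(uint32_t Hi, uint32_t Lo, uint32_t Shift) {
    uint64_t Concat = ((uint64_t)Hi << 32) | Lo;
    return (uint32_t)(Concat >> (Shift & 31));
  }

  static uint32_t rotl32(uint32_t X, uint32_t N) {
    N &= 31;
    return N ? (X << N) | (X >> (32 - N)) : X;
  }

  int main() {
    // rotl(x, n) == bitalign(x, x, 32 - n) for every n, including n == 0,
    // where the 5-bit mask turns the shift of 32 into a shift of 0.
    for (uint32_t N = 0; N < 32; ++N)
      assert(rotl32(0xDEADBEEFu, N) ==
             bitalign(0xDEADBEEFu, 0xDEADBEEFu, 32 - N));
    return 0;
  }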
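
The SELECT_CC fallback at the end of LowerSELECT_CC can be modeled on the
host like this (SETLT is picked just to make the demo concrete, and
hwBool() / selectCC() are illustrative names, not patch code):

  #include <cassert>

  // Step 1: materialize a hardware boolean with a supported SELECT_CC
  // (i32 true is -1, false is 0, matching HWTrue/HWFalse above).
  static int hwBool(int LHS, int RHS) {   // SELECT_CC LHS, RHS, -1, 0, setlt
    return LHS < RHS ? -1 : 0;
  }

  // Step 2: pick True/False with a plain SELECT on that boolean.
  static int selectCC(int LHS, int RHS, int TrueV, int FalseV) {
    int Cond = hwBool(LHS, RHS);
    return Cond ? TrueV : FalseV;
  }

  int main() {
    assert(selectCC(1, 2, 10, 20) == 10);  // 1 < 2, take True
    assert(selectCC(3, 2, 10, 20) == 20);  // 3 >= 2, take False
    return 0;
  }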
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
new file mode 100644
index 0000000..2eb7edd
--- /dev/null
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -0,0 +1,56 @@
+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600ISELLOWERING_H
+#define R600ISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+
+namespace llvm {
+
+class R600InstrInfo;
+
+class R600TargetLowering : public AMDGPUTargetLowering
+{
+public:
+ R600TargetLowering(TargetMachine &TM);
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock * BB) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+private:
+ const R600InstrInfo * TII;
+
+ /// LowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
+ /// that are stored in the first nine dwords of a Vertex Buffer. These
+ /// implicit parameters are lowered to load instructions which retrieve the
+ /// values from the Vertex Buffer.
+ SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL, unsigned DwordOffset) const;
+
+ void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineRegisterInfo & MRI, unsigned dword_offset) const;
+
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+
+ /// LowerROTL - Lower ROTL opcode to BITALIGN
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+};
+
+} // End namespace llvm
+
+#endif // R600ISELLOWERING_H
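
Spelled out, the DwordOffset table wired up in LowerOperation gives the
following byte layout for those nine parameters. The enum is a restatement
for readability, with illustrative names; only the offsets come from the
patch:

  // Each parameter is loaded from PARAM_I_ADDRESS at DwordOffset * 4 bytes.
  enum R600ImplicitParamByteOffset {
    NGROUPS_X     = 0 * 4, NGROUPS_Y     = 1 * 4, NGROUPS_Z     = 2 * 4,
    GLOBAL_SIZE_X = 3 * 4, GLOBAL_SIZE_Y = 4 * 4, GLOBAL_SIZE_Z = 5 * 4,
    LOCAL_SIZE_X  = 6 * 4, LOCAL_SIZE_Y  = 7 * 4, LOCAL_SIZE_Z  = 8 * 4
  };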
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
new file mode 100644
index 0000000..f1da5c2
--- /dev/null
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -0,0 +1,511 @@
+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600InstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPUSubtarget.h"
+#include "R600Defines.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "AMDILUtilityFunctions.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AMDGPUGenDFAPacketizer.inc"
+
+using namespace llvm;
+
+R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
+{
+ return RI;
+}
+
+bool R600InstrInfo::isTrig(const MachineInstr &MI) const
+{
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
+}
+
+bool R600InstrInfo::isVector(const MachineInstr &MI) const
+{
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
+}
+
+void
+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{
+ if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
+ for (unsigned I = 0; I < 4; I++) {
+ unsigned SubRegIndex = RI.getSubRegFromChannel(I);
+ BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
+ .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
+ .addReg(RI.getSubReg(SrcReg, SubRegIndex))
+ .addImm(0) // Flag
+ .addReg(0) // PREDICATE_BIT
+ .addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+ } else {
+
+ // We can't copy vec4 registers
+ assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0) // Flag
+ .addReg(0); // PREDICATE_BIT
+ }
+}
+
+MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
+ unsigned DstReg, int64_t Imm) const
+{
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
+ MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
+ MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
+ MachineInstrBuilder(MI).addImm(Imm);
+ MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
+
+ return MI;
+}
+
+unsigned R600InstrInfo::getIEQOpcode() const
+{
+ return AMDGPU::SETE_INT;
+}
+
+bool R600InstrInfo::isMov(unsigned Opcode) const
+{
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::MOV:
+ case AMDGPU::MOV_IMM_F32:
+ case AMDGPU::MOV_IMM_I32:
+ return true;
+ }
+}
+
+// Some instructions act as placeholders to emulate operations that the GPU
+// hardware does automatically. This function can be used to check if
+// an opcode falls into this category.
+bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default: return false;
+ case AMDGPU::RETURN:
+ case AMDGPU::MASK_WRITE:
+ case AMDGPU::RESERVE_REG:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isReductionOp(unsigned Opcode) const
+{
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DOT4_r600:
+ case AMDGPU::DOT4_eg:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isCubeOp(unsigned Opcode) const
+{
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::CUBE_r600_pseudo:
+ case AMDGPU::CUBE_r600_real:
+ case AMDGPU::CUBE_eg_pseudo:
+ case AMDGPU::CUBE_eg_real:
+ return true;
+ }
+}
+
+DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const
+{
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
+}
+
+static bool
+isPredicateSetter(unsigned Opcode)
+{
+ switch (Opcode) {
+ case AMDGPU::PRED_X:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+{
+ while (I != MBB.begin()) {
+ --I;
+ MachineInstr *MI = I;
+ if (isPredicateSetter(MI->getOpcode()))
+ return MI;
+ }
+
+ return NULL;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+ // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
+ return false;
+ }
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() ||
+ static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
+ if (LastOpc == AMDGPU::JUMP) {
+ if(!isPredicated(LastInst)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ MachineInstr *predSet = I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (SecondLastOpc == AMDGPU::JUMP &&
+ isPredicated(SecondLastInst) &&
+ LastOpc == AMDGPU::JUMP &&
+ !isPredicated(LastInst)) {
+ MachineInstr *predSet = --I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
+ const MachineInstr *MI = op.getParent();
+
+ switch (MI->getDesc().OpInfo->RegClass) {
+ default: // FIXME: fallthrough??
+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
+ case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
+ }
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const
+{
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
+ return 1;
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ addFlag(PredSet, 1, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ return 1;
+ }
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ addFlag(PredSet, 1, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
+ return 2;
+ }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+
+ // Note: we leave PRED* instructions in place.
+ // They may be needed when predicating instructions.
+
+ MachineBasicBlock::iterator I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 0;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ case AMDGPU::JUMP:
+ if (isPredicated(I)) {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 1, MO_FLAG_PUSH);
+ }
+ I->eraseFromParent();
+ break;
+ }
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ // The only other terminator we can remove is a second JUMP.
+ default:
+ return 1;
+ case AMDGPU::JUMP:
+ if (isPredicated(I)) {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 1, MO_FLAG_PUSH);
+ }
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
+bool
+R600InstrInfo::isPredicated(const MachineInstr *MI) const
+{
+ int idx = MI->findFirstPredOperandIdx();
+ if (idx < 0)
+ return false;
+
+ unsigned Reg = MI->getOperand(idx).getReg();
+ switch (Reg) {
+ default: return false;
+ case AMDGPU::PRED_SEL_ONE:
+ case AMDGPU::PRED_SEL_ZERO:
+ case AMDGPU::PREDICATE_BIT:
+ return true;
+ }
+}
+
+bool
+R600InstrInfo::isPredicable(MachineInstr *MI) const
+{
+ return AMDGPUInstrInfo::isPredicable(MI);
+}
+
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const
+{
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability &Probability)
+ const
+{
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const
+{
+ return false;
+}
+
+
+bool
+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ MachineOperand &MO = Cond[1];
+ switch (MO.getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ MO.setImm(OPCODE_IS_NOT_ZERO_INT);
+ break;
+ case OPCODE_IS_NOT_ZERO_INT:
+ MO.setImm(OPCODE_IS_ZERO_INT);
+ break;
+ case OPCODE_IS_ZERO:
+ MO.setImm(OPCODE_IS_NOT_ZERO);
+ break;
+ case OPCODE_IS_NOT_ZERO:
+ MO.setImm(OPCODE_IS_ZERO);
+ break;
+ default:
+ return true;
+ }
+
+ MachineOperand &MO2 = Cond[2];
+ switch (MO2.getReg()) {
+ case AMDGPU::PRED_SEL_ZERO:
+ MO2.setReg(AMDGPU::PRED_SEL_ONE);
+ break;
+ case AMDGPU::PRED_SEL_ONE:
+ MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
+
+bool
+R600InstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const
+{
+ return isPredicateSetter(MI->getOpcode());
+}
+
+
+bool
+R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const
+{
+ return false;
+}
+
+
+bool
+R600InstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const
+{
+ int PIdx = MI->findFirstPredOperandIdx();
+
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setReg(Pred[2].getReg());
+ MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
+ return false;
+}
+
+unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const
+{
+ if (PredCost)
+ *PredCost = 2;
+ return 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction flag getters/setters
+//===----------------------------------------------------------------------===//
+
+bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
+{
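+ // A flag operand index of 0 means the instruction has no flag operand.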
+ return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
+}
+
+MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
+{
+ unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
+ assert(FlagIndex != 0 &&
+ "Instruction flags not supported for this instruction");
+ MachineOperand &FlagOp = MI->getOperand(FlagIndex);
+ assert(FlagOp.isImm());
+ return FlagOp;
+}
+
+void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const
+{
+ MachineOperand &FlagOp = getFlagOp(MI);
+ FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
+}
+
+void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const
+{
+ MachineOperand &FlagOp = getFlagOp(MI);
+ unsigned InstFlags = FlagOp.getImm();
+ InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
+ FlagOp.setImm(InstFlags);
+}
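
As a reading aid, the flag helpers above pack NUM_MO_FLAGS bits of MO_FLAG_*
state per operand into one immediate. A minimal standalone sketch of that
packing, with illustrative values standing in for the real constants from
R600Defines.h:

    // Sketch only: NUM_MO_FLAGS and MO_FLAG_PUSH are assumed values here;
    // the real definitions live in R600Defines.h.
    #include <cassert>
    #include <cstdint>

    static const unsigned NUM_MO_FLAGS = 3;
    static const unsigned MO_FLAG_PUSH = 1 << 2;

    static uint32_t addFlag(uint32_t InstFlags, unsigned Operand,
                            unsigned Flag) {
      return InstFlags | (Flag << (NUM_MO_FLAGS * Operand));
    }

    static uint32_t clearFlag(uint32_t InstFlags, unsigned Operand,
                              unsigned Flag) {
      return InstFlags & ~(Flag << (NUM_MO_FLAGS * Operand));
    }

    int main() {
      uint32_t Flags = addFlag(0, 1, MO_FLAG_PUSH); // set PUSH on operand 1
      assert(Flags == (MO_FLAG_PUSH << NUM_MO_FLAGS));
      assert(clearFlag(Flags, 1, MO_FLAG_PUSH) == 0); // and clear it again
      return 0;
    }
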
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
new file mode 100644
index 0000000..3a9aaeb
--- /dev/null
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -0,0 +1,144 @@
+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface definition for R600InstrInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600INSTRUCTIONINFO_H_
+#define R600INSTRUCTIONINFO_H_
+
+#include "AMDIL.h"
+#include "AMDGPUInstrInfo.h"
+#include "R600RegisterInfo.h"
+
+#include <map>
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class DFAPacketizer;
+ class ScheduleDAG;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class R600InstrInfo : public AMDGPUInstrInfo {
+ private:
+ const R600RegisterInfo RI;
+
+ int getBranchInstr(const MachineOperand &op) const;
+
+ public:
+ explicit R600InstrInfo(AMDGPUTargetMachine &tm);
+
+ const R600RegisterInfo &getRegisterInfo() const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ bool isTrig(const MachineInstr &MI) const;
+ bool isPlaceHolderOpcode(unsigned opcode) const;
+ bool isReductionOp(unsigned opcode) const;
+ bool isCubeOp(unsigned opcode) const;
+
+ /// isVector - Vector instructions are instructions that must fill all
+ /// instruction slots within an instruction group.
+ bool isVector(const MachineInstr &MI) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+ virtual unsigned getIEQOpcode() const;
+ virtual bool isMov(unsigned Opcode) const;
+
+ DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ bool isPredicated(const MachineInstr *MI) const;
+
+ bool isPredicable(MachineInstr *MI) const;
+
+ bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ const BranchProbability &Probability) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ bool
+ isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ unsigned int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
+
+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const { return 1; }
+
+ /// hasFlagOperand - Returns true if this instruction has an operand for
+ /// storing target flags.
+ bool hasFlagOperand(const MachineInstr &MI) const;
+
+ /// addFlag - Add one of the MO_FLAG* flags to the specified Operand.
+ void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+
+ /// isFlagSet - Determine if the specified flag is set on this Operand.
+ bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
+
+ /// getFlagOp - Return the operand containing the flags for this instruction.
+ MachineOperand &getFlagOp(MachineInstr *MI) const;
+
+ /// clearFlag - Clear the specified flag on the instruction.
+ void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+};
+
+} // End llvm namespace
+
+namespace R600_InstFlag {
+ enum TIF {
+ TRANS_ONLY = (1 << 0),
+ TEX = (1 << 1),
+ REDUCTION = (1 << 2),
+ FC = (1 << 3),
+ TRIG = (1 << 4),
+ OP3 = (1 << 5),
+ VECTOR = (1 << 6)
+ // FlagOperandIdx occupies TSFlags bits 7-8.
+ };
+}
+
+#endif // R600INSTRUCTIONINFO_H_
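
For readers cross-referencing the two files: the R600_InstFlag values above
mirror the "let TSFlags{n}" assignments in R600Instructions.td, so the
TSFlags-based queries reduce to mask tests. A hedged sketch of that shape
(the real helpers take a MachineInstr and fetch TSFlags through the
instruction descriptor):

    // Sketch only: plausible form of isTrig()/isVector(), assuming they are
    // simple mask tests over the descriptor's TSFlags.
    #include <cstdint>

    enum TIF { TRIG = 1 << 4, OP3 = 1 << 5, VECTOR = 1 << 6 };

    static bool isTrig(uint64_t TSFlags)   { return TSFlags & TRIG; }
    static bool isVector(uint64_t TSFlags) { return TSFlags & VECTOR; }
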
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
new file mode 100644
index 0000000..519f384
--- /dev/null
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -0,0 +1,1264 @@
+//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Tablegen instruction definitions
+//
+//===----------------------------------------------------------------------===//
+
+include "R600Intrinsics.td"
+
+class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin>
+ : AMDGPUInst <outs, ins, asm, pattern> {
+
+ field bits<32> Inst;
+ bit Trig = 0;
+ bit Op3 = 0;
+ bit isVector = 0;
+ bits<2> FlagOperandIdx = 0;
+
+ let Inst = inst;
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ let TSFlags{4} = Trig;
+ let TSFlags{5} = Op3;
+
+ // Vector instructions are instructions that must fill all slots in an
+ // instruction group
+ let TSFlags{6} = isVector;
+ let TSFlags{8-7} = FlagOperandIdx;
+}
+
+class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst <outs, ins, asm, pattern>
+{
+ field bits<64> Inst;
+
+ let Namespace = "AMDGPU";
+}
+
+def MEMxi : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
+}
+
+def MEMrr : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
+}
+
+def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
+def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
+def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+
+class R600_ALU {
+
+ bits<7> DST_GPR = 0;
+ bits<9> SRC0_SEL = 0;
+ bits<1> SRC0_NEG = 0;
+ bits<9> SRC1_SEL = 0;
+ bits<1> SRC1_NEG = 0;
+ bits<1> CLAMP = 0;
+
+}
+
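+// Predicate operand carried by ALU instructions; PRED_SEL_OFF is the default
+// (unpredicated) selection.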
+def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
+ (ops PRED_SEL_OFF)>;
+
+
+class R600_1OP <bits<32> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src, R600_Pred:$p, variable_ops),
+ !strconcat(opName, " $dst, $src ($p)"),
+ pattern,
+ itin
+ >;
+
+class R600_2OP <bits<32> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Pred:$p, variable_ops),
+ !strconcat(opName, " $dst, $src0, $src1"),
+ pattern,
+ itin
+ >;
+
+class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, R600_Pred:$p, variable_ops),
+ !strconcat(opName, " $dst, $src0, $src1, $src2"),
+ pattern,
+ itin> {
+ let Op3 = 1;
+}
+
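+// PRED_X compares $src0 against zero (the $src1 immediate selects the
+// comparison, e.g. OPCODE_IS_ZERO_INT) and sets the predicate bit that
+// conditional JUMPs consume.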
+def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst),
+ (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
+ "PRED $dst, $src0, $src1",
+ [], NullALU>
+{
+ let DisableEncoding = "$src0";
+ field bits<32> Inst;
+ bits<32> src1;
+
+ let Inst = src1;
+ let FlagOperandIdx = 3;
+}
+
+let isTerminator = 1, isBranch = 1, isPseudo = 1 in {
+def JUMP : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target, R600_Pred:$p),
+ "JUMP $target ($p)",
+ [], AnyALU
+ >;
+}
+
+class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin = VecALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ ins,
+ asm,
+ pattern,
+ itin
+ >;
+
+class R600_TEX <bits<32> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2),
+ !strconcat(opName, "$dst, $src0, $src1, $src2"),
+ pattern,
+ itin
+ >;
+
+def TEX_SHADOW : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12;
+ }]
+>;
+
+class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
+ dag ins, string asm, list<dag> pattern> :
+ InstR600ISA <outs, ins, asm, pattern>
+{
+ bits<7> RW_GPR;
+ bits<7> INDEX_GPR;
+
+ bits<2> RIM;
+ bits<2> TYPE;
+ bits<1> RW_REL;
+ bits<2> ELEM_SIZE;
+
+ bits<12> ARRAY_SIZE;
+ bits<4> COMP_MASK;
+ bits<4> BURST_COUNT;
+ bits<1> VPM;
+ bits<1> eop;
+ bits<1> MARK;
+ bits<1> BARRIER;
+
+ // CF_ALLOC_EXPORT_WORD0_RAT
+ let Inst{3-0} = rat_id;
+ let Inst{9-4} = rat_inst;
+ let Inst{10} = 0; // Reserved
+ let Inst{12-11} = RIM;
+ let Inst{14-13} = TYPE;
+ let Inst{21-15} = RW_GPR;
+ let Inst{22} = RW_REL;
+ let Inst{29-23} = INDEX_GPR;
+ let Inst{31-30} = ELEM_SIZE;
+
+ // CF_ALLOC_EXPORT_WORD1_BUF
+ let Inst{43-32} = ARRAY_SIZE;
+ let Inst{47-44} = COMP_MASK;
+ let Inst{51-48} = BURST_COUNT;
+ let Inst{52} = VPM;
+ let Inst{53} = eop;
+ let Inst{61-54} = cf_inst;
+ let Inst{62} = MARK;
+ let Inst{63} = BARRIER;
+}
+
+def load_param : PatFrag<(ops node:$ptr),
+ (load node:$ptr),
+ [{
+ const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
+ if (Src) {
+ PointerType * PT = dyn_cast<PointerType>(Src->getType());
+ return PT && PT->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS;
+ }
+ return false;
+ }]>;
+
+def isR600 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
+def isR700 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->getDeviceFlag()"
+ ">= OCL_DEVICE_RV710">;
+def isEG : Predicate<"Subtarget.device()"
+ "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
+ "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
+def isCayman : Predicate<"Subtarget.device()"
+ "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
+def isEGorCayman : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
+ "|| Subtarget.device()->getGeneration() =="
+ "AMDGPUDeviceInfo::HD6XXX">;
+
+def isR600toCayman : Predicate<
+ "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
+
+
+let Predicates = [isR600toCayman] in {
+
+//===----------------------------------------------------------------------===//
+// Common Instructions R600, R700, Evergreen, Cayman
+//===----------------------------------------------------------------------===//
+
+def ADD : R600_2OP <
+ 0x0, "ADD",
+ [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+// Non-IEEE MUL: 0 * anything = 0
+def MUL : R600_2OP <
+ 0x1, "MUL NON-IEEE",
+ [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def MUL_IEEE : R600_2OP <
+ 0x2, "MUL_IEEE",
+ [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def MAX : R600_2OP <
+ 0x3, "MAX",
+ [(set R600_Reg32:$dst, (AMDGPUfmax R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def MIN : R600_2OP <
+ 0x4, "MIN",
+ [(set R600_Reg32:$dst, (AMDGPUfmin R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
+// so some of the instruction names don't match the asm string.
+// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
+
+def SETE : R600_2OP <
+ 0x08, "SETE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_EQ))]
+>;
+
+def SGT : R600_2OP <
+ 0x09, "SETGT",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GT))]
+>;
+
+def SGE : R600_2OP <
+ 0xA, "SETGE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GE))]
+>;
+
+def SNE : R600_2OP <
+ 0xB, "SETNE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_NE))]
+>;
+
+def FRACT : R600_1OP <
+ 0x10, "FRACT",
+ [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))]
+>;
+
+def TRUNC : R600_1OP <
+ 0x11, "TRUNC",
+ [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))]
+>;
+
+def CEIL : R600_1OP <
+ 0x12, "CEIL",
+ [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))]
+>;
+
+def RNDNE : R600_1OP <
+ 0x13, "RNDNE",
+ [(set R600_Reg32:$dst, (frint R600_Reg32:$src))]
+>;
+
+def FLOOR : R600_1OP <
+ 0x14, "FLOOR",
+ [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))]
+>;
+
+def MOV : InstR600 <0x19, (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, i32imm:$flags,
+ R600_Pred:$p),
+ "MOV $dst, $src0", [], AnyALU> {
+ let FlagOperandIdx = 2;
+}
+
+class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm),
+ "MOV_IMM $dst, $imm",
+ [], AnyALU
+>;
+
+def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
+def : Pat <
+ (imm:$val),
+ (MOV_IMM_I32 (i32 ALU_LITERAL_X), imm:$val)
+>;
+
+def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
+def : Pat <
+ (fpimm:$val),
+ (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val)
+>;
+
+def KILLGT : InstR600 <0x2D,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p,
+ variable_ops),
+ "KILLGT $dst, $src0, $src1, $flags ($p)",
+ [],
+ NullALU> {
+ let FlagOperandIdx = 3;
+}
+
+def AND_INT : R600_2OP <
+ 0x30, "AND_INT",
+ [(set R600_Reg32:$dst, (and R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def OR_INT : R600_2OP <
+ 0x31, "OR_INT",
+ [(set R600_Reg32:$dst, (or R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def XOR_INT : R600_2OP <
+ 0x32, "XOR_INT",
+ [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def NOT_INT : R600_1OP <
+ 0x33, "NOT_INT",
+ [(set R600_Reg32:$dst, (not R600_Reg32:$src))]
+>;
+
+def ADD_INT : R600_2OP <
+ 0x34, "ADD_INT",
+ [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def SUB_INT : R600_2OP <
+ 0x35, "SUB_INT",
+ [(set R600_Reg32:$dst, (sub R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def MAX_INT : R600_2OP <
+ 0x36, "MAX_INT",
+ [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+def MIN_INT : R600_2OP <
+ 0x37, "MIN_INT",
+ [(set R600_Reg32:$dst, (AMDGPUsmin R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+def MAX_UINT : R600_2OP <
+ 0x38, "MAX_UINT",
+ [(set R600_Reg32:$dst, (AMDGPUumax R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def MIN_UINT : R600_2OP <
+ 0x39, "MIN_UINT",
+ [(set R600_Reg32:$dst, (AMDGPUumin R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+def SETE_INT : R600_2OP <
+ 0x3A, "SETE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
+>;
+
+def SETGT_INT : R600_2OP <
+ 0x3B, "SGT_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
+>;
+
+def SETGE_INT : R600_2OP <
+ 0x3C, "SETGE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
+>;
+
+def SETNE_INT : R600_2OP <
+ 0x3D, "SETNE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
+>;
+
+def SETGT_UINT : R600_2OP <
+ 0x3E, "SETGT_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
+>;
+
+def SETGE_UINT : R600_2OP <
+ 0x3F, "SETGE_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
+>;
+
+def CNDE_INT : R600_3OP <
+ 0x1C, "CNDE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Texture instructions
+//===----------------------------------------------------------------------===//
+
+def TEX_LD : R600_TEX <
+ 0x03, "TEX_LD",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$src4, imm:$src5))]
+> {
+let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $src4, $src5";
+let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5);
+}
+
+def TEX_GET_TEXTURE_RESINFO : R600_TEX <
+ 0x04, "TEX_GET_TEXTURE_RESINFO",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_GET_GRADIENTS_H : R600_TEX <
+ 0x07, "TEX_GET_GRADIENTS_H",
+ [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_GET_GRADIENTS_V : R600_TEX <
+ 0x08, "TEX_GET_GRADIENTS_V",
+ [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_SET_GRADIENTS_H : R600_TEX <
+ 0x0B, "TEX_SET_GRADIENTS_H",
+ []
+>;
+
+def TEX_SET_GRADIENTS_V : R600_TEX <
+ 0x0C, "TEX_SET_GRADIENTS_V",
+ []
+>;
+
+def TEX_SAMPLE : R600_TEX <
+ 0x10, "TEX_SAMPLE",
+ [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_SAMPLE_C : R600_TEX <
+ 0x18, "TEX_SAMPLE_C",
+ [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
+>;
+
+def TEX_SAMPLE_L : R600_TEX <
+ 0x11, "TEX_SAMPLE_L",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_SAMPLE_C_L : R600_TEX <
+ 0x19, "TEX_SAMPLE_C_L",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
+>;
+
+def TEX_SAMPLE_LB : R600_TEX <
+ 0x12, "TEX_SAMPLE_LB",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_SAMPLE_C_LB : R600_TEX <
+ 0x1A, "TEX_SAMPLE_C_LB",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
+>;
+
+def TEX_SAMPLE_G : R600_TEX <
+ 0x14, "TEX_SAMPLE_G",
+ []
+>;
+
+def TEX_SAMPLE_C_G : R600_TEX <
+ 0x1C, "TEX_SAMPLE_C_G",
+ []
+>;
+
+//===----------------------------------------------------------------------===//
+// Helper classes for common instructions
+//===----------------------------------------------------------------------===//
+
+class MUL_LIT_Common <bits<32> inst> : R600_3OP <
+ inst, "MUL_LIT",
+ []
+>;
+
+class MULADD_Common <bits<32> inst> : R600_3OP <
+ inst, "MULADD",
+ [(set (f32 R600_Reg32:$dst),
+ (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
+>;
+
+class CNDE_Common <bits<32> inst> : R600_3OP <
+ inst, "CNDE",
+ [(set (f32 R600_Reg32:$dst),
+ (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))]
+>;
+
+class CNDGT_Common <bits<32> inst> : R600_3OP <
+ inst, "CNDGT",
+ []
+>;
+
+class CNDGE_Common <bits<32> inst> : R600_3OP <
+ inst, "CNDGE",
+ [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
+>;
+
+class DOT4_Common <bits<32> inst> : R600_REDUCTION <
+ inst,
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags),
+ "DOT4 $dst $src0, $src1",
+ []
+ > {
+ let FlagOperandIdx = 3;
+}
+
+class DOT4_Pat <Instruction dot4> : Pat <
+ (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1),
+ (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0)
+>;
+
+multiclass CUBE_Common <bits<32> inst> {
+
+ def _pseudo : InstR600 <
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst $src",
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ VecALU
+ >;
+
+ def _real : InstR600 <
+ inst,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
+ "CUBE $dst, $src0, $src1",
+ [], VecALU
+ >{
+ let FlagOperandIdx = 3;
+ }
+}
+
+class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
+ inst, "EXP_IEEE",
+ [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))]
+>;
+
+class FLT_TO_INT_Common <bits<32> inst> : R600_1OP <
+ inst, "FLT_TO_INT",
+ [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))]
+>;
+
+class INT_TO_FLT_Common <bits<32> inst> : R600_1OP <
+ inst, "INT_TO_FLT",
+ [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))]
+>;
+
+class FLT_TO_UINT_Common <bits<32> inst> : R600_1OP <
+ inst, "FLT_TO_UINT",
+ [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))]
+>;
+
+class UINT_TO_FLT_Common <bits<32> inst> : R600_1OP <
+ inst, "UINT_TO_FLT",
+ [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))]
+>;
+
+class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP <
+ inst, "LOG_CLAMPED",
+ []
+>;
+
+class LOG_IEEE_Common <bits<32> inst> : R600_1OP <
+ inst, "LOG_IEEE",
+ [(set R600_Reg32:$dst, (int_AMDIL_log R600_Reg32:$src))]
+>;
+
+class LSHL_Common <bits<32> inst> : R600_2OP <
+ inst, "LSHL $dst, $src0, $src1",
+ [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+class LSHR_Common <bits<32> inst> : R600_2OP <
+ inst, "LSHR $dst, $src0, $src1",
+ [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+class ASHR_Common <bits<32> inst> : R600_2OP <
+ inst, "ASHR $dst, $src0, $src1",
+ [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+class MULHI_INT_Common <bits<32> inst> : R600_2OP <
+ inst, "MULHI_INT $dst, $src0, $src1",
+ [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+class MULHI_UINT_Common <bits<32> inst> : R600_2OP <
+ inst, "MULHI $dst, $src0, $src1",
+ [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+class MULLO_INT_Common <bits<32> inst> : R600_2OP <
+ inst, "MULLO_INT $dst, $src0, $src1",
+ [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))]
+>;
+
+class MULLO_UINT_Common <bits<32> inst> : R600_2OP <
+ inst, "MULLO_UINT $dst, $src0, $src1",
+ []
+>;
+
+class RECIP_CLAMPED_Common <bits<32> inst> : R600_1OP <
+ inst, "RECIP_CLAMPED",
+ []
+>;
+
+class RECIP_IEEE_Common <bits<32> inst> : R600_1OP <
+ inst, "RECIP_IEEE",
+ [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))]
+>;
+
+class RECIP_UINT_Common <bits<32> inst> : R600_1OP <
+ inst, "RECIP_INT $dst, $src",
+ [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))]
+>;
+
+class RECIPSQRT_CLAMPED_Common <bits<32> inst> : R600_1OP <
+ inst, "RECIPSQRT_CLAMPED",
+ [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))]
+>;
+
+class RECIPSQRT_IEEE_Common <bits<32> inst> : R600_1OP <
+ inst, "RECIPSQRT_IEEE",
+ []
+>;
+
+class SIN_Common <bits<32> inst> : R600_1OP <
+ inst, "SIN", []>{
+ let Trig = 1;
+}
+
+class COS_Common <bits<32> inst> : R600_1OP <
+ inst, "COS", []> {
+ let Trig = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper patterns for complex intrinsics
+//===----------------------------------------------------------------------===//
+
+class DIV_Common <InstR600 recip_ieee> : Pat<
+ (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
+ (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+>;
+
+class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat <
+ (int_AMDGPU_ssg R600_Reg32:$src),
+ (cndgt R600_Reg32:$src, (f32 ONE), (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE)))
+>;
+
+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
+ (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
+ (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
+>;
+
+//===----------------------------------------------------------------------===//
+// R600 / R700 Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR600] in {
+
+ def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
+ def MULADD_r600 : MULADD_Common<0x10>;
+ def CNDE_r600 : CNDE_Common<0x18>;
+ def CNDGT_r600 : CNDGT_Common<0x19>;
+ def CNDGE_r600 : CNDGE_Common<0x1A>;
+ def DOT4_r600 : DOT4_Common<0x50>;
+ def : DOT4_Pat <DOT4_r600>;
+ defm CUBE_r600 : CUBE_Common<0x52>;
+ def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
+ def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
+ def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
+ def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
+ def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
+ def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
+ def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
+ def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
+ def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
+ def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
+ def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
+ def SIN_r600 : SIN_Common<0x6E>;
+ def COS_r600 : COS_Common<0x6F>;
+ def ASHR_r600 : ASHR_Common<0x70>;
+ def LSHR_r600 : LSHR_Common<0x71>;
+ def LSHL_r600 : LSHL_Common<0x72>;
+ def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
+ def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
+ def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
+ def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
+ def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
+
+ def DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+ def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL, GPRF32>;
+ def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>;
+ def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
+
+} // End Predicates = [isR600]
+
+// Helper pattern for normalizing inputs to the trigonometric instructions on
+// R700+ cards: the hardware SIN/COS take an angle pre-scaled by 1/(2*pi), so
+// the input is first multiplied by CONST.TWO_PI_INV.
+class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat<
+ (intr R600_Reg32:$src),
+ (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src))
+>;
+
+//===----------------------------------------------------------------------===//
+// R700 Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR700] in {
+ def SIN_r700 : SIN_Common<0x6E>;
+ def COS_r700 : COS_Common<0x6F>;
+
+ // R700 normalizes inputs to SIN/COS the same as EG
+ def : TRIG_eg <SIN_r700, int_AMDGPU_sin>;
+ def : TRIG_eg <COS_r700, int_AMDGPU_cos>;
+}
+
+//===----------------------------------------------------------------------===//
+// Evergreen Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEG] in {
+
+def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
+
+def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
+def MULHI_INT_eg : MULHI_INT_Common<0x90>;
+def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
+def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
+
+} // End Predicates = [isEG]
+
+//===----------------------------------------------------------------------===//
+// Evergreen / Cayman Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+ // BFE_UINT - bit_extract, an optimization for mask and shift
+ // Src0 = Input
+ // Src1 = Offset
+ // Src2 = Width
+ //
+ // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
+ //
+ // Example Usage:
+ // (Offset, Width)
+ //
+ // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
+ // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
+ // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
+ // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
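+ // e.g. bit_extract(0xAABBCCDD, 8, 8) = (0xAABBCCDD << 16) >> 24 = 0xCC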
+ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
+ [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
+ R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
+ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
+ [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
+ def MULADD_eg : MULADD_Common<0x14>;
+ def ASHR_eg : ASHR_Common<0x15>;
+ def LSHR_eg : LSHR_Common<0x16>;
+ def LSHL_eg : LSHL_Common<0x17>;
+ def CNDE_eg : CNDE_Common<0x19>;
+ def CNDGT_eg : CNDGT_Common<0x1A>;
+ def CNDGE_eg : CNDGE_Common<0x1B>;
+ def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
+ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
+ def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
+ def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
+ def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
+ def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
+ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+ def SIN_eg : SIN_Common<0x8D>;
+ def COS_eg : COS_Common<0x8E>;
+ def DOT4_eg : DOT4_Common<0xBE>;
+ def : DOT4_Pat <DOT4_eg>;
+ defm CUBE_eg : CUBE_Common<0xC0>;
+
+ def DIV_eg : DIV_Common<RECIP_IEEE_eg>;
+ def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>;
+ def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>;
+ def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
+
+ def : TRIG_eg <SIN_eg, int_AMDGPU_sin>;
+ def : TRIG_eg <COS_eg, int_AMDGPU_cos>;
+
+ def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
+ let Pattern = [];
+ }
+
+ def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
+
+ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
+ let Pattern = [];
+ }
+
+ def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
+
+ def : Pat<(fp_to_sint R600_Reg32:$src),
+ (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>;
+
+ def : Pat<(fp_to_uint R600_Reg32:$src),
+ (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>;
+
+//===----------------------------------------------------------------------===//
+// Memory read/write instructions
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1 in {
+
+def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs),
+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop),
+ "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr, $eop",
+ []>
+{
+ let RIM = 0;
+ // XXX: Have a separate instruction for non-indexed writes.
+ let TYPE = 1;
+ let RW_REL = 0;
+ let ELEM_SIZE = 0;
+
+ let ARRAY_SIZE = 0;
+ let COMP_MASK = 1;
+ let BURST_COUNT = 0;
+ let VPM = 0;
+ let MARK = 0;
+ let BARRIER = 1;
+}
+
+} // End usesCustomInserter = 1
+
+// i32 global_store
+def : Pat <
+ (global_store (i32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
+ (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
+>;
+
+// Floating point global_store
+def : Pat <
+ (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr),
+ (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0)
+>;
+
+class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern>
+ : InstR600ISA <outs, (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", pattern> {
+
+ // Operands
+ bits<7> DST_GPR;
+ bits<7> SRC_GPR;
+
+ // Static fields
+ bits<5> VC_INST = 0;
+ bits<2> FETCH_TYPE = 2;
+ bits<1> FETCH_WHOLE_QUAD = 0;
+ bits<8> BUFFER_ID = buffer_id;
+ bits<1> SRC_REL = 0;
+ // XXX: We can infer this field based on the SRC_GPR. This would allow us
+ // to store vertex addresses in any channel, not just X.
+ bits<2> SRC_SEL_X = 0;
+ bits<6> MEGA_FETCH_COUNT;
+ bits<1> DST_REL = 0;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
+ // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored.
+ // However, based on my testing, if USE_CONST_FIELDS is set, then all of
+ // these fields need to be set to 0.
+ bits<1> USE_CONST_FIELDS = 0;
+ bits<6> DATA_FORMAT;
+ bits<2> NUM_FORMAT_ALL = 1;
+ bits<1> FORMAT_COMP_ALL = 0;
+ bits<1> SRF_MODE_ALL = 0;
+
+ // LLVM can only encode 64-bit instructions, so these fields are manually
+ // encoded in R600CodeEmitter
+ //
+ // bits<16> OFFSET;
+ // bits<2> ENDIAN_SWAP = 0;
+ // bits<1> CONST_BUF_NO_STRIDE = 0;
+ // bits<1> MEGA_FETCH = 0;
+ // bits<1> ALT_CONST = 0;
+ // bits<2> BUFFER_INDEX_MODE = 0;
+
+ // VTX_WORD0
+ let Inst{4-0} = VC_INST;
+ let Inst{6-5} = FETCH_TYPE;
+ let Inst{7} = FETCH_WHOLE_QUAD;
+ let Inst{15-8} = BUFFER_ID;
+ let Inst{22-16} = SRC_GPR;
+ let Inst{23} = SRC_REL;
+ let Inst{25-24} = SRC_SEL_X;
+ let Inst{31-26} = MEGA_FETCH_COUNT;
+
+ // VTX_WORD1_GPR
+ let Inst{38-32} = DST_GPR;
+ let Inst{39} = DST_REL;
+ let Inst{40} = 0; // Reserved
+ let Inst{43-41} = DST_SEL_X;
+ let Inst{46-44} = DST_SEL_Y;
+ let Inst{49-47} = DST_SEL_Z;
+ let Inst{52-50} = DST_SEL_W;
+ let Inst{53} = USE_CONST_FIELDS;
+ let Inst{59-54} = DATA_FORMAT;
+ let Inst{61-60} = NUM_FORMAT_ALL;
+ let Inst{62} = FORMAT_COMP_ALL;
+ let Inst{63} = SRF_MODE_ALL;
+
+ // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+ // is done in R600CodeEmitter)
+ //
+ // Inst{79-64} = OFFSET;
+ // Inst{81-80} = ENDIAN_SWAP;
+ // Inst{82} = CONST_BUF_NO_STRIDE;
+ // Inst{83} = MEGA_FETCH;
+ // Inst{84} = ALT_CONST;
+ // Inst{86-85} = BUFFER_INDEX_MODE;
+ // Inst{95-86} = 0; Reserved
+
+ // VTX_WORD3 (Padding)
+ //
+ // Inst{127-96} = 0;
+}
+
+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> {
+
+ let MEGA_FETCH_COUNT = 4;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 0xD; // COLOR_32
+
+ // This is not really necessary, but there were some GPU hangs that appeared
+ // to be caused by ALU instructions in the next instruction group that wrote
+ // to the $ptr registers of the VTX_READ.
+ // e.g.
+ // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24
+ // %T2_X<def> = MOV %ZERO
+ // Adding this constraint prevents this from happening.
+ let Constraints = "$ptr.ptr = $dst";
+}
+
+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <buffer_id, (outs R600_Reg128:$dst), pattern> {
+
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+
+ // XXX: Need to force VTX_READ_128 instructions to write to the same register
+ // that holds its buffer address to avoid potential hangs. We can't use
+ // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
+ // registers are different sizes.
+}
+
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+
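+// Parameter reads use buffer id 0; the global-memory reads below use id 1.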
+class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0,
+ [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>;
+def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>;
+
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 32-bit reads
+
+class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1,
+ [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>;
+def VTX_READ_GLOBAL_f32_eg : VTX_READ_GLOBAL_eg<f32>;
+
+// 128-bit reads
+
+class VTX_READ_GLOBAL_128_eg <ValueType vt> : VTX_READ_128_eg <1,
+ [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>;
+def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>;
+
+} // End Predicates = [isEGorCayman]
+
+let Predicates = [isCayman] in {
+
+let isVector = 1 in {
+
+def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
+
+def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
+def MULHI_INT_cm : MULHI_INT_Common<0x90>;
+def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
+
+} // End isVector = 1
+
+// RECIP_UINT emulation for Cayman
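+// 0x4f800000 is 2^32 encoded as an IEEE-754 float, so the sequence below
+// approximates 2^32 / src0 in floating point and truncates back to integer.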
+def : Pat <
+ (AMDGPUurecip R600_Reg32:$src0),
+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
+ (MOV_IMM_I32 (i32 ALU_LITERAL_X), 0x4f800000)))
+>;
+
+} // End isCayman
+
+let isCodeGenOnly = 1 in {
+
+ def MULLIT : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "MULLIT $dst, $src0, $src1",
+ [(set R600_Reg128:$dst, (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
+ >;
+
+let usesCustomInserter = 1, isPseudo = 1 in {
+
+class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst <
+ (outs R600_TReg32:$dst),
+ (ins),
+ asm,
+ [(set R600_TReg32:$dst, (intr))]
+>;
+
+def R600_LOAD_CONST : AMDGPUShaderInst <
+ (outs R600_Reg32:$dst),
+ (ins i32imm:$src0),
+ "R600_LOAD_CONST $dst, $src0",
+ [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
+>;
+
+def RESERVE_REG : AMDGPUShaderInst <
+ (outs),
+ (ins i32imm:$src),
+ "RESERVE_REG $src",
+ [(int_AMDGPU_reserve_reg imm:$src)]
+>;
+
+def TXD: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
+ "TXD $dst, $src0, $src1, $src2, $src3, $src4",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, imm:$src4))]
+>;
+
+def TXD_SHADOW: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
+ "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))]
+>;
+
+} // End usesCustomInserter = 1, isPseudo = 1
+
+} // End isCodeGenOnly = 1
+
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
+let usesCustomInserter = 1 in {
+
+def MASK_WRITE : AMDGPUShaderInst <
+ (outs),
+ (ins R600_Reg32:$src),
+ "MASK_WRITE $src",
+ []
+>;
+
+} // End usesCustomInserter = 1
+
+//===---------------------------------------------------------------------===//
+// Return instruction
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def RETURN : ILFormat<(outs), (ins variable_ops),
+ "RETURN", [(IL_retflag)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// ISel Patterns
+//===----------------------------------------------------------------------===//
+
+// KIL Patterns
+def KILP : Pat <
+ (int_AMDGPU_kilp),
+ (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0))
+>;
+
+def KIL : Pat <
+ (int_AMDGPU_kill R600_Reg32:$src0),
+ (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0))
+>;
+
+// SGT Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
+ (SGT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SGE Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
+ (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
+ (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
+ (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
+ (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// The next two patterns are special cases for handling 'true if ordered' and
+// 'true if unordered' conditionals. The assumption here is that the behavior
+// of SETE and SNE conforms to the Direct3D 10 rules for floating point values
+// described here:
+// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
+// We assume that SETE returns false when either operand is NaN and that SNE
+// returns true when either operand is NaN.
+
+// SETE - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
+ (SETE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+// SNE - 'true if unordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
+ (SNE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
+
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 4, sel_x>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>;
+
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
+
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 4, sel_x>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>;
+
+def : Vector_Build <v4f32, R600_Reg32>;
+def : Vector_Build <v4i32, R600_Reg32>;
+
+// bitconvert patterns
+
+def : BitConvert <i32, f32, R600_Reg32>;
+def : BitConvert <f32, i32, R600_Reg32>;
+def : BitConvert <v4f32, v4i32, R600_Reg128>;
+
+} // End isR600toCayman Predicate
diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td
new file mode 100644
index 0000000..0265388
--- /dev/null
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@@ -0,0 +1,16 @@
+//===-- R600Intrinsics.td - R600 Intrinsic defs ---------*- tablegen -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "R600", isTarget = 1 in {
+ def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+}
diff --git a/lib/Target/AMDGPU/R600KernelParameters.cpp b/lib/Target/AMDGPU/R600KernelParameters.cpp
new file mode 100644
index 0000000..b589fd9
--- /dev/null
+++ b/lib/Target/AMDGPU/R600KernelParameters.cpp
@@ -0,0 +1,462 @@
+//===-- R600KernelParameters.cpp - Lower kernel function arguments --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers kernel function arguments to loads from the vertex buffer.
+//
+// Kernel arguments are stored in the vertex buffer at an offset of 9 dwords,
+// so arg0 is loaded from VTX_BUFFER[9], arg1 from VTX_BUFFER[10], etc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/TypeBuilder.h"
+#include "llvm/Target/TargetData.h"
+
+#include <map>
+#include <set>
+
+using namespace llvm;
+
+namespace {
+
+#define CONSTANT_CACHE_SIZE_DW 127
+
+class R600KernelParameters : public FunctionPass {
+ const TargetData *TD;
+ LLVMContext* Context;
+ Module *Mod;
+
+ struct Param {
+ Param() : Val(NULL), PtrVal(NULL), OffsetInDW(0), SizeInDW(0),
+ IsIndirect(true), SpecialID(0) {}
+
+ Value* Val;
+ Value* PtrVal;
+ int OffsetInDW;
+ int SizeInDW;
+
+ bool IsIndirect;
+
+ std::string SpecialType;
+ int SpecialID;
+
+ int End() { return OffsetInDW + SizeInDW; }
+ // The first 9 dwords are reserved for the grid sizes.
+ int getRatOffset() { return 9 + OffsetInDW; }
+ };
+
+ std::vector<Param> Params;
+
+ bool IsOpenCLKernel(const Function *Fun);
+ int getLastSpecialID(const std::string& TypeName);
+
+ int getListSize();
+ void AddParam(Argument *Arg);
+ int CalculateArgumentSize(Argument *Arg);
+ void RunAna(Function *Fun);
+ void Replace(Function *Fun);
+ bool IsIndirect(Value *Val, std::set<Value*> &Visited);
+ void Propagate(Function* Fun);
+ void Propagate(Value *V, const Twine &Name, bool IsIndirect = true);
+ Value* ConstantRead(Function *Fun, Param &P);
+ Value* handleSpecial(Function *Fun, Param &P);
+ bool IsSpecialType(Type *T);
+ std::string getSpecialTypeName(Type *T);
+public:
+ static char ID;
+ R600KernelParameters() : FunctionPass(ID) {}
+ R600KernelParameters(const TargetData* TD) : FunctionPass(ID), TD(TD) {}
+ bool runOnFunction (Function &F);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ const char *getPassName() const;
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+};
+
+char R600KernelParameters::ID = 0;
+
+static RegisterPass<R600KernelParameters> X("kerparam",
+ "OpenCL Kernel Parameter conversion", false, false);
+
+bool R600KernelParameters::IsOpenCLKernel(const Function* Fun) {
+ Module *Mod = const_cast<Function*>(Fun)->getParent();
+ NamedMDNode * MD = Mod->getOrInsertNamedMetadata("opencl.kernels");
+
+ if (!MD || !MD->getNumOperands()) {
+ return false;
+ }
+
+ for (int i = 0; i < int(MD->getNumOperands()); i++) {
+ if (!MD->getOperand(i) || !MD->getOperand(i)->getOperand(0)) {
+ continue;
+ }
+
+ assert(MD->getOperand(i)->getNumOperands() == 1);
+
+ if (MD->getOperand(i)->getOperand(0)->getName() == Fun->getName()) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+int R600KernelParameters::getLastSpecialID(const std::string &TypeName) {
+ int LastID = -1;
+
+ for (std::vector<Param>::iterator I = Params.begin(); I != Params.end(); ++I) {
+ if (I->SpecialType == TypeName) {
+ LastID = I->SpecialID;
+ }
+ }
+
+ return LastID;
+}
+
+int R600KernelParameters::getListSize() {
+ if (Params.size() == 0) {
+ return 0;
+ }
+
+ return Params.back().End();
+}
+
+bool R600KernelParameters::IsIndirect(Value *Val, std::set<Value*> &Visited) {
+ //XXX Direct parameters are not supported yet, so return true here.
+ return true;
+#if 0
+ if (isa<LoadInst>(Val)) {
+ return false;
+ }
+
+ if (isa<IntegerType>(Val->getType())) {
+ assert(0 && "Internal error");
+ return false;
+ }
+
+ if (Visited.count(Val)) {
+ return false;
+ }
+
+ Visited.insert(Val);
+
+ if (isa<GetElementPtrInst>(Val)) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val);
+ GetElementPtrInst::op_iterator I = GEP->op_begin();
+
+ for (++I; I != GEP->op_end(); ++I) {
+ if (!isa<Constant>(*I)) {
+ return true;
+ }
+ }
+ }
+
+ for (Value::use_iterator I = Val->use_begin(); I != Val->use_end(); ++I) {
+ Value* V2 = dyn_cast<Value>(*I);
+
+ if (V2) {
+ if (IsIndirect(V2, Visited)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+#endif
+}
+
+void R600KernelParameters::AddParam(Argument *Arg) {
+ Param P;
+
+ P.Val = dyn_cast<Value>(Arg);
+ P.OffsetInDW = getListSize();
+ P.SizeInDW = CalculateArgumentSize(Arg);
+
+ if (isa<PointerType>(Arg->getType()) && Arg->hasByValAttr()) {
+ std::set<Value*> Visited;
+ P.IsIndirect = IsIndirect(P.Val, Visited);
+ }
+
+ Params.push_back(P);
+}
+
+int R600KernelParameters::CalculateArgumentSize(Argument *Arg) {
+ Type* T = Arg->getType();
+
+ if (Arg->hasByValAttr() && dyn_cast<PointerType>(T)) {
+ T = dyn_cast<PointerType>(T)->getElementType();
+ }
+
+ int StoreSizeInDW = (TD->getTypeStoreSize(T) + 3)/4;
+
+ assert(StoreSizeInDW);
+
+ return StoreSizeInDW;
+}
+
+
+void R600KernelParameters::RunAna(Function* Fun) {
+ assert(IsOpenCLKernel(Fun));
+
+ for (Function::arg_iterator I = Fun->arg_begin(); I != Fun->arg_end(); ++I) {
+ AddParam(I);
+ }
+
+}
+
+void R600KernelParameters::Replace(Function* Fun) {
+ for (std::vector<Param>::iterator I = Params.begin(); I != Params.end(); ++I) {
+ Value *NewVal;
+
+ if (IsSpecialType(I->Val->getType())) {
+ NewVal = handleSpecial(Fun, *I);
+ } else {
+ NewVal = ConstantRead(Fun, *I);
+ }
+ if (NewVal) {
+ I->Val->replaceAllUsesWith(NewVal);
+ }
+ }
+}
+
+void R600KernelParameters::Propagate(Function* Fun) {
+ for (std::vector<Param>::iterator I = Params.begin(); I != Params.end(); ++I) {
+ if (I->PtrVal) {
+ Propagate(I->PtrVal, I->Val->getName(), I->IsIndirect);
+ }
+ }
+}
+
+void R600KernelParameters::Propagate(Value* V, const Twine& Name, bool IsIndirect) {
+ LoadInst* Load = dyn_cast<LoadInst>(V);
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);
+
+ unsigned Addrspace;
+
+ if (IsIndirect) {
+ Addrspace = AMDGPUAS::PARAM_I_ADDRESS;
+ } else {
+ Addrspace = AMDGPUAS::PARAM_D_ADDRESS;
+ }
+
+ if (GEP && GEP->getType()->getAddressSpace() != Addrspace) {
+ Value *Op = GEP->getPointerOperand();
+
+ if (dyn_cast<PointerType>(Op->getType())->getAddressSpace() != Addrspace) {
+ Op = new BitCastInst(Op, PointerType::get(dyn_cast<PointerType>(
+ Op->getType())->getElementType(), Addrspace),
+ Name, dyn_cast<Instruction>(V));
+ }
+
+ std::vector<Value*> Params(GEP->idx_begin(), GEP->idx_end());
+
+ GetElementPtrInst* GEP2 = GetElementPtrInst::Create(Op, Params, Name,
+ dyn_cast<Instruction>(V));
+ GEP2->setIsInBounds(GEP->isInBounds());
+ V = dyn_cast<Value>(GEP2);
+ GEP->replaceAllUsesWith(GEP2);
+ GEP->eraseFromParent();
+ Load = NULL;
+ }
+
+ if (Load) {
+ // Normally at this point we already have the right address space.
+ if (Load->getPointerAddressSpace() != Addrspace) {
+ Value *OrigPtr = Load->getPointerOperand();
+ PointerType *OrigPtrType = dyn_cast<PointerType>(OrigPtr->getType());
+
+ Type* NewPtrType = PointerType::get(OrigPtrType->getElementType(),
+ Addrspace);
+
+ Value* NewPtr = OrigPtr;
+
+ if (OrigPtr->getType() != NewPtrType) {
+ NewPtr = new BitCastInst(OrigPtr, NewPtrType, "prop_cast", Load);
+ }
+
+ Value *NewLoad = new LoadInst(NewPtr, Name, Load);
+ Load->replaceAllUsesWith(NewLoad);
+ Load->eraseFromParent();
+ }
+
+ return;
+ }
+
+ std::vector<User*> Users(V->use_begin(), V->use_end());
+
+ for (int i = 0; i < int(Users.size()); i++) {
+ Value* V2 = dyn_cast<Value>(Users[i]);
+
+ if (V2) {
+ Propagate(V2, Name, IsIndirect);
+ }
+ }
+}
+
+Value* R600KernelParameters::ConstantRead(Function *Fun, Param &P) {
+ assert(Fun->front().begin() != Fun->front().end());
+
+ Instruction *FirstInst = Fun->front().begin();
+ IRBuilder<> Builder(FirstInst);
+ // The first 3 dwords are reserved for the dimension info.
+
+ if (!P.Val->hasNUsesOrMore(1)) {
+ return NULL;
+ }
+ unsigned Addrspace;
+
+ if (P.IsIndirect) {
+ Addrspace = AMDGPUAS::PARAM_I_ADDRESS;
+ } else {
+ Addrspace = AMDGPUAS::PARAM_D_ADDRESS;
+ }
+
+ Argument *Arg = dyn_cast<Argument>(P.Val);
+ Type * ArgType = P.Val->getType();
+ PointerType * ArgPtrType = dyn_cast<PointerType>(P.Val->getType());
+
+ if (ArgPtrType && Arg->hasByValAttr()) {
+ Value* ParamAddrSpacePtr = ConstantPointerNull::get(
+ PointerType::get(Type::getInt32Ty(*Context),
+ Addrspace));
+ Value* ParamPtr = GetElementPtrInst::Create(ParamAddrSpacePtr,
+ ConstantInt::get(Type::getInt32Ty(*Context),
+ P.getRatOffset()), Arg->getName(),
+ FirstInst);
+ ParamPtr = new BitCastInst(ParamPtr,
+ PointerType::get(ArgPtrType->getElementType(),
+ Addrspace),
+ Arg->getName(), FirstInst);
+ P.PtrVal = ParamPtr;
+ return ParamPtr;
+ } else {
+ Value *ParamAddrSpacePtr = ConstantPointerNull::get(PointerType::get(
+ ArgType, Addrspace));
+
+ Value *ParamPtr = Builder.CreateGEP(ParamAddrSpacePtr,
+ ConstantInt::get(Type::getInt32Ty(*Context), P.getRatOffset()),
+ Arg->getName());
+
+ Value *ParamValue = Builder.CreateLoad(ParamPtr, Arg->getName());
+
+ return ParamValue;
+ }
+}
+
+Value* R600KernelParameters::handleSpecial(Function* Fun, Param& P) {
+ std::string Name = getSpecialTypeName(P.Val->getType());
+ int ID;
+
+ assert(!Name.empty());
+
+ if (Name == "image2d_t" || Name == "image3d_t") {
+ int LastID = std::max(getLastSpecialID("image2d_t"),
+ getLastSpecialID("image3d_t"));
+
+ if (LastID == -1) {
+ ID = 2; // IDs 0 and 1 are used internally by the driver.
+ } else {
+ ID = LastID + 1;
+ }
+ } else if (Name == "sampler_t") {
+ int LastID = getLastSpecialID("sampler_t");
+
+ if (LastID == -1) {
+ ID = 0;
+ } else {
+ ID = LastID + 1;
+ }
+ } else {
+ // TODO: Emit a proper diagnostic for unhandled special argument types.
+ return NULL;
+ }
+
+ P.SpecialType = Name;
+ P.SpecialID = ID;
+
+ Instruction *FirstInst = Fun->front().begin();
+
+ return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context),
+ P.SpecialID), P.Val->getType(),
+ "resourceID", FirstInst);
+}
+
+
+bool R600KernelParameters::IsSpecialType(Type* T) {
+ return !getSpecialTypeName(T).empty();
+}
+
+std::string R600KernelParameters::getSpecialTypeName(Type* T) {
+ PointerType *PT = dyn_cast<PointerType>(T);
+ StructType *ST = NULL;
+
+ if (PT) {
+ ST = dyn_cast<StructType>(PT->getElementType());
+ }
+
+ if (ST) {
+ std::string Prefix = "struct.opencl_builtin_type_";
+
+ std::string Name = ST->getName().str();
+
+ if (Name.substr(0, Prefix.length()) == Prefix) {
+ return Name.substr(Prefix.length());
+ }
+ }
+
+ return "";
+}
+
+
+bool R600KernelParameters::runOnFunction(Function &F) {
+ if (!IsOpenCLKernel(&F)) {
+ return false;
+ }
+
+ RunAna(&F);
+ Replace(&F);
+ Propagate(&F);
+
+ return false;
+}
+
+void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
+
+const char *R600KernelParameters::getPassName() const {
+ return "OpenCL Kernel parameter conversion to memory";
+}
+
+bool R600KernelParameters::doInitialization(Module &M) {
+ Context = &M.getContext();
+ Mod = &M;
+
+ return false;
+}
+
+bool R600KernelParameters::doFinalization(Module &M) {
+ return false;
+}
+
+} // End anonymous namespace
+
+FunctionPass* llvm::createR600KernelParametersPass(const TargetData* TD) {
+ return new R600KernelParameters(TD);
+}
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
new file mode 100644
index 0000000..48443fb
--- /dev/null
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@@ -0,0 +1,16 @@
+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+  : MachineFunctionInfo() {
+}
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
new file mode 100644
index 0000000..948e192
--- /dev/null
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -0,0 +1,33 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600MachineFunctionInfo is used for keeping track of which registers have
+// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINEFUNCTIONINFO_H
+#define R600MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include <vector>
+
+namespace llvm {
+
+class R600MachineFunctionInfo : public MachineFunctionInfo {
+
+public:
+ R600MachineFunctionInfo(const MachineFunction &MF);
+ std::vector<unsigned> ReservedRegs;
+
+};
+
+} // End llvm namespace
+
+#endif // R600MACHINEFUNCTIONINFO_H
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp
new file mode 100644
index 0000000..ac34614
--- /dev/null
+++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -0,0 +1,108 @@
+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the R600 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600RegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
+{
+ BitVector Reserved(getNumRegs());
+ const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+
+ Reserved.set(AMDGPU::ZERO);
+ Reserved.set(AMDGPU::HALF);
+ Reserved.set(AMDGPU::ONE);
+ Reserved.set(AMDGPU::ONE_INT);
+ Reserved.set(AMDGPU::NEG_HALF);
+ Reserved.set(AMDGPU::NEG_ONE);
+ Reserved.set(AMDGPU::PV_X);
+ Reserved.set(AMDGPU::ALU_LITERAL_X);
+ Reserved.set(AMDGPU::PREDICATE_BIT);
+ Reserved.set(AMDGPU::PRED_SEL_OFF);
+ Reserved.set(AMDGPU::PRED_SEL_ZERO);
+ Reserved.set(AMDGPU::PRED_SEL_ONE);
+
+ for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
+ E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
+ E = MFI->ReservedRegs.end(); I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ return Reserved;
+}
+
+const TargetRegisterClass *
+R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
+{
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ case AMDGPU::GPRI32RegClassID:
+ return &AMDGPU::R600_Reg32RegClass;
+ default: return rc;
+ }
+}
+
+unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
+{
+ switch (reg) {
+ case AMDGPU::ZERO:
+ case AMDGPU::ONE:
+ case AMDGPU::ONE_INT:
+ case AMDGPU::NEG_ONE:
+ case AMDGPU::HALF:
+ case AMDGPU::NEG_HALF:
+ case AMDGPU::ALU_LITERAL_X:
+ case AMDGPU::PREDICATE_BIT:
+ case AMDGPU::PRED_SEL_OFF:
+ case AMDGPU::PRED_SEL_ZERO:
+ case AMDGPU::PRED_SEL_ONE:
+ return 0;
+ default: return getHWRegChanGen(reg);
+ }
+}
+
+const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const
+{
+ switch (VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
+ }
+}
+
+unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const
+{
+ switch (Channel) {
+ default: assert(!"Invalid channel index"); return 0;
+ case 0: return AMDGPU::sel_x;
+ case 1: return AMDGPU::sel_y;
+ case 2: return AMDGPU::sel_z;
+ case 3: return AMDGPU::sel_w;
+ }
+}
+
+#include "R600HwRegInfo.include"
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.h b/lib/Target/AMDGPU/R600RegisterInfo.h
new file mode 100644
index 0000000..853adb0
--- /dev/null
+++ b/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -0,0 +1,58 @@
+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface definition for R600RegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600REGISTERINFO_H_
+#define R600REGISTERINFO_H_
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPURegisterInfo.h"
+
+namespace llvm {
+
+class R600TargetMachine;
+class TargetInstrInfo;
+
+struct R600RegisterInfo : public AMDGPURegisterInfo
+{
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// getISARegClass - rc is an AMDIL reg class. This function returns the
+ /// R600 reg class that is equivalent to the given AMDIL reg class.
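+ /// (e.g. AMDGPU::GPRF32RegClass maps to AMDGPU::R600_Reg32RegClass).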
+ virtual const TargetRegisterClass * getISARegClass(
+ const TargetRegisterClass * rc) const;
+
+ /// getHWRegChan - get the HW encoding for a register's channel.
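+ /// Special registers (constants, predicates, etc.) report channel 0;
+ /// ordinary registers use the generated getHWRegChanGen() table.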
+ unsigned getHWRegChan(unsigned reg) const;
+
+ /// getCFGStructurizerRegClass - get the register class of the specified
+ /// type to use in the CFGStructurizer
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+
+ /// getSubRegFromChannel - Return the sub reg enum value for the given
+ /// Channel (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
+ unsigned getSubRegFromChannel(unsigned Channel) const;
+
+private:
+ /// getHWRegChanGen - Generated function returns a register's channel
+ /// encoding.
+ unsigned getHWRegChanGen(unsigned reg) const;
+};
+
+} // End namespace llvm
+
+#endif // R600REGISTERINFO_H_
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td
new file mode 100644
index 0000000..6904838
--- /dev/null
+++ b/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -0,0 +1,1215 @@
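+// R600 register definitions: the Cn constant registers, the Tn temporary
+// registers (each with X/Y/Z/W channels), the special registers, and the
+// register classes built from them.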
+
+class R600Reg <string name, bits<16> encoding> : Register<name> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
+ let HWEncoding = encoding;
+}
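+
+// The Cn registers below live in the constant cache; the Tn registers are
+// the per-channel temporaries. Encodings repeat across channels because the
+// channel is encoded separately (see getHWRegChan in R600RegisterInfo).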
+
+def C0_X : R600Reg <"C0.X", 0>;
+def C0_Y : R600Reg <"C0.Y", 0>;
+def C0_Z : R600Reg <"C0.Z", 0>;
+def C0_W : R600Reg <"C0.W", 0>;
+def C1_X : R600Reg <"C1.X", 1>;
+def C1_Y : R600Reg <"C1.Y", 1>;
+def C1_Z : R600Reg <"C1.Z", 1>;
+def C1_W : R600Reg <"C1.W", 1>;
+def C2_X : R600Reg <"C2.X", 2>;
+def C2_Y : R600Reg <"C2.Y", 2>;
+def C2_Z : R600Reg <"C2.Z", 2>;
+def C2_W : R600Reg <"C2.W", 2>;
+def C3_X : R600Reg <"C3.X", 3>;
+def C3_Y : R600Reg <"C3.Y", 3>;
+def C3_Z : R600Reg <"C3.Z", 3>;
+def C3_W : R600Reg <"C3.W", 3>;
+def C4_X : R600Reg <"C4.X", 4>;
+def C4_Y : R600Reg <"C4.Y", 4>;
+def C4_Z : R600Reg <"C4.Z", 4>;
+def C4_W : R600Reg <"C4.W", 4>;
+def C5_X : R600Reg <"C5.X", 5>;
+def C5_Y : R600Reg <"C5.Y", 5>;
+def C5_Z : R600Reg <"C5.Z", 5>;
+def C5_W : R600Reg <"C5.W", 5>;
+def C6_X : R600Reg <"C6.X", 6>;
+def C6_Y : R600Reg <"C6.Y", 6>;
+def C6_Z : R600Reg <"C6.Z", 6>;
+def C6_W : R600Reg <"C6.W", 6>;
+def C7_X : R600Reg <"C7.X", 7>;
+def C7_Y : R600Reg <"C7.Y", 7>;
+def C7_Z : R600Reg <"C7.Z", 7>;
+def C7_W : R600Reg <"C7.W", 7>;
+def C8_X : R600Reg <"C8.X", 8>;
+def C8_Y : R600Reg <"C8.Y", 8>;
+def C8_Z : R600Reg <"C8.Z", 8>;
+def C8_W : R600Reg <"C8.W", 8>;
+def C9_X : R600Reg <"C9.X", 9>;
+def C9_Y : R600Reg <"C9.Y", 9>;
+def C9_Z : R600Reg <"C9.Z", 9>;
+def C9_W : R600Reg <"C9.W", 9>;
+def C10_X : R600Reg <"C10.X", 10>;
+def C10_Y : R600Reg <"C10.Y", 10>;
+def C10_Z : R600Reg <"C10.Z", 10>;
+def C10_W : R600Reg <"C10.W", 10>;
+def C11_X : R600Reg <"C11.X", 11>;
+def C11_Y : R600Reg <"C11.Y", 11>;
+def C11_Z : R600Reg <"C11.Z", 11>;
+def C11_W : R600Reg <"C11.W", 11>;
+def C12_X : R600Reg <"C12.X", 12>;
+def C12_Y : R600Reg <"C12.Y", 12>;
+def C12_Z : R600Reg <"C12.Z", 12>;
+def C12_W : R600Reg <"C12.W", 12>;
+def C13_X : R600Reg <"C13.X", 13>;
+def C13_Y : R600Reg <"C13.Y", 13>;
+def C13_Z : R600Reg <"C13.Z", 13>;
+def C13_W : R600Reg <"C13.W", 13>;
+def C14_X : R600Reg <"C14.X", 14>;
+def C14_Y : R600Reg <"C14.Y", 14>;
+def C14_Z : R600Reg <"C14.Z", 14>;
+def C14_W : R600Reg <"C14.W", 14>;
+def C15_X : R600Reg <"C15.X", 15>;
+def C15_Y : R600Reg <"C15.Y", 15>;
+def C15_Z : R600Reg <"C15.Z", 15>;
+def C15_W : R600Reg <"C15.W", 15>;
+def C16_X : R600Reg <"C16.X", 16>;
+def C16_Y : R600Reg <"C16.Y", 16>;
+def C16_Z : R600Reg <"C16.Z", 16>;
+def C16_W : R600Reg <"C16.W", 16>;
+def C17_X : R600Reg <"C17.X", 17>;
+def C17_Y : R600Reg <"C17.Y", 17>;
+def C17_Z : R600Reg <"C17.Z", 17>;
+def C17_W : R600Reg <"C17.W", 17>;
+def C18_X : R600Reg <"C18.X", 18>;
+def C18_Y : R600Reg <"C18.Y", 18>;
+def C18_Z : R600Reg <"C18.Z", 18>;
+def C18_W : R600Reg <"C18.W", 18>;
+def C19_X : R600Reg <"C19.X", 19>;
+def C19_Y : R600Reg <"C19.Y", 19>;
+def C19_Z : R600Reg <"C19.Z", 19>;
+def C19_W : R600Reg <"C19.W", 19>;
+def C20_X : R600Reg <"C20.X", 20>;
+def C20_Y : R600Reg <"C20.Y", 20>;
+def C20_Z : R600Reg <"C20.Z", 20>;
+def C20_W : R600Reg <"C20.W", 20>;
+def C21_X : R600Reg <"C21.X", 21>;
+def C21_Y : R600Reg <"C21.Y", 21>;
+def C21_Z : R600Reg <"C21.Z", 21>;
+def C21_W : R600Reg <"C21.W", 21>;
+def C22_X : R600Reg <"C22.X", 22>;
+def C22_Y : R600Reg <"C22.Y", 22>;
+def C22_Z : R600Reg <"C22.Z", 22>;
+def C22_W : R600Reg <"C22.W", 22>;
+def C23_X : R600Reg <"C23.X", 23>;
+def C23_Y : R600Reg <"C23.Y", 23>;
+def C23_Z : R600Reg <"C23.Z", 23>;
+def C23_W : R600Reg <"C23.W", 23>;
+def C24_X : R600Reg <"C24.X", 24>;
+def C24_Y : R600Reg <"C24.Y", 24>;
+def C24_Z : R600Reg <"C24.Z", 24>;
+def C24_W : R600Reg <"C24.W", 24>;
+def C25_X : R600Reg <"C25.X", 25>;
+def C25_Y : R600Reg <"C25.Y", 25>;
+def C25_Z : R600Reg <"C25.Z", 25>;
+def C25_W : R600Reg <"C25.W", 25>;
+def C26_X : R600Reg <"C26.X", 26>;
+def C26_Y : R600Reg <"C26.Y", 26>;
+def C26_Z : R600Reg <"C26.Z", 26>;
+def C26_W : R600Reg <"C26.W", 26>;
+def C27_X : R600Reg <"C27.X", 27>;
+def C27_Y : R600Reg <"C27.Y", 27>;
+def C27_Z : R600Reg <"C27.Z", 27>;
+def C27_W : R600Reg <"C27.W", 27>;
+def C28_X : R600Reg <"C28.X", 28>;
+def C28_Y : R600Reg <"C28.Y", 28>;
+def C28_Z : R600Reg <"C28.Z", 28>;
+def C28_W : R600Reg <"C28.W", 28>;
+def C29_X : R600Reg <"C29.X", 29>;
+def C29_Y : R600Reg <"C29.Y", 29>;
+def C29_Z : R600Reg <"C29.Z", 29>;
+def C29_W : R600Reg <"C29.W", 29>;
+def C30_X : R600Reg <"C30.X", 30>;
+def C30_Y : R600Reg <"C30.Y", 30>;
+def C30_Z : R600Reg <"C30.Z", 30>;
+def C30_W : R600Reg <"C30.W", 30>;
+def C31_X : R600Reg <"C31.X", 31>;
+def C31_Y : R600Reg <"C31.Y", 31>;
+def C31_Z : R600Reg <"C31.Z", 31>;
+def C31_W : R600Reg <"C31.W", 31>;
+def C32_X : R600Reg <"C32.X", 32>;
+def C32_Y : R600Reg <"C32.Y", 32>;
+def C32_Z : R600Reg <"C32.Z", 32>;
+def C32_W : R600Reg <"C32.W", 32>;
+def C33_X : R600Reg <"C33.X", 33>;
+def C33_Y : R600Reg <"C33.Y", 33>;
+def C33_Z : R600Reg <"C33.Z", 33>;
+def C33_W : R600Reg <"C33.W", 33>;
+def C34_X : R600Reg <"C34.X", 34>;
+def C34_Y : R600Reg <"C34.Y", 34>;
+def C34_Z : R600Reg <"C34.Z", 34>;
+def C34_W : R600Reg <"C34.W", 34>;
+def C35_X : R600Reg <"C35.X", 35>;
+def C35_Y : R600Reg <"C35.Y", 35>;
+def C35_Z : R600Reg <"C35.Z", 35>;
+def C35_W : R600Reg <"C35.W", 35>;
+def C36_X : R600Reg <"C36.X", 36>;
+def C36_Y : R600Reg <"C36.Y", 36>;
+def C36_Z : R600Reg <"C36.Z", 36>;
+def C36_W : R600Reg <"C36.W", 36>;
+def C37_X : R600Reg <"C37.X", 37>;
+def C37_Y : R600Reg <"C37.Y", 37>;
+def C37_Z : R600Reg <"C37.Z", 37>;
+def C37_W : R600Reg <"C37.W", 37>;
+def C38_X : R600Reg <"C38.X", 38>;
+def C38_Y : R600Reg <"C38.Y", 38>;
+def C38_Z : R600Reg <"C38.Z", 38>;
+def C38_W : R600Reg <"C38.W", 38>;
+def C39_X : R600Reg <"C39.X", 39>;
+def C39_Y : R600Reg <"C39.Y", 39>;
+def C39_Z : R600Reg <"C39.Z", 39>;
+def C39_W : R600Reg <"C39.W", 39>;
+def C40_X : R600Reg <"C40.X", 40>;
+def C40_Y : R600Reg <"C40.Y", 40>;
+def C40_Z : R600Reg <"C40.Z", 40>;
+def C40_W : R600Reg <"C40.W", 40>;
+def C41_X : R600Reg <"C41.X", 41>;
+def C41_Y : R600Reg <"C41.Y", 41>;
+def C41_Z : R600Reg <"C41.Z", 41>;
+def C41_W : R600Reg <"C41.W", 41>;
+def C42_X : R600Reg <"C42.X", 42>;
+def C42_Y : R600Reg <"C42.Y", 42>;
+def C42_Z : R600Reg <"C42.Z", 42>;
+def C42_W : R600Reg <"C42.W", 42>;
+def C43_X : R600Reg <"C43.X", 43>;
+def C43_Y : R600Reg <"C43.Y", 43>;
+def C43_Z : R600Reg <"C43.Z", 43>;
+def C43_W : R600Reg <"C43.W", 43>;
+def C44_X : R600Reg <"C44.X", 44>;
+def C44_Y : R600Reg <"C44.Y", 44>;
+def C44_Z : R600Reg <"C44.Z", 44>;
+def C44_W : R600Reg <"C44.W", 44>;
+def C45_X : R600Reg <"C45.X", 45>;
+def C45_Y : R600Reg <"C45.Y", 45>;
+def C45_Z : R600Reg <"C45.Z", 45>;
+def C45_W : R600Reg <"C45.W", 45>;
+def C46_X : R600Reg <"C46.X", 46>;
+def C46_Y : R600Reg <"C46.Y", 46>;
+def C46_Z : R600Reg <"C46.Z", 46>;
+def C46_W : R600Reg <"C46.W", 46>;
+def C47_X : R600Reg <"C47.X", 47>;
+def C47_Y : R600Reg <"C47.Y", 47>;
+def C47_Z : R600Reg <"C47.Z", 47>;
+def C47_W : R600Reg <"C47.W", 47>;
+def C48_X : R600Reg <"C48.X", 48>;
+def C48_Y : R600Reg <"C48.Y", 48>;
+def C48_Z : R600Reg <"C48.Z", 48>;
+def C48_W : R600Reg <"C48.W", 48>;
+def C49_X : R600Reg <"C49.X", 49>;
+def C49_Y : R600Reg <"C49.Y", 49>;
+def C49_Z : R600Reg <"C49.Z", 49>;
+def C49_W : R600Reg <"C49.W", 49>;
+def C50_X : R600Reg <"C50.X", 50>;
+def C50_Y : R600Reg <"C50.Y", 50>;
+def C50_Z : R600Reg <"C50.Z", 50>;
+def C50_W : R600Reg <"C50.W", 50>;
+def C51_X : R600Reg <"C51.X", 51>;
+def C51_Y : R600Reg <"C51.Y", 51>;
+def C51_Z : R600Reg <"C51.Z", 51>;
+def C51_W : R600Reg <"C51.W", 51>;
+def C52_X : R600Reg <"C52.X", 52>;
+def C52_Y : R600Reg <"C52.Y", 52>;
+def C52_Z : R600Reg <"C52.Z", 52>;
+def C52_W : R600Reg <"C52.W", 52>;
+def C53_X : R600Reg <"C53.X", 53>;
+def C53_Y : R600Reg <"C53.Y", 53>;
+def C53_Z : R600Reg <"C53.Z", 53>;
+def C53_W : R600Reg <"C53.W", 53>;
+def C54_X : R600Reg <"C54.X", 54>;
+def C54_Y : R600Reg <"C54.Y", 54>;
+def C54_Z : R600Reg <"C54.Z", 54>;
+def C54_W : R600Reg <"C54.W", 54>;
+def C55_X : R600Reg <"C55.X", 55>;
+def C55_Y : R600Reg <"C55.Y", 55>;
+def C55_Z : R600Reg <"C55.Z", 55>;
+def C55_W : R600Reg <"C55.W", 55>;
+def C56_X : R600Reg <"C56.X", 56>;
+def C56_Y : R600Reg <"C56.Y", 56>;
+def C56_Z : R600Reg <"C56.Z", 56>;
+def C56_W : R600Reg <"C56.W", 56>;
+def C57_X : R600Reg <"C57.X", 57>;
+def C57_Y : R600Reg <"C57.Y", 57>;
+def C57_Z : R600Reg <"C57.Z", 57>;
+def C57_W : R600Reg <"C57.W", 57>;
+def C58_X : R600Reg <"C58.X", 58>;
+def C58_Y : R600Reg <"C58.Y", 58>;
+def C58_Z : R600Reg <"C58.Z", 58>;
+def C58_W : R600Reg <"C58.W", 58>;
+def C59_X : R600Reg <"C59.X", 59>;
+def C59_Y : R600Reg <"C59.Y", 59>;
+def C59_Z : R600Reg <"C59.Z", 59>;
+def C59_W : R600Reg <"C59.W", 59>;
+def C60_X : R600Reg <"C60.X", 60>;
+def C60_Y : R600Reg <"C60.Y", 60>;
+def C60_Z : R600Reg <"C60.Z", 60>;
+def C60_W : R600Reg <"C60.W", 60>;
+def C61_X : R600Reg <"C61.X", 61>;
+def C61_Y : R600Reg <"C61.Y", 61>;
+def C61_Z : R600Reg <"C61.Z", 61>;
+def C61_W : R600Reg <"C61.W", 61>;
+def C62_X : R600Reg <"C62.X", 62>;
+def C62_Y : R600Reg <"C62.Y", 62>;
+def C62_Z : R600Reg <"C62.Z", 62>;
+def C62_W : R600Reg <"C62.W", 62>;
+def C63_X : R600Reg <"C63.X", 63>;
+def C63_Y : R600Reg <"C63.Y", 63>;
+def C63_Z : R600Reg <"C63.Z", 63>;
+def C63_W : R600Reg <"C63.W", 63>;
+def C64_X : R600Reg <"C64.X", 64>;
+def C64_Y : R600Reg <"C64.Y", 64>;
+def C64_Z : R600Reg <"C64.Z", 64>;
+def C64_W : R600Reg <"C64.W", 64>;
+def C65_X : R600Reg <"C65.X", 65>;
+def C65_Y : R600Reg <"C65.Y", 65>;
+def C65_Z : R600Reg <"C65.Z", 65>;
+def C65_W : R600Reg <"C65.W", 65>;
+def C66_X : R600Reg <"C66.X", 66>;
+def C66_Y : R600Reg <"C66.Y", 66>;
+def C66_Z : R600Reg <"C66.Z", 66>;
+def C66_W : R600Reg <"C66.W", 66>;
+def C67_X : R600Reg <"C67.X", 67>;
+def C67_Y : R600Reg <"C67.Y", 67>;
+def C67_Z : R600Reg <"C67.Z", 67>;
+def C67_W : R600Reg <"C67.W", 67>;
+def C68_X : R600Reg <"C68.X", 68>;
+def C68_Y : R600Reg <"C68.Y", 68>;
+def C68_Z : R600Reg <"C68.Z", 68>;
+def C68_W : R600Reg <"C68.W", 68>;
+def C69_X : R600Reg <"C69.X", 69>;
+def C69_Y : R600Reg <"C69.Y", 69>;
+def C69_Z : R600Reg <"C69.Z", 69>;
+def C69_W : R600Reg <"C69.W", 69>;
+def C70_X : R600Reg <"C70.X", 70>;
+def C70_Y : R600Reg <"C70.Y", 70>;
+def C70_Z : R600Reg <"C70.Z", 70>;
+def C70_W : R600Reg <"C70.W", 70>;
+def C71_X : R600Reg <"C71.X", 71>;
+def C71_Y : R600Reg <"C71.Y", 71>;
+def C71_Z : R600Reg <"C71.Z", 71>;
+def C71_W : R600Reg <"C71.W", 71>;
+def C72_X : R600Reg <"C72.X", 72>;
+def C72_Y : R600Reg <"C72.Y", 72>;
+def C72_Z : R600Reg <"C72.Z", 72>;
+def C72_W : R600Reg <"C72.W", 72>;
+def C73_X : R600Reg <"C73.X", 73>;
+def C73_Y : R600Reg <"C73.Y", 73>;
+def C73_Z : R600Reg <"C73.Z", 73>;
+def C73_W : R600Reg <"C73.W", 73>;
+def C74_X : R600Reg <"C74.X", 74>;
+def C74_Y : R600Reg <"C74.Y", 74>;
+def C74_Z : R600Reg <"C74.Z", 74>;
+def C74_W : R600Reg <"C74.W", 74>;
+def C75_X : R600Reg <"C75.X", 75>;
+def C75_Y : R600Reg <"C75.Y", 75>;
+def C75_Z : R600Reg <"C75.Z", 75>;
+def C75_W : R600Reg <"C75.W", 75>;
+def C76_X : R600Reg <"C76.X", 76>;
+def C76_Y : R600Reg <"C76.Y", 76>;
+def C76_Z : R600Reg <"C76.Z", 76>;
+def C76_W : R600Reg <"C76.W", 76>;
+def C77_X : R600Reg <"C77.X", 77>;
+def C77_Y : R600Reg <"C77.Y", 77>;
+def C77_Z : R600Reg <"C77.Z", 77>;
+def C77_W : R600Reg <"C77.W", 77>;
+def C78_X : R600Reg <"C78.X", 78>;
+def C78_Y : R600Reg <"C78.Y", 78>;
+def C78_Z : R600Reg <"C78.Z", 78>;
+def C78_W : R600Reg <"C78.W", 78>;
+def C79_X : R600Reg <"C79.X", 79>;
+def C79_Y : R600Reg <"C79.Y", 79>;
+def C79_Z : R600Reg <"C79.Z", 79>;
+def C79_W : R600Reg <"C79.W", 79>;
+def C80_X : R600Reg <"C80.X", 80>;
+def C80_Y : R600Reg <"C80.Y", 80>;
+def C80_Z : R600Reg <"C80.Z", 80>;
+def C80_W : R600Reg <"C80.W", 80>;
+def C81_X : R600Reg <"C81.X", 81>;
+def C81_Y : R600Reg <"C81.Y", 81>;
+def C81_Z : R600Reg <"C81.Z", 81>;
+def C81_W : R600Reg <"C81.W", 81>;
+def C82_X : R600Reg <"C82.X", 82>;
+def C82_Y : R600Reg <"C82.Y", 82>;
+def C82_Z : R600Reg <"C82.Z", 82>;
+def C82_W : R600Reg <"C82.W", 82>;
+def C83_X : R600Reg <"C83.X", 83>;
+def C83_Y : R600Reg <"C83.Y", 83>;
+def C83_Z : R600Reg <"C83.Z", 83>;
+def C83_W : R600Reg <"C83.W", 83>;
+def C84_X : R600Reg <"C84.X", 84>;
+def C84_Y : R600Reg <"C84.Y", 84>;
+def C84_Z : R600Reg <"C84.Z", 84>;
+def C84_W : R600Reg <"C84.W", 84>;
+def C85_X : R600Reg <"C85.X", 85>;
+def C85_Y : R600Reg <"C85.Y", 85>;
+def C85_Z : R600Reg <"C85.Z", 85>;
+def C85_W : R600Reg <"C85.W", 85>;
+def C86_X : R600Reg <"C86.X", 86>;
+def C86_Y : R600Reg <"C86.Y", 86>;
+def C86_Z : R600Reg <"C86.Z", 86>;
+def C86_W : R600Reg <"C86.W", 86>;
+def C87_X : R600Reg <"C87.X", 87>;
+def C87_Y : R600Reg <"C87.Y", 87>;
+def C87_Z : R600Reg <"C87.Z", 87>;
+def C87_W : R600Reg <"C87.W", 87>;
+def C88_X : R600Reg <"C88.X", 88>;
+def C88_Y : R600Reg <"C88.Y", 88>;
+def C88_Z : R600Reg <"C88.Z", 88>;
+def C88_W : R600Reg <"C88.W", 88>;
+def C89_X : R600Reg <"C89.X", 89>;
+def C89_Y : R600Reg <"C89.Y", 89>;
+def C89_Z : R600Reg <"C89.Z", 89>;
+def C89_W : R600Reg <"C89.W", 89>;
+def C90_X : R600Reg <"C90.X", 90>;
+def C90_Y : R600Reg <"C90.Y", 90>;
+def C90_Z : R600Reg <"C90.Z", 90>;
+def C90_W : R600Reg <"C90.W", 90>;
+def C91_X : R600Reg <"C91.X", 91>;
+def C91_Y : R600Reg <"C91.Y", 91>;
+def C91_Z : R600Reg <"C91.Z", 91>;
+def C91_W : R600Reg <"C91.W", 91>;
+def C92_X : R600Reg <"C92.X", 92>;
+def C92_Y : R600Reg <"C92.Y", 92>;
+def C92_Z : R600Reg <"C92.Z", 92>;
+def C92_W : R600Reg <"C92.W", 92>;
+def C93_X : R600Reg <"C93.X", 93>;
+def C93_Y : R600Reg <"C93.Y", 93>;
+def C93_Z : R600Reg <"C93.Z", 93>;
+def C93_W : R600Reg <"C93.W", 93>;
+def C94_X : R600Reg <"C94.X", 94>;
+def C94_Y : R600Reg <"C94.Y", 94>;
+def C94_Z : R600Reg <"C94.Z", 94>;
+def C94_W : R600Reg <"C94.W", 94>;
+def C95_X : R600Reg <"C95.X", 95>;
+def C95_Y : R600Reg <"C95.Y", 95>;
+def C95_Z : R600Reg <"C95.Z", 95>;
+def C95_W : R600Reg <"C95.W", 95>;
+def C96_X : R600Reg <"C96.X", 96>;
+def C96_Y : R600Reg <"C96.Y", 96>;
+def C96_Z : R600Reg <"C96.Z", 96>;
+def C96_W : R600Reg <"C96.W", 96>;
+def C97_X : R600Reg <"C97.X", 97>;
+def C97_Y : R600Reg <"C97.Y", 97>;
+def C97_Z : R600Reg <"C97.Z", 97>;
+def C97_W : R600Reg <"C97.W", 97>;
+def C98_X : R600Reg <"C98.X", 98>;
+def C98_Y : R600Reg <"C98.Y", 98>;
+def C98_Z : R600Reg <"C98.Z", 98>;
+def C98_W : R600Reg <"C98.W", 98>;
+def C99_X : R600Reg <"C99.X", 99>;
+def C99_Y : R600Reg <"C99.Y", 99>;
+def C99_Z : R600Reg <"C99.Z", 99>;
+def C99_W : R600Reg <"C99.W", 99>;
+def T0_X : R600Reg <"T0.X", 0>;
+def T0_Y : R600Reg <"T0.Y", 0>;
+def T0_Z : R600Reg <"T0.Z", 0>;
+def T0_W : R600Reg <"T0.W", 0>;
+def T1_X : R600Reg <"T1.X", 1>;
+def T1_Y : R600Reg <"T1.Y", 1>;
+def T1_Z : R600Reg <"T1.Z", 1>;
+def T1_W : R600Reg <"T1.W", 1>;
+def T2_X : R600Reg <"T2.X", 2>;
+def T2_Y : R600Reg <"T2.Y", 2>;
+def T2_Z : R600Reg <"T2.Z", 2>;
+def T2_W : R600Reg <"T2.W", 2>;
+def T3_X : R600Reg <"T3.X", 3>;
+def T3_Y : R600Reg <"T3.Y", 3>;
+def T3_Z : R600Reg <"T3.Z", 3>;
+def T3_W : R600Reg <"T3.W", 3>;
+def T4_X : R600Reg <"T4.X", 4>;
+def T4_Y : R600Reg <"T4.Y", 4>;
+def T4_Z : R600Reg <"T4.Z", 4>;
+def T4_W : R600Reg <"T4.W", 4>;
+def T5_X : R600Reg <"T5.X", 5>;
+def T5_Y : R600Reg <"T5.Y", 5>;
+def T5_Z : R600Reg <"T5.Z", 5>;
+def T5_W : R600Reg <"T5.W", 5>;
+def T6_X : R600Reg <"T6.X", 6>;
+def T6_Y : R600Reg <"T6.Y", 6>;
+def T6_Z : R600Reg <"T6.Z", 6>;
+def T6_W : R600Reg <"T6.W", 6>;
+def T7_X : R600Reg <"T7.X", 7>;
+def T7_Y : R600Reg <"T7.Y", 7>;
+def T7_Z : R600Reg <"T7.Z", 7>;
+def T7_W : R600Reg <"T7.W", 7>;
+def T8_X : R600Reg <"T8.X", 8>;
+def T8_Y : R600Reg <"T8.Y", 8>;
+def T8_Z : R600Reg <"T8.Z", 8>;
+def T8_W : R600Reg <"T8.W", 8>;
+def T9_X : R600Reg <"T9.X", 9>;
+def T9_Y : R600Reg <"T9.Y", 9>;
+def T9_Z : R600Reg <"T9.Z", 9>;
+def T9_W : R600Reg <"T9.W", 9>;
+def T10_X : R600Reg <"T10.X", 10>;
+def T10_Y : R600Reg <"T10.Y", 10>;
+def T10_Z : R600Reg <"T10.Z", 10>;
+def T10_W : R600Reg <"T10.W", 10>;
+def T11_X : R600Reg <"T11.X", 11>;
+def T11_Y : R600Reg <"T11.Y", 11>;
+def T11_Z : R600Reg <"T11.Z", 11>;
+def T11_W : R600Reg <"T11.W", 11>;
+def T12_X : R600Reg <"T12.X", 12>;
+def T12_Y : R600Reg <"T12.Y", 12>;
+def T12_Z : R600Reg <"T12.Z", 12>;
+def T12_W : R600Reg <"T12.W", 12>;
+def T13_X : R600Reg <"T13.X", 13>;
+def T13_Y : R600Reg <"T13.Y", 13>;
+def T13_Z : R600Reg <"T13.Z", 13>;
+def T13_W : R600Reg <"T13.W", 13>;
+def T14_X : R600Reg <"T14.X", 14>;
+def T14_Y : R600Reg <"T14.Y", 14>;
+def T14_Z : R600Reg <"T14.Z", 14>;
+def T14_W : R600Reg <"T14.W", 14>;
+def T15_X : R600Reg <"T15.X", 15>;
+def T15_Y : R600Reg <"T15.Y", 15>;
+def T15_Z : R600Reg <"T15.Z", 15>;
+def T15_W : R600Reg <"T15.W", 15>;
+def T16_X : R600Reg <"T16.X", 16>;
+def T16_Y : R600Reg <"T16.Y", 16>;
+def T16_Z : R600Reg <"T16.Z", 16>;
+def T16_W : R600Reg <"T16.W", 16>;
+def T17_X : R600Reg <"T17.X", 17>;
+def T17_Y : R600Reg <"T17.Y", 17>;
+def T17_Z : R600Reg <"T17.Z", 17>;
+def T17_W : R600Reg <"T17.W", 17>;
+def T18_X : R600Reg <"T18.X", 18>;
+def T18_Y : R600Reg <"T18.Y", 18>;
+def T18_Z : R600Reg <"T18.Z", 18>;
+def T18_W : R600Reg <"T18.W", 18>;
+def T19_X : R600Reg <"T19.X", 19>;
+def T19_Y : R600Reg <"T19.Y", 19>;
+def T19_Z : R600Reg <"T19.Z", 19>;
+def T19_W : R600Reg <"T19.W", 19>;
+def T20_X : R600Reg <"T20.X", 20>;
+def T20_Y : R600Reg <"T20.Y", 20>;
+def T20_Z : R600Reg <"T20.Z", 20>;
+def T20_W : R600Reg <"T20.W", 20>;
+def T21_X : R600Reg <"T21.X", 21>;
+def T21_Y : R600Reg <"T21.Y", 21>;
+def T21_Z : R600Reg <"T21.Z", 21>;
+def T21_W : R600Reg <"T21.W", 21>;
+def T22_X : R600Reg <"T22.X", 22>;
+def T22_Y : R600Reg <"T22.Y", 22>;
+def T22_Z : R600Reg <"T22.Z", 22>;
+def T22_W : R600Reg <"T22.W", 22>;
+def T23_X : R600Reg <"T23.X", 23>;
+def T23_Y : R600Reg <"T23.Y", 23>;
+def T23_Z : R600Reg <"T23.Z", 23>;
+def T23_W : R600Reg <"T23.W", 23>;
+def T24_X : R600Reg <"T24.X", 24>;
+def T24_Y : R600Reg <"T24.Y", 24>;
+def T24_Z : R600Reg <"T24.Z", 24>;
+def T24_W : R600Reg <"T24.W", 24>;
+def T25_X : R600Reg <"T25.X", 25>;
+def T25_Y : R600Reg <"T25.Y", 25>;
+def T25_Z : R600Reg <"T25.Z", 25>;
+def T25_W : R600Reg <"T25.W", 25>;
+def T26_X : R600Reg <"T26.X", 26>;
+def T26_Y : R600Reg <"T26.Y", 26>;
+def T26_Z : R600Reg <"T26.Z", 26>;
+def T26_W : R600Reg <"T26.W", 26>;
+def T27_X : R600Reg <"T27.X", 27>;
+def T27_Y : R600Reg <"T27.Y", 27>;
+def T27_Z : R600Reg <"T27.Z", 27>;
+def T27_W : R600Reg <"T27.W", 27>;
+def T28_X : R600Reg <"T28.X", 28>;
+def T28_Y : R600Reg <"T28.Y", 28>;
+def T28_Z : R600Reg <"T28.Z", 28>;
+def T28_W : R600Reg <"T28.W", 28>;
+def T29_X : R600Reg <"T29.X", 29>;
+def T29_Y : R600Reg <"T29.Y", 29>;
+def T29_Z : R600Reg <"T29.Z", 29>;
+def T29_W : R600Reg <"T29.W", 29>;
+def T30_X : R600Reg <"T30.X", 30>;
+def T30_Y : R600Reg <"T30.Y", 30>;
+def T30_Z : R600Reg <"T30.Z", 30>;
+def T30_W : R600Reg <"T30.W", 30>;
+def T31_X : R600Reg <"T31.X", 31>;
+def T31_Y : R600Reg <"T31.Y", 31>;
+def T31_Z : R600Reg <"T31.Z", 31>;
+def T31_W : R600Reg <"T31.W", 31>;
+def T32_X : R600Reg <"T32.X", 32>;
+def T32_Y : R600Reg <"T32.Y", 32>;
+def T32_Z : R600Reg <"T32.Z", 32>;
+def T32_W : R600Reg <"T32.W", 32>;
+def T33_X : R600Reg <"T33.X", 33>;
+def T33_Y : R600Reg <"T33.Y", 33>;
+def T33_Z : R600Reg <"T33.Z", 33>;
+def T33_W : R600Reg <"T33.W", 33>;
+def T34_X : R600Reg <"T34.X", 34>;
+def T34_Y : R600Reg <"T34.Y", 34>;
+def T34_Z : R600Reg <"T34.Z", 34>;
+def T34_W : R600Reg <"T34.W", 34>;
+def T35_X : R600Reg <"T35.X", 35>;
+def T35_Y : R600Reg <"T35.Y", 35>;
+def T35_Z : R600Reg <"T35.Z", 35>;
+def T35_W : R600Reg <"T35.W", 35>;
+def T36_X : R600Reg <"T36.X", 36>;
+def T36_Y : R600Reg <"T36.Y", 36>;
+def T36_Z : R600Reg <"T36.Z", 36>;
+def T36_W : R600Reg <"T36.W", 36>;
+def T37_X : R600Reg <"T37.X", 37>;
+def T37_Y : R600Reg <"T37.Y", 37>;
+def T37_Z : R600Reg <"T37.Z", 37>;
+def T37_W : R600Reg <"T37.W", 37>;
+def T38_X : R600Reg <"T38.X", 38>;
+def T38_Y : R600Reg <"T38.Y", 38>;
+def T38_Z : R600Reg <"T38.Z", 38>;
+def T38_W : R600Reg <"T38.W", 38>;
+def T39_X : R600Reg <"T39.X", 39>;
+def T39_Y : R600Reg <"T39.Y", 39>;
+def T39_Z : R600Reg <"T39.Z", 39>;
+def T39_W : R600Reg <"T39.W", 39>;
+def T40_X : R600Reg <"T40.X", 40>;
+def T40_Y : R600Reg <"T40.Y", 40>;
+def T40_Z : R600Reg <"T40.Z", 40>;
+def T40_W : R600Reg <"T40.W", 40>;
+def T41_X : R600Reg <"T41.X", 41>;
+def T41_Y : R600Reg <"T41.Y", 41>;
+def T41_Z : R600Reg <"T41.Z", 41>;
+def T41_W : R600Reg <"T41.W", 41>;
+def T42_X : R600Reg <"T42.X", 42>;
+def T42_Y : R600Reg <"T42.Y", 42>;
+def T42_Z : R600Reg <"T42.Z", 42>;
+def T42_W : R600Reg <"T42.W", 42>;
+def T43_X : R600Reg <"T43.X", 43>;
+def T43_Y : R600Reg <"T43.Y", 43>;
+def T43_Z : R600Reg <"T43.Z", 43>;
+def T43_W : R600Reg <"T43.W", 43>;
+def T44_X : R600Reg <"T44.X", 44>;
+def T44_Y : R600Reg <"T44.Y", 44>;
+def T44_Z : R600Reg <"T44.Z", 44>;
+def T44_W : R600Reg <"T44.W", 44>;
+def T45_X : R600Reg <"T45.X", 45>;
+def T45_Y : R600Reg <"T45.Y", 45>;
+def T45_Z : R600Reg <"T45.Z", 45>;
+def T45_W : R600Reg <"T45.W", 45>;
+def T46_X : R600Reg <"T46.X", 46>;
+def T46_Y : R600Reg <"T46.Y", 46>;
+def T46_Z : R600Reg <"T46.Z", 46>;
+def T46_W : R600Reg <"T46.W", 46>;
+def T47_X : R600Reg <"T47.X", 47>;
+def T47_Y : R600Reg <"T47.Y", 47>;
+def T47_Z : R600Reg <"T47.Z", 47>;
+def T47_W : R600Reg <"T47.W", 47>;
+def T48_X : R600Reg <"T48.X", 48>;
+def T48_Y : R600Reg <"T48.Y", 48>;
+def T48_Z : R600Reg <"T48.Z", 48>;
+def T48_W : R600Reg <"T48.W", 48>;
+def T49_X : R600Reg <"T49.X", 49>;
+def T49_Y : R600Reg <"T49.Y", 49>;
+def T49_Z : R600Reg <"T49.Z", 49>;
+def T49_W : R600Reg <"T49.W", 49>;
+def T50_X : R600Reg <"T50.X", 50>;
+def T50_Y : R600Reg <"T50.Y", 50>;
+def T50_Z : R600Reg <"T50.Z", 50>;
+def T50_W : R600Reg <"T50.W", 50>;
+def T51_X : R600Reg <"T51.X", 51>;
+def T51_Y : R600Reg <"T51.Y", 51>;
+def T51_Z : R600Reg <"T51.Z", 51>;
+def T51_W : R600Reg <"T51.W", 51>;
+def T52_X : R600Reg <"T52.X", 52>;
+def T52_Y : R600Reg <"T52.Y", 52>;
+def T52_Z : R600Reg <"T52.Z", 52>;
+def T52_W : R600Reg <"T52.W", 52>;
+def T53_X : R600Reg <"T53.X", 53>;
+def T53_Y : R600Reg <"T53.Y", 53>;
+def T53_Z : R600Reg <"T53.Z", 53>;
+def T53_W : R600Reg <"T53.W", 53>;
+def T54_X : R600Reg <"T54.X", 54>;
+def T54_Y : R600Reg <"T54.Y", 54>;
+def T54_Z : R600Reg <"T54.Z", 54>;
+def T54_W : R600Reg <"T54.W", 54>;
+def T55_X : R600Reg <"T55.X", 55>;
+def T55_Y : R600Reg <"T55.Y", 55>;
+def T55_Z : R600Reg <"T55.Z", 55>;
+def T55_W : R600Reg <"T55.W", 55>;
+def T56_X : R600Reg <"T56.X", 56>;
+def T56_Y : R600Reg <"T56.Y", 56>;
+def T56_Z : R600Reg <"T56.Z", 56>;
+def T56_W : R600Reg <"T56.W", 56>;
+def T57_X : R600Reg <"T57.X", 57>;
+def T57_Y : R600Reg <"T57.Y", 57>;
+def T57_Z : R600Reg <"T57.Z", 57>;
+def T57_W : R600Reg <"T57.W", 57>;
+def T58_X : R600Reg <"T58.X", 58>;
+def T58_Y : R600Reg <"T58.Y", 58>;
+def T58_Z : R600Reg <"T58.Z", 58>;
+def T58_W : R600Reg <"T58.W", 58>;
+def T59_X : R600Reg <"T59.X", 59>;
+def T59_Y : R600Reg <"T59.Y", 59>;
+def T59_Z : R600Reg <"T59.Z", 59>;
+def T59_W : R600Reg <"T59.W", 59>;
+def T60_X : R600Reg <"T60.X", 60>;
+def T60_Y : R600Reg <"T60.Y", 60>;
+def T60_Z : R600Reg <"T60.Z", 60>;
+def T60_W : R600Reg <"T60.W", 60>;
+def T61_X : R600Reg <"T61.X", 61>;
+def T61_Y : R600Reg <"T61.Y", 61>;
+def T61_Z : R600Reg <"T61.Z", 61>;
+def T61_W : R600Reg <"T61.W", 61>;
+def T62_X : R600Reg <"T62.X", 62>;
+def T62_Y : R600Reg <"T62.Y", 62>;
+def T62_Z : R600Reg <"T62.Z", 62>;
+def T62_W : R600Reg <"T62.W", 62>;
+def T63_X : R600Reg <"T63.X", 63>;
+def T63_Y : R600Reg <"T63.Y", 63>;
+def T63_Z : R600Reg <"T63.Z", 63>;
+def T63_W : R600Reg <"T63.W", 63>;
+def T64_X : R600Reg <"T64.X", 64>;
+def T64_Y : R600Reg <"T64.Y", 64>;
+def T64_Z : R600Reg <"T64.Z", 64>;
+def T64_W : R600Reg <"T64.W", 64>;
+def T65_X : R600Reg <"T65.X", 65>;
+def T65_Y : R600Reg <"T65.Y", 65>;
+def T65_Z : R600Reg <"T65.Z", 65>;
+def T65_W : R600Reg <"T65.W", 65>;
+def T66_X : R600Reg <"T66.X", 66>;
+def T66_Y : R600Reg <"T66.Y", 66>;
+def T66_Z : R600Reg <"T66.Z", 66>;
+def T66_W : R600Reg <"T66.W", 66>;
+def T67_X : R600Reg <"T67.X", 67>;
+def T67_Y : R600Reg <"T67.Y", 67>;
+def T67_Z : R600Reg <"T67.Z", 67>;
+def T67_W : R600Reg <"T67.W", 67>;
+def T68_X : R600Reg <"T68.X", 68>;
+def T68_Y : R600Reg <"T68.Y", 68>;
+def T68_Z : R600Reg <"T68.Z", 68>;
+def T68_W : R600Reg <"T68.W", 68>;
+def T69_X : R600Reg <"T69.X", 69>;
+def T69_Y : R600Reg <"T69.Y", 69>;
+def T69_Z : R600Reg <"T69.Z", 69>;
+def T69_W : R600Reg <"T69.W", 69>;
+def T70_X : R600Reg <"T70.X", 70>;
+def T70_Y : R600Reg <"T70.Y", 70>;
+def T70_Z : R600Reg <"T70.Z", 70>;
+def T70_W : R600Reg <"T70.W", 70>;
+def T71_X : R600Reg <"T71.X", 71>;
+def T71_Y : R600Reg <"T71.Y", 71>;
+def T71_Z : R600Reg <"T71.Z", 71>;
+def T71_W : R600Reg <"T71.W", 71>;
+def T72_X : R600Reg <"T72.X", 72>;
+def T72_Y : R600Reg <"T72.Y", 72>;
+def T72_Z : R600Reg <"T72.Z", 72>;
+def T72_W : R600Reg <"T72.W", 72>;
+def T73_X : R600Reg <"T73.X", 73>;
+def T73_Y : R600Reg <"T73.Y", 73>;
+def T73_Z : R600Reg <"T73.Z", 73>;
+def T73_W : R600Reg <"T73.W", 73>;
+def T74_X : R600Reg <"T74.X", 74>;
+def T74_Y : R600Reg <"T74.Y", 74>;
+def T74_Z : R600Reg <"T74.Z", 74>;
+def T74_W : R600Reg <"T74.W", 74>;
+def T75_X : R600Reg <"T75.X", 75>;
+def T75_Y : R600Reg <"T75.Y", 75>;
+def T75_Z : R600Reg <"T75.Z", 75>;
+def T75_W : R600Reg <"T75.W", 75>;
+def T76_X : R600Reg <"T76.X", 76>;
+def T76_Y : R600Reg <"T76.Y", 76>;
+def T76_Z : R600Reg <"T76.Z", 76>;
+def T76_W : R600Reg <"T76.W", 76>;
+def T77_X : R600Reg <"T77.X", 77>;
+def T77_Y : R600Reg <"T77.Y", 77>;
+def T77_Z : R600Reg <"T77.Z", 77>;
+def T77_W : R600Reg <"T77.W", 77>;
+def T78_X : R600Reg <"T78.X", 78>;
+def T78_Y : R600Reg <"T78.Y", 78>;
+def T78_Z : R600Reg <"T78.Z", 78>;
+def T78_W : R600Reg <"T78.W", 78>;
+def T79_X : R600Reg <"T79.X", 79>;
+def T79_Y : R600Reg <"T79.Y", 79>;
+def T79_Z : R600Reg <"T79.Z", 79>;
+def T79_W : R600Reg <"T79.W", 79>;
+def T80_X : R600Reg <"T80.X", 80>;
+def T80_Y : R600Reg <"T80.Y", 80>;
+def T80_Z : R600Reg <"T80.Z", 80>;
+def T80_W : R600Reg <"T80.W", 80>;
+def T81_X : R600Reg <"T81.X", 81>;
+def T81_Y : R600Reg <"T81.Y", 81>;
+def T81_Z : R600Reg <"T81.Z", 81>;
+def T81_W : R600Reg <"T81.W", 81>;
+def T82_X : R600Reg <"T82.X", 82>;
+def T82_Y : R600Reg <"T82.Y", 82>;
+def T82_Z : R600Reg <"T82.Z", 82>;
+def T82_W : R600Reg <"T82.W", 82>;
+def T83_X : R600Reg <"T83.X", 83>;
+def T83_Y : R600Reg <"T83.Y", 83>;
+def T83_Z : R600Reg <"T83.Z", 83>;
+def T83_W : R600Reg <"T83.W", 83>;
+def T84_X : R600Reg <"T84.X", 84>;
+def T84_Y : R600Reg <"T84.Y", 84>;
+def T84_Z : R600Reg <"T84.Z", 84>;
+def T84_W : R600Reg <"T84.W", 84>;
+def T85_X : R600Reg <"T85.X", 85>;
+def T85_Y : R600Reg <"T85.Y", 85>;
+def T85_Z : R600Reg <"T85.Z", 85>;
+def T85_W : R600Reg <"T85.W", 85>;
+def T86_X : R600Reg <"T86.X", 86>;
+def T86_Y : R600Reg <"T86.Y", 86>;
+def T86_Z : R600Reg <"T86.Z", 86>;
+def T86_W : R600Reg <"T86.W", 86>;
+def T87_X : R600Reg <"T87.X", 87>;
+def T87_Y : R600Reg <"T87.Y", 87>;
+def T87_Z : R600Reg <"T87.Z", 87>;
+def T87_W : R600Reg <"T87.W", 87>;
+def T88_X : R600Reg <"T88.X", 88>;
+def T88_Y : R600Reg <"T88.Y", 88>;
+def T88_Z : R600Reg <"T88.Z", 88>;
+def T88_W : R600Reg <"T88.W", 88>;
+def T89_X : R600Reg <"T89.X", 89>;
+def T89_Y : R600Reg <"T89.Y", 89>;
+def T89_Z : R600Reg <"T89.Z", 89>;
+def T89_W : R600Reg <"T89.W", 89>;
+def T90_X : R600Reg <"T90.X", 90>;
+def T90_Y : R600Reg <"T90.Y", 90>;
+def T90_Z : R600Reg <"T90.Z", 90>;
+def T90_W : R600Reg <"T90.W", 90>;
+def T91_X : R600Reg <"T91.X", 91>;
+def T91_Y : R600Reg <"T91.Y", 91>;
+def T91_Z : R600Reg <"T91.Z", 91>;
+def T91_W : R600Reg <"T91.W", 91>;
+def T92_X : R600Reg <"T92.X", 92>;
+def T92_Y : R600Reg <"T92.Y", 92>;
+def T92_Z : R600Reg <"T92.Z", 92>;
+def T92_W : R600Reg <"T92.W", 92>;
+def T93_X : R600Reg <"T93.X", 93>;
+def T93_Y : R600Reg <"T93.Y", 93>;
+def T93_Z : R600Reg <"T93.Z", 93>;
+def T93_W : R600Reg <"T93.W", 93>;
+def T94_X : R600Reg <"T94.X", 94>;
+def T94_Y : R600Reg <"T94.Y", 94>;
+def T94_Z : R600Reg <"T94.Z", 94>;
+def T94_W : R600Reg <"T94.W", 94>;
+def T95_X : R600Reg <"T95.X", 95>;
+def T95_Y : R600Reg <"T95.Y", 95>;
+def T95_Z : R600Reg <"T95.Z", 95>;
+def T95_W : R600Reg <"T95.W", 95>;
+def T96_X : R600Reg <"T96.X", 96>;
+def T96_Y : R600Reg <"T96.Y", 96>;
+def T96_Z : R600Reg <"T96.Z", 96>;
+def T96_W : R600Reg <"T96.W", 96>;
+def T97_X : R600Reg <"T97.X", 97>;
+def T97_Y : R600Reg <"T97.Y", 97>;
+def T97_Z : R600Reg <"T97.Z", 97>;
+def T97_W : R600Reg <"T97.W", 97>;
+def T98_X : R600Reg <"T98.X", 98>;
+def T98_Y : R600Reg <"T98.Y", 98>;
+def T98_Z : R600Reg <"T98.Z", 98>;
+def T98_W : R600Reg <"T98.W", 98>;
+def T99_X : R600Reg <"T99.X", 99>;
+def T99_Y : R600Reg <"T99.Y", 99>;
+def T99_Z : R600Reg <"T99.Z", 99>;
+def T99_W : R600Reg <"T99.W", 99>;
+def T100_X : R600Reg <"T100.X", 100>;
+def T100_Y : R600Reg <"T100.Y", 100>;
+def T100_Z : R600Reg <"T100.Z", 100>;
+def T100_W : R600Reg <"T100.W", 100>;
+def T101_X : R600Reg <"T101.X", 101>;
+def T101_Y : R600Reg <"T101.Y", 101>;
+def T101_Z : R600Reg <"T101.Z", 101>;
+def T101_W : R600Reg <"T101.W", 101>;
+def T102_X : R600Reg <"T102.X", 102>;
+def T102_Y : R600Reg <"T102.Y", 102>;
+def T102_Z : R600Reg <"T102.Z", 102>;
+def T102_W : R600Reg <"T102.W", 102>;
+def T103_X : R600Reg <"T103.X", 103>;
+def T103_Y : R600Reg <"T103.Y", 103>;
+def T103_Z : R600Reg <"T103.Z", 103>;
+def T103_W : R600Reg <"T103.W", 103>;
+def T104_X : R600Reg <"T104.X", 104>;
+def T104_Y : R600Reg <"T104.Y", 104>;
+def T104_Z : R600Reg <"T104.Z", 104>;
+def T104_W : R600Reg <"T104.W", 104>;
+def T105_X : R600Reg <"T105.X", 105>;
+def T105_Y : R600Reg <"T105.Y", 105>;
+def T105_Z : R600Reg <"T105.Z", 105>;
+def T105_W : R600Reg <"T105.W", 105>;
+def T106_X : R600Reg <"T106.X", 106>;
+def T106_Y : R600Reg <"T106.Y", 106>;
+def T106_Z : R600Reg <"T106.Z", 106>;
+def T106_W : R600Reg <"T106.W", 106>;
+def T107_X : R600Reg <"T107.X", 107>;
+def T107_Y : R600Reg <"T107.Y", 107>;
+def T107_Z : R600Reg <"T107.Z", 107>;
+def T107_W : R600Reg <"T107.W", 107>;
+def T108_X : R600Reg <"T108.X", 108>;
+def T108_Y : R600Reg <"T108.Y", 108>;
+def T108_Z : R600Reg <"T108.Z", 108>;
+def T108_W : R600Reg <"T108.W", 108>;
+def T109_X : R600Reg <"T109.X", 109>;
+def T109_Y : R600Reg <"T109.Y", 109>;
+def T109_Z : R600Reg <"T109.Z", 109>;
+def T109_W : R600Reg <"T109.W", 109>;
+def T110_X : R600Reg <"T110.X", 110>;
+def T110_Y : R600Reg <"T110.Y", 110>;
+def T110_Z : R600Reg <"T110.Z", 110>;
+def T110_W : R600Reg <"T110.W", 110>;
+def T111_X : R600Reg <"T111.X", 111>;
+def T111_Y : R600Reg <"T111.Y", 111>;
+def T111_Z : R600Reg <"T111.Z", 111>;
+def T111_W : R600Reg <"T111.W", 111>;
+def T112_X : R600Reg <"T112.X", 112>;
+def T112_Y : R600Reg <"T112.Y", 112>;
+def T112_Z : R600Reg <"T112.Z", 112>;
+def T112_W : R600Reg <"T112.W", 112>;
+def T113_X : R600Reg <"T113.X", 113>;
+def T113_Y : R600Reg <"T113.Y", 113>;
+def T113_Z : R600Reg <"T113.Z", 113>;
+def T113_W : R600Reg <"T113.W", 113>;
+def T114_X : R600Reg <"T114.X", 114>;
+def T114_Y : R600Reg <"T114.Y", 114>;
+def T114_Z : R600Reg <"T114.Z", 114>;
+def T114_W : R600Reg <"T114.W", 114>;
+def T115_X : R600Reg <"T115.X", 115>;
+def T115_Y : R600Reg <"T115.Y", 115>;
+def T115_Z : R600Reg <"T115.Z", 115>;
+def T115_W : R600Reg <"T115.W", 115>;
+def T116_X : R600Reg <"T116.X", 116>;
+def T116_Y : R600Reg <"T116.Y", 116>;
+def T116_Z : R600Reg <"T116.Z", 116>;
+def T116_W : R600Reg <"T116.W", 116>;
+def T117_X : R600Reg <"T117.X", 117>;
+def T117_Y : R600Reg <"T117.Y", 117>;
+def T117_Z : R600Reg <"T117.Z", 117>;
+def T117_W : R600Reg <"T117.W", 117>;
+def T118_X : R600Reg <"T118.X", 118>;
+def T118_Y : R600Reg <"T118.Y", 118>;
+def T118_Z : R600Reg <"T118.Z", 118>;
+def T118_W : R600Reg <"T118.W", 118>;
+def T119_X : R600Reg <"T119.X", 119>;
+def T119_Y : R600Reg <"T119.Y", 119>;
+def T119_Z : R600Reg <"T119.Z", 119>;
+def T119_W : R600Reg <"T119.W", 119>;
+def T120_X : R600Reg <"T120.X", 120>;
+def T120_Y : R600Reg <"T120.Y", 120>;
+def T120_Z : R600Reg <"T120.Z", 120>;
+def T120_W : R600Reg <"T120.W", 120>;
+def T121_X : R600Reg <"T121.X", 121>;
+def T121_Y : R600Reg <"T121.Y", 121>;
+def T121_Z : R600Reg <"T121.Z", 121>;
+def T121_W : R600Reg <"T121.W", 121>;
+def T122_X : R600Reg <"T122.X", 122>;
+def T122_Y : R600Reg <"T122.Y", 122>;
+def T122_Z : R600Reg <"T122.Z", 122>;
+def T122_W : R600Reg <"T122.W", 122>;
+def T123_X : R600Reg <"T123.X", 123>;
+def T123_Y : R600Reg <"T123.Y", 123>;
+def T123_Z : R600Reg <"T123.Z", 123>;
+def T123_W : R600Reg <"T123.W", 123>;
+def T124_X : R600Reg <"T124.X", 124>;
+def T124_Y : R600Reg <"T124.Y", 124>;
+def T124_Z : R600Reg <"T124.Z", 124>;
+def T124_W : R600Reg <"T124.W", 124>;
+def T125_X : R600Reg <"T125.X", 125>;
+def T125_Y : R600Reg <"T125.Y", 125>;
+def T125_Z : R600Reg <"T125.Z", 125>;
+def T125_W : R600Reg <"T125.W", 125>;
+def T126_X : R600Reg <"T126.X", 126>;
+def T126_Y : R600Reg <"T126.Y", 126>;
+def T126_Z : R600Reg <"T126.Z", 126>;
+def T126_W : R600Reg <"T126.W", 126>;
+def T127_X : R600Reg <"T127.X", 127>;
+def T127_Y : R600Reg <"T127.Y", 127>;
+def T127_Z : R600Reg <"T127.Z", 127>;
+def T127_W : R600Reg <"T127.W", 127>;
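+
+// Each 128-bit Tn.XYZW register aggregates the four 32-bit Tn channel
+// registers and shares their T-register encoding.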
+def T0_XYZW : R600Reg_128 <"T0.XYZW", [T0_X, T0_Y, T0_Z, T0_W], 0 >;
+def T1_XYZW : R600Reg_128 <"T1.XYZW", [T1_X, T1_Y, T1_Z, T1_W], 1 >;
+def T2_XYZW : R600Reg_128 <"T2.XYZW", [T2_X, T2_Y, T2_Z, T2_W], 2 >;
+def T3_XYZW : R600Reg_128 <"T3.XYZW", [T3_X, T3_Y, T3_Z, T3_W], 3 >;
+def T4_XYZW : R600Reg_128 <"T4.XYZW", [T4_X, T4_Y, T4_Z, T4_W], 4 >;
+def T5_XYZW : R600Reg_128 <"T5.XYZW", [T5_X, T5_Y, T5_Z, T5_W], 5 >;
+def T6_XYZW : R600Reg_128 <"T6.XYZW", [T6_X, T6_Y, T6_Z, T6_W], 6 >;
+def T7_XYZW : R600Reg_128 <"T7.XYZW", [T7_X, T7_Y, T7_Z, T7_W], 7 >;
+def T8_XYZW : R600Reg_128 <"T8.XYZW", [T8_X, T8_Y, T8_Z, T8_W], 8 >;
+def T9_XYZW : R600Reg_128 <"T9.XYZW", [T9_X, T9_Y, T9_Z, T9_W], 9 >;
+def T10_XYZW : R600Reg_128 <"T10.XYZW", [T10_X, T10_Y, T10_Z, T10_W], 10 >;
+def T11_XYZW : R600Reg_128 <"T11.XYZW", [T11_X, T11_Y, T11_Z, T11_W], 11 >;
+def T12_XYZW : R600Reg_128 <"T12.XYZW", [T12_X, T12_Y, T12_Z, T12_W], 12 >;
+def T13_XYZW : R600Reg_128 <"T13.XYZW", [T13_X, T13_Y, T13_Z, T13_W], 13 >;
+def T14_XYZW : R600Reg_128 <"T14.XYZW", [T14_X, T14_Y, T14_Z, T14_W], 14 >;
+def T15_XYZW : R600Reg_128 <"T15.XYZW", [T15_X, T15_Y, T15_Z, T15_W], 15 >;
+def T16_XYZW : R600Reg_128 <"T16.XYZW", [T16_X, T16_Y, T16_Z, T16_W], 16 >;
+def T17_XYZW : R600Reg_128 <"T17.XYZW", [T17_X, T17_Y, T17_Z, T17_W], 17 >;
+def T18_XYZW : R600Reg_128 <"T18.XYZW", [T18_X, T18_Y, T18_Z, T18_W], 18 >;
+def T19_XYZW : R600Reg_128 <"T19.XYZW", [T19_X, T19_Y, T19_Z, T19_W], 19 >;
+def T20_XYZW : R600Reg_128 <"T20.XYZW", [T20_X, T20_Y, T20_Z, T20_W], 20 >;
+def T21_XYZW : R600Reg_128 <"T21.XYZW", [T21_X, T21_Y, T21_Z, T21_W], 21 >;
+def T22_XYZW : R600Reg_128 <"T22.XYZW", [T22_X, T22_Y, T22_Z, T22_W], 22 >;
+def T23_XYZW : R600Reg_128 <"T23.XYZW", [T23_X, T23_Y, T23_Z, T23_W], 23 >;
+def T24_XYZW : R600Reg_128 <"T24.XYZW", [T24_X, T24_Y, T24_Z, T24_W], 24 >;
+def T25_XYZW : R600Reg_128 <"T25.XYZW", [T25_X, T25_Y, T25_Z, T25_W], 25 >;
+def T26_XYZW : R600Reg_128 <"T26.XYZW", [T26_X, T26_Y, T26_Z, T26_W], 26 >;
+def T27_XYZW : R600Reg_128 <"T27.XYZW", [T27_X, T27_Y, T27_Z, T27_W], 27 >;
+def T28_XYZW : R600Reg_128 <"T28.XYZW", [T28_X, T28_Y, T28_Z, T28_W], 28 >;
+def T29_XYZW : R600Reg_128 <"T29.XYZW", [T29_X, T29_Y, T29_Z, T29_W], 29 >;
+def T30_XYZW : R600Reg_128 <"T30.XYZW", [T30_X, T30_Y, T30_Z, T30_W], 30 >;
+def T31_XYZW : R600Reg_128 <"T31.XYZW", [T31_X, T31_Y, T31_Z, T31_W], 31 >;
+def T32_XYZW : R600Reg_128 <"T32.XYZW", [T32_X, T32_Y, T32_Z, T32_W], 32 >;
+def T33_XYZW : R600Reg_128 <"T33.XYZW", [T33_X, T33_Y, T33_Z, T33_W], 33 >;
+def T34_XYZW : R600Reg_128 <"T34.XYZW", [T34_X, T34_Y, T34_Z, T34_W], 34 >;
+def T35_XYZW : R600Reg_128 <"T35.XYZW", [T35_X, T35_Y, T35_Z, T35_W], 35 >;
+def T36_XYZW : R600Reg_128 <"T36.XYZW", [T36_X, T36_Y, T36_Z, T36_W], 36 >;
+def T37_XYZW : R600Reg_128 <"T37.XYZW", [T37_X, T37_Y, T37_Z, T37_W], 37 >;
+def T38_XYZW : R600Reg_128 <"T38.XYZW", [T38_X, T38_Y, T38_Z, T38_W], 38 >;
+def T39_XYZW : R600Reg_128 <"T39.XYZW", [T39_X, T39_Y, T39_Z, T39_W], 39 >;
+def T40_XYZW : R600Reg_128 <"T40.XYZW", [T40_X, T40_Y, T40_Z, T40_W], 40 >;
+def T41_XYZW : R600Reg_128 <"T41.XYZW", [T41_X, T41_Y, T41_Z, T41_W], 41 >;
+def T42_XYZW : R600Reg_128 <"T42.XYZW", [T42_X, T42_Y, T42_Z, T42_W], 42 >;
+def T43_XYZW : R600Reg_128 <"T43.XYZW", [T43_X, T43_Y, T43_Z, T43_W], 43 >;
+def T44_XYZW : R600Reg_128 <"T44.XYZW", [T44_X, T44_Y, T44_Z, T44_W], 44 >;
+def T45_XYZW : R600Reg_128 <"T45.XYZW", [T45_X, T45_Y, T45_Z, T45_W], 45 >;
+def T46_XYZW : R600Reg_128 <"T46.XYZW", [T46_X, T46_Y, T46_Z, T46_W], 46 >;
+def T47_XYZW : R600Reg_128 <"T47.XYZW", [T47_X, T47_Y, T47_Z, T47_W], 47 >;
+def T48_XYZW : R600Reg_128 <"T48.XYZW", [T48_X, T48_Y, T48_Z, T48_W], 48 >;
+def T49_XYZW : R600Reg_128 <"T49.XYZW", [T49_X, T49_Y, T49_Z, T49_W], 49 >;
+def T50_XYZW : R600Reg_128 <"T50.XYZW", [T50_X, T50_Y, T50_Z, T50_W], 50 >;
+def T51_XYZW : R600Reg_128 <"T51.XYZW", [T51_X, T51_Y, T51_Z, T51_W], 51 >;
+def T52_XYZW : R600Reg_128 <"T52.XYZW", [T52_X, T52_Y, T52_Z, T52_W], 52 >;
+def T53_XYZW : R600Reg_128 <"T53.XYZW", [T53_X, T53_Y, T53_Z, T53_W], 53 >;
+def T54_XYZW : R600Reg_128 <"T54.XYZW", [T54_X, T54_Y, T54_Z, T54_W], 54 >;
+def T55_XYZW : R600Reg_128 <"T55.XYZW", [T55_X, T55_Y, T55_Z, T55_W], 55 >;
+def T56_XYZW : R600Reg_128 <"T56.XYZW", [T56_X, T56_Y, T56_Z, T56_W], 56 >;
+def T57_XYZW : R600Reg_128 <"T57.XYZW", [T57_X, T57_Y, T57_Z, T57_W], 57 >;
+def T58_XYZW : R600Reg_128 <"T58.XYZW", [T58_X, T58_Y, T58_Z, T58_W], 58 >;
+def T59_XYZW : R600Reg_128 <"T59.XYZW", [T59_X, T59_Y, T59_Z, T59_W], 59 >;
+def T60_XYZW : R600Reg_128 <"T60.XYZW", [T60_X, T60_Y, T60_Z, T60_W], 60 >;
+def T61_XYZW : R600Reg_128 <"T61.XYZW", [T61_X, T61_Y, T61_Z, T61_W], 61 >;
+def T62_XYZW : R600Reg_128 <"T62.XYZW", [T62_X, T62_Y, T62_Z, T62_W], 62 >;
+def T63_XYZW : R600Reg_128 <"T63.XYZW", [T63_X, T63_Y, T63_Z, T63_W], 63 >;
+def T64_XYZW : R600Reg_128 <"T64.XYZW", [T64_X, T64_Y, T64_Z, T64_W], 64 >;
+def T65_XYZW : R600Reg_128 <"T65.XYZW", [T65_X, T65_Y, T65_Z, T65_W], 65 >;
+def T66_XYZW : R600Reg_128 <"T66.XYZW", [T66_X, T66_Y, T66_Z, T66_W], 66 >;
+def T67_XYZW : R600Reg_128 <"T67.XYZW", [T67_X, T67_Y, T67_Z, T67_W], 67 >;
+def T68_XYZW : R600Reg_128 <"T68.XYZW", [T68_X, T68_Y, T68_Z, T68_W], 68 >;
+def T69_XYZW : R600Reg_128 <"T69.XYZW", [T69_X, T69_Y, T69_Z, T69_W], 69 >;
+def T70_XYZW : R600Reg_128 <"T70.XYZW", [T70_X, T70_Y, T70_Z, T70_W], 70 >;
+def T71_XYZW : R600Reg_128 <"T71.XYZW", [T71_X, T71_Y, T71_Z, T71_W], 71 >;
+def T72_XYZW : R600Reg_128 <"T72.XYZW", [T72_X, T72_Y, T72_Z, T72_W], 72 >;
+def T73_XYZW : R600Reg_128 <"T73.XYZW", [T73_X, T73_Y, T73_Z, T73_W], 73 >;
+def T74_XYZW : R600Reg_128 <"T74.XYZW", [T74_X, T74_Y, T74_Z, T74_W], 74 >;
+def T75_XYZW : R600Reg_128 <"T75.XYZW", [T75_X, T75_Y, T75_Z, T75_W], 75 >;
+def T76_XYZW : R600Reg_128 <"T76.XYZW", [T76_X, T76_Y, T76_Z, T76_W], 76 >;
+def T77_XYZW : R600Reg_128 <"T77.XYZW", [T77_X, T77_Y, T77_Z, T77_W], 77 >;
+def T78_XYZW : R600Reg_128 <"T78.XYZW", [T78_X, T78_Y, T78_Z, T78_W], 78 >;
+def T79_XYZW : R600Reg_128 <"T79.XYZW", [T79_X, T79_Y, T79_Z, T79_W], 79 >;
+def T80_XYZW : R600Reg_128 <"T80.XYZW", [T80_X, T80_Y, T80_Z, T80_W], 80 >;
+def T81_XYZW : R600Reg_128 <"T81.XYZW", [T81_X, T81_Y, T81_Z, T81_W], 81 >;
+def T82_XYZW : R600Reg_128 <"T82.XYZW", [T82_X, T82_Y, T82_Z, T82_W], 82 >;
+def T83_XYZW : R600Reg_128 <"T83.XYZW", [T83_X, T83_Y, T83_Z, T83_W], 83 >;
+def T84_XYZW : R600Reg_128 <"T84.XYZW", [T84_X, T84_Y, T84_Z, T84_W], 84 >;
+def T85_XYZW : R600Reg_128 <"T85.XYZW", [T85_X, T85_Y, T85_Z, T85_W], 85 >;
+def T86_XYZW : R600Reg_128 <"T86.XYZW", [T86_X, T86_Y, T86_Z, T86_W], 86 >;
+def T87_XYZW : R600Reg_128 <"T87.XYZW", [T87_X, T87_Y, T87_Z, T87_W], 87 >;
+def T88_XYZW : R600Reg_128 <"T88.XYZW", [T88_X, T88_Y, T88_Z, T88_W], 88 >;
+def T89_XYZW : R600Reg_128 <"T89.XYZW", [T89_X, T89_Y, T89_Z, T89_W], 89 >;
+def T90_XYZW : R600Reg_128 <"T90.XYZW", [T90_X, T90_Y, T90_Z, T90_W], 90 >;
+def T91_XYZW : R600Reg_128 <"T91.XYZW", [T91_X, T91_Y, T91_Z, T91_W], 91 >;
+def T92_XYZW : R600Reg_128 <"T92.XYZW", [T92_X, T92_Y, T92_Z, T92_W], 92 >;
+def T93_XYZW : R600Reg_128 <"T93.XYZW", [T93_X, T93_Y, T93_Z, T93_W], 93 >;
+def T94_XYZW : R600Reg_128 <"T94.XYZW", [T94_X, T94_Y, T94_Z, T94_W], 94 >;
+def T95_XYZW : R600Reg_128 <"T95.XYZW", [T95_X, T95_Y, T95_Z, T95_W], 95 >;
+def T96_XYZW : R600Reg_128 <"T96.XYZW", [T96_X, T96_Y, T96_Z, T96_W], 96 >;
+def T97_XYZW : R600Reg_128 <"T97.XYZW", [T97_X, T97_Y, T97_Z, T97_W], 97 >;
+def T98_XYZW : R600Reg_128 <"T98.XYZW", [T98_X, T98_Y, T98_Z, T98_W], 98 >;
+def T99_XYZW : R600Reg_128 <"T99.XYZW", [T99_X, T99_Y, T99_Z, T99_W], 99 >;
+def T100_XYZW : R600Reg_128 <"T100.XYZW", [T100_X, T100_Y, T100_Z, T100_W], 100 >;
+def T101_XYZW : R600Reg_128 <"T101.XYZW", [T101_X, T101_Y, T101_Z, T101_W], 101 >;
+def T102_XYZW : R600Reg_128 <"T102.XYZW", [T102_X, T102_Y, T102_Z, T102_W], 102 >;
+def T103_XYZW : R600Reg_128 <"T103.XYZW", [T103_X, T103_Y, T103_Z, T103_W], 103 >;
+def T104_XYZW : R600Reg_128 <"T104.XYZW", [T104_X, T104_Y, T104_Z, T104_W], 104 >;
+def T105_XYZW : R600Reg_128 <"T105.XYZW", [T105_X, T105_Y, T105_Z, T105_W], 105 >;
+def T106_XYZW : R600Reg_128 <"T106.XYZW", [T106_X, T106_Y, T106_Z, T106_W], 106 >;
+def T107_XYZW : R600Reg_128 <"T107.XYZW", [T107_X, T107_Y, T107_Z, T107_W], 107 >;
+def T108_XYZW : R600Reg_128 <"T108.XYZW", [T108_X, T108_Y, T108_Z, T108_W], 108 >;
+def T109_XYZW : R600Reg_128 <"T109.XYZW", [T109_X, T109_Y, T109_Z, T109_W], 109 >;
+def T110_XYZW : R600Reg_128 <"T110.XYZW", [T110_X, T110_Y, T110_Z, T110_W], 110 >;
+def T111_XYZW : R600Reg_128 <"T111.XYZW", [T111_X, T111_Y, T111_Z, T111_W], 111 >;
+def T112_XYZW : R600Reg_128 <"T112.XYZW", [T112_X, T112_Y, T112_Z, T112_W], 112 >;
+def T113_XYZW : R600Reg_128 <"T113.XYZW", [T113_X, T113_Y, T113_Z, T113_W], 113 >;
+def T114_XYZW : R600Reg_128 <"T114.XYZW", [T114_X, T114_Y, T114_Z, T114_W], 114 >;
+def T115_XYZW : R600Reg_128 <"T115.XYZW", [T115_X, T115_Y, T115_Z, T115_W], 115 >;
+def T116_XYZW : R600Reg_128 <"T116.XYZW", [T116_X, T116_Y, T116_Z, T116_W], 116 >;
+def T117_XYZW : R600Reg_128 <"T117.XYZW", [T117_X, T117_Y, T117_Z, T117_W], 117 >;
+def T118_XYZW : R600Reg_128 <"T118.XYZW", [T118_X, T118_Y, T118_Z, T118_W], 118 >;
+def T119_XYZW : R600Reg_128 <"T119.XYZW", [T119_X, T119_Y, T119_Z, T119_W], 119 >;
+def T120_XYZW : R600Reg_128 <"T120.XYZW", [T120_X, T120_Y, T120_Z, T120_W], 120 >;
+def T121_XYZW : R600Reg_128 <"T121.XYZW", [T121_X, T121_Y, T121_Z, T121_W], 121 >;
+def T122_XYZW : R600Reg_128 <"T122.XYZW", [T122_X, T122_Y, T122_Z, T122_W], 122 >;
+def T123_XYZW : R600Reg_128 <"T123.XYZW", [T123_X, T123_Y, T123_Z, T123_W], 123 >;
+def T124_XYZW : R600Reg_128 <"T124.XYZW", [T124_X, T124_Y, T124_Z, T124_W], 124 >;
+def T125_XYZW : R600Reg_128 <"T125.XYZW", [T125_X, T125_Y, T125_Z, T125_W], 125 >;
+def T126_XYZW : R600Reg_128 <"T126.XYZW", [T126_X, T126_Y, T126_Z, T126_W], 126 >;
+def T127_XYZW : R600Reg_128 <"T127.XYZW", [T127_X, T127_Y, T127_Z, T127_W], 127 >;
+
+class RegSet <dag s> {
+ dag set = s;
+}
+
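+// Special registers: inline hardware constants, the literal slot, the
+// previous-vector (PV) register, and the predicate-select registers. Note
+// that NEG_ONE/NEG_HALF reuse the ONE/HALF hardware encodings.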
+def ZERO : R600Reg<"0.0", 248>;
+def ONE : R600Reg<"1.0", 249>;
+def NEG_ONE : R600Reg<"-1.0", 249>;
+def ONE_INT : R600Reg<"1", 250>;
+def HALF : R600Reg<"0.5", 252>;
+def NEG_HALF : R600Reg<"-0.5", 252>;
+def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
+def PV_X : R600Reg<"pv.x", 254>;
+def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
+def PRED_SEL_OFF : R600Reg<"Pred_sel_off", 0>;
+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 0>;
+def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 0>;
+
+def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ C0_X
+,C0_Y,C0_Z,C0_W,C1_X,C1_Y,C1_Z,C1_W,C2_X,C2_Y,C2_Z
+,C2_W,C3_X,C3_Y,C3_Z,C3_W,C4_X,C4_Y,C4_Z,C4_W,C5_X
+,C5_Y,C5_Z,C5_W,C6_X,C6_Y,C6_Z,C6_W,C7_X,C7_Y,C7_Z
+,C7_W,C8_X,C8_Y,C8_Z,C8_W,C9_X,C9_Y,C9_Z,C9_W,C10_X
+,C10_Y,C10_Z,C10_W,C11_X,C11_Y,C11_Z,C11_W,C12_X,C12_Y,C12_Z
+,C12_W,C13_X,C13_Y,C13_Z,C13_W,C14_X,C14_Y,C14_Z,C14_W,C15_X
+,C15_Y,C15_Z,C15_W,C16_X,C16_Y,C16_Z,C16_W,C17_X,C17_Y,C17_Z
+,C17_W,C18_X,C18_Y,C18_Z,C18_W,C19_X,C19_Y,C19_Z,C19_W,C20_X
+,C20_Y,C20_Z,C20_W,C21_X,C21_Y,C21_Z,C21_W,C22_X,C22_Y,C22_Z
+,C22_W,C23_X,C23_Y,C23_Z,C23_W,C24_X,C24_Y,C24_Z,C24_W,C25_X
+,C25_Y,C25_Z,C25_W,C26_X,C26_Y,C26_Z,C26_W,C27_X,C27_Y,C27_Z
+,C27_W,C28_X,C28_Y,C28_Z,C28_W,C29_X,C29_Y,C29_Z,C29_W,C30_X
+,C30_Y,C30_Z,C30_W,C31_X,C31_Y,C31_Z,C31_W,C32_X,C32_Y,C32_Z
+,C32_W,C33_X,C33_Y,C33_Z,C33_W,C34_X,C34_Y,C34_Z,C34_W,C35_X
+,C35_Y,C35_Z,C35_W,C36_X,C36_Y,C36_Z,C36_W,C37_X,C37_Y,C37_Z
+,C37_W,C38_X,C38_Y,C38_Z,C38_W,C39_X,C39_Y,C39_Z,C39_W,C40_X
+,C40_Y,C40_Z,C40_W,C41_X,C41_Y,C41_Z,C41_W,C42_X,C42_Y,C42_Z
+,C42_W,C43_X,C43_Y,C43_Z,C43_W,C44_X,C44_Y,C44_Z,C44_W,C45_X
+,C45_Y,C45_Z,C45_W,C46_X,C46_Y,C46_Z,C46_W,C47_X,C47_Y,C47_Z
+,C47_W,C48_X,C48_Y,C48_Z,C48_W,C49_X,C49_Y,C49_Z,C49_W,C50_X
+,C50_Y,C50_Z,C50_W,C51_X,C51_Y,C51_Z,C51_W,C52_X,C52_Y,C52_Z
+,C52_W,C53_X,C53_Y,C53_Z,C53_W,C54_X,C54_Y,C54_Z,C54_W,C55_X
+,C55_Y,C55_Z,C55_W,C56_X,C56_Y,C56_Z,C56_W,C57_X,C57_Y,C57_Z
+,C57_W,C58_X,C58_Y,C58_Z,C58_W,C59_X,C59_Y,C59_Z,C59_W,C60_X
+,C60_Y,C60_Z,C60_W,C61_X,C61_Y,C61_Z,C61_W,C62_X,C62_Y,C62_Z
+,C62_W,C63_X,C63_Y,C63_Z,C63_W,C64_X,C64_Y,C64_Z,C64_W,C65_X
+,C65_Y,C65_Z,C65_W,C66_X,C66_Y,C66_Z,C66_W,C67_X,C67_Y,C67_Z
+,C67_W,C68_X,C68_Y,C68_Z,C68_W,C69_X,C69_Y,C69_Z,C69_W,C70_X
+,C70_Y,C70_Z,C70_W,C71_X,C71_Y,C71_Z,C71_W,C72_X,C72_Y,C72_Z
+,C72_W,C73_X,C73_Y,C73_Z,C73_W,C74_X,C74_Y,C74_Z,C74_W,C75_X
+,C75_Y,C75_Z,C75_W,C76_X,C76_Y,C76_Z,C76_W,C77_X,C77_Y,C77_Z
+,C77_W,C78_X,C78_Y,C78_Z,C78_W,C79_X,C79_Y,C79_Z,C79_W,C80_X
+,C80_Y,C80_Z,C80_W,C81_X,C81_Y,C81_Z,C81_W,C82_X,C82_Y,C82_Z
+,C82_W,C83_X,C83_Y,C83_Z,C83_W,C84_X,C84_Y,C84_Z,C84_W,C85_X
+,C85_Y,C85_Z,C85_W,C86_X,C86_Y,C86_Z,C86_W,C87_X,C87_Y,C87_Z
+,C87_W,C88_X,C88_Y,C88_Z,C88_W,C89_X,C89_Y,C89_Z,C89_W,C90_X
+,C90_Y,C90_Z,C90_W,C91_X,C91_Y,C91_Z,C91_W,C92_X,C92_Y,C92_Z
+,C92_W,C93_X,C93_Y,C93_Z,C93_W,C94_X,C94_Y,C94_Z,C94_W,C95_X
+,C95_Y,C95_Z,C95_W,C96_X,C96_Y,C96_Z,C96_W,C97_X,C97_Y,C97_Z
+,C97_W,C98_X,C98_Y,C98_Z,C98_W,C99_X,C99_Y,C99_Z,C99_W)>;
+
+def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ T0_X
+,T0_Y,T0_Z,T0_W,T1_X,T1_Y,T1_Z,T1_W,T2_X,T2_Y,T2_Z
+,T2_W,T3_X,T3_Y,T3_Z,T3_W,T4_X,T4_Y,T4_Z,T4_W,T5_X
+,T5_Y,T5_Z,T5_W,T6_X,T6_Y,T6_Z,T6_W,T7_X,T7_Y,T7_Z
+,T7_W,T8_X,T8_Y,T8_Z,T8_W,T9_X,T9_Y,T9_Z,T9_W,T10_X
+,T10_Y,T10_Z,T10_W,T11_X,T11_Y,T11_Z,T11_W,T12_X,T12_Y,T12_Z
+,T12_W,T13_X,T13_Y,T13_Z,T13_W,T14_X,T14_Y,T14_Z,T14_W,T15_X
+,T15_Y,T15_Z,T15_W,T16_X,T16_Y,T16_Z,T16_W,T17_X,T17_Y,T17_Z
+,T17_W,T18_X,T18_Y,T18_Z,T18_W,T19_X,T19_Y,T19_Z,T19_W,T20_X
+,T20_Y,T20_Z,T20_W,T21_X,T21_Y,T21_Z,T21_W,T22_X,T22_Y,T22_Z
+,T22_W,T23_X,T23_Y,T23_Z,T23_W,T24_X,T24_Y,T24_Z,T24_W,T25_X
+,T25_Y,T25_Z,T25_W,T26_X,T26_Y,T26_Z,T26_W,T27_X,T27_Y,T27_Z
+,T27_W,T28_X,T28_Y,T28_Z,T28_W,T29_X,T29_Y,T29_Z,T29_W,T30_X
+,T30_Y,T30_Z,T30_W,T31_X,T31_Y,T31_Z,T31_W,T32_X,T32_Y,T32_Z
+,T32_W,T33_X,T33_Y,T33_Z,T33_W,T34_X,T34_Y,T34_Z,T34_W,T35_X
+,T35_Y,T35_Z,T35_W,T36_X,T36_Y,T36_Z,T36_W,T37_X,T37_Y,T37_Z
+,T37_W,T38_X,T38_Y,T38_Z,T38_W,T39_X,T39_Y,T39_Z,T39_W,T40_X
+,T40_Y,T40_Z,T40_W,T41_X,T41_Y,T41_Z,T41_W,T42_X,T42_Y,T42_Z
+,T42_W,T43_X,T43_Y,T43_Z,T43_W,T44_X,T44_Y,T44_Z,T44_W,T45_X
+,T45_Y,T45_Z,T45_W,T46_X,T46_Y,T46_Z,T46_W,T47_X,T47_Y,T47_Z
+,T47_W,T48_X,T48_Y,T48_Z,T48_W,T49_X,T49_Y,T49_Z,T49_W,T50_X
+,T50_Y,T50_Z,T50_W,T51_X,T51_Y,T51_Z,T51_W,T52_X,T52_Y,T52_Z
+,T52_W,T53_X,T53_Y,T53_Z,T53_W,T54_X,T54_Y,T54_Z,T54_W,T55_X
+,T55_Y,T55_Z,T55_W,T56_X,T56_Y,T56_Z,T56_W,T57_X,T57_Y,T57_Z
+,T57_W,T58_X,T58_Y,T58_Z,T58_W,T59_X,T59_Y,T59_Z,T59_W,T60_X
+,T60_Y,T60_Z,T60_W,T61_X,T61_Y,T61_Z,T61_W,T62_X,T62_Y,T62_Z
+,T62_W,T63_X,T63_Y,T63_Z,T63_W,T64_X,T64_Y,T64_Z,T64_W,T65_X
+,T65_Y,T65_Z,T65_W,T66_X,T66_Y,T66_Z,T66_W,T67_X,T67_Y,T67_Z
+,T67_W,T68_X,T68_Y,T68_Z,T68_W,T69_X,T69_Y,T69_Z,T69_W,T70_X
+,T70_Y,T70_Z,T70_W,T71_X,T71_Y,T71_Z,T71_W,T72_X,T72_Y,T72_Z
+,T72_W,T73_X,T73_Y,T73_Z,T73_W,T74_X,T74_Y,T74_Z,T74_W,T75_X
+,T75_Y,T75_Z,T75_W,T76_X,T76_Y,T76_Z,T76_W,T77_X,T77_Y,T77_Z
+,T77_W,T78_X,T78_Y,T78_Z,T78_W,T79_X,T79_Y,T79_Z,T79_W,T80_X
+,T80_Y,T80_Z,T80_W,T81_X,T81_Y,T81_Z,T81_W,T82_X,T82_Y,T82_Z
+,T82_W,T83_X,T83_Y,T83_Z,T83_W,T84_X,T84_Y,T84_Z,T84_W,T85_X
+,T85_Y,T85_Z,T85_W,T86_X,T86_Y,T86_Z,T86_W,T87_X,T87_Y,T87_Z
+,T87_W,T88_X,T88_Y,T88_Z,T88_W,T89_X,T89_Y,T89_Z,T89_W,T90_X
+,T90_Y,T90_Z,T90_W,T91_X,T91_Y,T91_Z,T91_W,T92_X,T92_Y,T92_Z
+,T92_W,T93_X,T93_Y,T93_Z,T93_W,T94_X,T94_Y,T94_Z,T94_W,T95_X
+,T95_Y,T95_Z,T95_W,T96_X,T96_Y,T96_Z,T96_W,T97_X,T97_Y,T97_Z
+,T97_W,T98_X,T98_Y,T98_Z,T98_W,T99_X,T99_Y,T99_Z,T99_W,T100_X
+,T100_Y,T100_Z,T100_W,T101_X,T101_Y,T101_Z,T101_W,T102_X,T102_Y,T102_Z
+,T102_W,T103_X,T103_Y,T103_Z,T103_W,T104_X,T104_Y,T104_Z,T104_W,T105_X
+,T105_Y,T105_Z,T105_W,T106_X,T106_Y,T106_Z,T106_W,T107_X,T107_Y,T107_Z
+,T107_W,T108_X,T108_Y,T108_Z,T108_W,T109_X,T109_Y,T109_Z,T109_W,T110_X
+,T110_Y,T110_Z,T110_W,T111_X,T111_Y,T111_Z,T111_W,T112_X,T112_Y,T112_Z
+,T112_W,T113_X,T113_Y,T113_Z,T113_W,T114_X,T114_Y,T114_Z,T114_W,T115_X
+,T115_Y,T115_Z,T115_W,T116_X,T116_Y,T116_Z,T116_W,T117_X,T117_Y,T117_Z
+,T117_W,T118_X,T118_Y,T118_Z,T118_W,T119_X,T119_Y,T119_Z,T119_W,T120_X
+,T120_Y,T120_Z,T120_W,T121_X,T121_Y,T121_Z,T121_W,T122_X,T122_Y,T122_Z
+,T122_W,T123_X,T123_Y,T123_Z,T123_W,T124_X,T124_Y,T124_Z,T124_W,T125_X
+,T125_Y,T125_Z,T125_W,T126_X,T126_Y,T126_Z,T126_W,T127_X,T127_Y,T127_Z
+,T127_W)>;
+
+def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ T0_X
+,T1_X,T2_X,T3_X,T4_X,T5_X,T6_X,T7_X,T8_X,T9_X,T10_X
+,T11_X,T12_X,T13_X,T14_X,T15_X,T16_X,T17_X,T18_X,T19_X,T20_X
+,T21_X,T22_X,T23_X,T24_X,T25_X,T26_X,T27_X,T28_X,T29_X,T30_X
+,T31_X,T32_X,T33_X,T34_X,T35_X,T36_X,T37_X,T38_X,T39_X,T40_X
+,T41_X,T42_X,T43_X,T44_X,T45_X,T46_X,T47_X,T48_X,T49_X,T50_X
+,T51_X,T52_X,T53_X,T54_X,T55_X,T56_X,T57_X,T58_X,T59_X,T60_X
+,T61_X,T62_X,T63_X,T64_X,T65_X,T66_X,T67_X,T68_X,T69_X,T70_X
+,T71_X,T72_X,T73_X,T74_X,T75_X,T76_X,T77_X,T78_X,T79_X,T80_X
+,T81_X,T82_X,T83_X,T84_X,T85_X,T86_X,T87_X,T88_X,T89_X,T90_X
+,T91_X,T92_X,T93_X,T94_X,T95_X,T96_X,T97_X,T98_X,T99_X,T100_X
+,T101_X,T102_X,T103_X,T104_X,T105_X,T106_X,T107_X,T108_X,T109_X,T110_X
+,T111_X,T112_X,T113_X,T114_X,T115_X,T116_X,T117_X,T118_X,T119_X,T120_X
+,T121_X,T122_X,T123_X,T124_X,T125_X,T126_X,T127_X)>;
+
+def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ R600_TReg32,
+ R600_CReg32,
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
+
+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
+ PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
+
+def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
+ PREDICATE_BIT)>;
+
+def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
+ T0_XYZW
+,T1_XYZW,T2_XYZW,T3_XYZW,T4_XYZW,T5_XYZW,T6_XYZW,T7_XYZW,T8_XYZW,T9_XYZW,T10_XYZW
+,T11_XYZW,T12_XYZW,T13_XYZW,T14_XYZW,T15_XYZW,T16_XYZW,T17_XYZW,T18_XYZW,T19_XYZW,T20_XYZW
+,T21_XYZW,T22_XYZW,T23_XYZW,T24_XYZW,T25_XYZW,T26_XYZW,T27_XYZW,T28_XYZW,T29_XYZW,T30_XYZW
+,T31_XYZW,T32_XYZW,T33_XYZW,T34_XYZW,T35_XYZW,T36_XYZW,T37_XYZW,T38_XYZW,T39_XYZW,T40_XYZW
+,T41_XYZW,T42_XYZW,T43_XYZW,T44_XYZW,T45_XYZW,T46_XYZW,T47_XYZW,T48_XYZW,T49_XYZW,T50_XYZW
+,T51_XYZW,T52_XYZW,T53_XYZW,T54_XYZW,T55_XYZW,T56_XYZW,T57_XYZW,T58_XYZW,T59_XYZW,T60_XYZW
+,T61_XYZW,T62_XYZW,T63_XYZW,T64_XYZW,T65_XYZW,T66_XYZW,T67_XYZW,T68_XYZW,T69_XYZW,T70_XYZW
+,T71_XYZW,T72_XYZW,T73_XYZW,T74_XYZW,T75_XYZW,T76_XYZW,T77_XYZW,T78_XYZW,T79_XYZW,T80_XYZW
+,T81_XYZW,T82_XYZW,T83_XYZW,T84_XYZW,T85_XYZW,T86_XYZW,T87_XYZW,T88_XYZW,T89_XYZW,T90_XYZW
+,T91_XYZW,T92_XYZW,T93_XYZW,T94_XYZW,T95_XYZW,T96_XYZW,T97_XYZW,T98_XYZW,T99_XYZW,T100_XYZW
+,T101_XYZW,T102_XYZW,T103_XYZW,T104_XYZW,T105_XYZW,T106_XYZW,T107_XYZW,T108_XYZW,T109_XYZW,T110_XYZW
+,T111_XYZW,T112_XYZW,T113_XYZW,T114_XYZW,T115_XYZW,T116_XYZW,T117_XYZW,T118_XYZW,T119_XYZW,T120_XYZW
+,T121_XYZW,T122_XYZW,T123_XYZW,T124_XYZW,T125_XYZW,T126_XYZW,T127_XYZW)>
+{
+ let CopyCost = -1;
+}
+
diff --git a/lib/Target/AMDGPU/R600Schedule.td b/lib/Target/AMDGPU/R600Schedule.td
new file mode 100644
index 0000000..7ede181
--- /dev/null
+++ b/lib/Target/AMDGPU/R600Schedule.td
@@ -0,0 +1,36 @@
+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 has a VLIW architecture. On pre-Cayman cards there are five
+// instruction slots: ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. On Cayman
+// cards, the TRANS slot has been removed.
+//
+//===----------------------------------------------------------------------===//
+
+
+def ALU_X : FuncUnit;
+def ALU_Y : FuncUnit;
+def ALU_Z : FuncUnit;
+def ALU_W : FuncUnit;
+def TRANS : FuncUnit;
+
+def AnyALU : InstrItinClass;
+def VecALU : InstrItinClass;
+def TransALU : InstrItinClass;
+
+def R600_EG_Itin : ProcessorItineraries <
+ [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
+ [],
+ [
+ InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
+ InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+ InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
+ InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
+ ]
+>;
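
(Note: as defined here, AnyALU instructions may issue to any of the five
units, VecALU instructions are restricted to the four vector slots
ALU.X-ALU.W, and TransALU instructions to the TRANS unit alone. ALU_NULL
and NullALU are presumably defined in the shared AMDGPU scheduling files,
since they do not appear in this one.)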
diff --git a/lib/Target/AMDGPU/SIAssignInterpRegs.cpp b/lib/Target/AMDGPU/SIAssignInterpRegs.cpp
new file mode 100644
index 0000000..3ee03ae
--- /dev/null
+++ b/lib/Target/AMDGPU/SIAssignInterpRegs.cpp
@@ -0,0 +1,136 @@
+//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass maps the pseudo interpolation registers to the correct physical
+// registers. Prior to executing a fragment shader, the GPU loads interpolation
+// parameters into physical registers. The specific physical register that each
+// interpolation parameter ends up in depends on the type of the interpolation
+// parameter as well as how many interpolation parameters are used by the
+// shader.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include "AMDGPU.h"
+#include "AMDIL.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SIAssignInterpRegsPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ TargetMachine &TM;
+
+ void AddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
+ unsigned physReg, unsigned virtReg);
+
+public:
+ SIAssignInterpRegsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "SI Assign interpolation registers"; }
+};
+
+} // End anonymous namespace
+
+char SIAssignInterpRegsPass::ID = 0;
+
+#define INTERP_VALUES 16
+
+struct interp_info {
+ bool enabled;
+ unsigned regs[3];
+ unsigned reg_count;
+};
+
+
+FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
+ return new SIAssignInterpRegsPass(tm);
+}
+
+bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
+{
+
+ struct interp_info InterpUse[INTERP_VALUES] = {
+ {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
+ {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
+ {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
+ {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
+ {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
+ {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
+ {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
+ {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
+ {false, {AMDGPU::POS_X_FLOAT}, 1},
+ {false, {AMDGPU::POS_Y_FLOAT}, 1},
+ {false, {AMDGPU::POS_Z_FLOAT}, 1},
+ {false, {AMDGPU::POS_W_FLOAT}, 1},
+ {false, {AMDGPU::FRONT_FACE}, 1},
+ {false, {AMDGPU::ANCILLARY}, 1},
+ {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
+ {false, {AMDGPU::POS_FIXED_PT}, 1}
+ };
+
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ /* First pass, mark the interpolation values that are used. */
+ for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
+ for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
+ reg_idx++) {
+ /* Any used register enables the whole entry; do not let a later
+ * unused register clear the flag. */
+ InterpUse[interp_idx].enabled = InterpUse[interp_idx].enabled ||
+ !MRI.use_empty(InterpUse[interp_idx].regs[reg_idx]);
+ }
+ }
+
+ unsigned used_vgprs = 0;
+
+ /* Second pass, replace with VGPRs. */
+ for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
+ if (!InterpUse[interp_idx].enabled) {
+ continue;
+ }
+ MFI->spi_ps_input_addr |= (1 << interp_idx);
+
+ for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
+ reg_idx++, used_vgprs++) {
+ unsigned new_reg = AMDGPU::VReg_32RegClass.getRegister(used_vgprs);
+ unsigned virt_reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ MRI.replaceRegWith(InterpUse[interp_idx].regs[reg_idx], virt_reg);
+ AddLiveIn(&MF, MRI, new_reg, virt_reg);
+ }
+ }
+
+ return false;
+}
+
+void SIAssignInterpRegsPass::AddLiveIn(MachineFunction * MF,
+ MachineRegisterInfo & MRI,
+ unsigned physReg, unsigned virtReg)
+{
+ const TargetInstrInfo * TII = TM.getInstrInfo();
+ if (!MRI.isLiveIn(physReg)) {
+ MRI.addLiveIn(physReg, virtReg);
+ MF->front().addLiveIn(physReg);
+ BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), virtReg)
+ .addReg(physReg);
+ } else {
+ MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
+ }
+}
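
To make the two passes concrete, a worked example with a hypothetical
fragment shader that reads only PERSP_CENTER_I/J and POS_X_FLOAT: the first
pass marks entries 1 (PERSP_CENTER) and 8 (POS_X_FLOAT) of InterpUse as
enabled; the second pass then sets spi_ps_input_addr to
(1 << 1) | (1 << 8) = 0x102 and hands out VGPRs in table order, so
PERSP_CENTER_I lands in VGPR0, PERSP_CENTER_J in VGPR1, and POS_X_FLOAT in
VGPR2.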
diff --git a/lib/Target/AMDGPU/SIGenRegisterInfo.pl b/lib/Target/AMDGPU/SIGenRegisterInfo.pl
new file mode 100644
index 0000000..4cfbb18
--- /dev/null
+++ b/lib/Target/AMDGPU/SIGenRegisterInfo.pl
@@ -0,0 +1,232 @@
+#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+#
+# This Perl script prints to stdout the .td code to be used as
+# SIRegisterInfo.td. It also generates a file called SIHwRegInfo.include,
+# which contains helper functions for determining the hw encoding of
+# registers.
+#
+#===------------------------------------------------------------------------===#
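
Since the script writes the .td code to stdout, the build presumably
redirects it into the generated file, along the lines of:

  perl SIGenRegisterInfo.pl > SIRegisterInfo.td

(The optional first argument, captured in $INDEX_FILE below, is not used
anywhere in this version of the script.)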
+
+use strict;
+use warnings;
+
+my $SGPR_COUNT = 104;
+my $VGPR_COUNT = 256;
+
+my $SGPR_MAX_IDX = $SGPR_COUNT - 1;
+my $VGPR_MAX_IDX = $VGPR_COUNT - 1;
+
+my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : '';
+
+print <<STRING;
+
+let Namespace = "AMDGPU" in {
+ def low : SubRegIndex;
+ def high : SubRegIndex;
+
+ def sub0 : SubRegIndex;
+ def sub1 : SubRegIndex;
+ def sub2 : SubRegIndex;
+ def sub3 : SubRegIndex;
+ def sub4 : SubRegIndex;
+ def sub5 : SubRegIndex;
+ def sub6 : SubRegIndex;
+ def sub7 : SubRegIndex;
+}
+
+class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [low, high];
+ let HWEncoding = encoding;
+}
+
+class SI_128 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
+ let HWEncoding = encoding;
+}
+
+class SI_256 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
+ let HWEncoding = encoding;
+}
+
+class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
+
+class VGPR_32 <bits<16> num, string name> : SIReg<name, num>;
+
+class SGPR_64 <bits<16> num, string name, list<Register> subregs> :
+ SI_64 <name, subregs, num>;
+
+class VGPR_64 <bits<16> num, string name, list<Register> subregs> :
+ SI_64 <name, subregs, num>;
+
+class SGPR_128 <bits<16> num, string name, list<Register> subregs> :
+ SI_128 <name, subregs, num>;
+
+class VGPR_128 <bits<16> num, string name, list<Register> subregs> :
+ SI_128 <name, subregs, num>;
+
+class SGPR_256 <bits<16> num, string name, list<Register> subregs> :
+ SI_256 <name, subregs, num>;
+
+def VCC : SIReg<"VCC">;
+def SCC : SIReg<"SCC">;
+def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
+
+def M0 : SIReg <"M0", 124>;
+
+//Interpolation registers
+
+def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
+def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
+def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
+def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
+def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
+def PERSP_CENTROID_J : SIReg <"PERSP_CENTROID_J">;
+def PERSP_I_W : SIReg <"PERSP_I_W">;
+def PERSP_J_W : SIReg <"PERSP_J_W">;
+def PERSP_1_W : SIReg <"PERSP_1_W">;
+def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
+def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
+def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
+def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
+def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
+def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
+def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
+def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
+def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
+def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
+def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
+def FRONT_FACE : SIReg <"FRONT_FACE">;
+def ANCILLARY : SIReg <"ANCILLARY">;
+def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
+def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
+
+STRING
+
+#32 bit register
+
+my @SGPR;
+for (my $i = 0; $i < $SGPR_COUNT; $i++) {
+ print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n";
+ $SGPR[$i] = "SGPR$i";
+}
+
+my @VGPR;
+for (my $i = 0; $i < $VGPR_COUNT; $i++) {
+ print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n";
+ $VGPR[$i] = "VGPR$i";
+}
+
+print <<STRING;
+
+def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0)
+>;
+
+def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "VGPR%u", 0, $VGPR_MAX_IDX),
+ PERSP_SAMPLE_I, PERSP_SAMPLE_J,
+ PERSP_CENTER_I, PERSP_CENTER_J,
+ PERSP_CENTROID_I, PERSP_CENTROID_J,
+ PERSP_I_W, PERSP_J_W, PERSP_1_W,
+ LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
+ LINEAR_CENTER_I, LINEAR_CENTER_J,
+ LINEAR_CENTROID_I, LINEAR_CENTROID_J,
+ LINE_STIPPLE_TEX_COORD,
+ POS_X_FLOAT,
+ POS_Y_FLOAT,
+ POS_Z_FLOAT,
+ POS_W_FLOAT,
+ FRONT_FACE,
+ ANCILLARY,
+ SAMPLE_COVERAGE,
+ POS_FIXED_PT
+ )
+>;
+
+def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add VReg_32, SReg_32)
+>;
+
+def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
+def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
+
+STRING
+
+my @subregs_64 = ('low', 'high');
+my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w');
+my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7');
+
+my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64'));
+my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32'));
+my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32'));
+
+my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64'));
+my @VGPR128 = print_vgpr_class(128, \@subregs_128, ('v4f32'));
+
+
+my $sgpr64_list = join(',', @SGPR64);
+my $vgpr64_list = join(',', @VGPR64);
+print <<STRING;
+
+def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
+ (add $sgpr64_list, $vgpr64_list)
+>;
+
+STRING
+
+sub print_sgpr_class {
+ my ($reg_width, $sub_reg_ref, @types) = @_;
+ return print_reg_class('SReg', 'SGPR', $reg_width, $SGPR_COUNT, $sub_reg_ref, @types);
+}
+
+sub print_vgpr_class {
+ my ($reg_width, $sub_reg_ref, @types) = @_;
+ return print_reg_class('VReg', 'VGPR', $reg_width, $VGPR_COUNT, $sub_reg_ref, @types);
+}
+
+sub print_reg_class {
+ my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_;
+ my @registers;
+ my $component_count = $reg_width / 32;
+
+ for (my $i = 0; $i < $reg_count; $i += $component_count) {
+ my $reg_name = $reg_prefix . $i . '_' . $reg_width;
+ my @sub_regs;
+ for (my $idx = 0; $idx < $component_count; $idx++) {
+ my $sub_idx = $i + $idx;
+ push(@sub_regs, $reg_prefix . $sub_idx);
+ }
+ print "def $reg_name : $reg_prefix\_$reg_width <$i, \"$reg_name\", [ ", join(',', @sub_regs) , "]>;\n";
+ if ($i % 10 == 0) {
+ $reg_name .= "\n";
+ }
+ push (@registers, $reg_name);
+
+ }
+
+ #Add VCC to SReg_64
+ if ($class_prefix eq 'SReg' and $reg_width == 64) {
+ push (@registers, 'VCC')
+ }
+
+ my $reg_list = join(', ', @registers);
+
+ print "def $class_prefix\_$reg_width : RegisterClass<\"AMDGPU\", [" . join (', ', @types) . "], $reg_width,\n (add $reg_list)\n>{\n";
+ print "}\n";
+ return @registers;
+}
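
For reference, the first line print_sgpr_class(64, ...) emits through the
print statement above is:

  def SGPR0_64 : SGPR_64 <0, "SGPR0_64", [ SGPR0,SGPR1]>;

followed by SGPR2_64, SGPR4_64, and so on, stepping by the component count.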
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
new file mode 100644
index 0000000..092c2fa
--- /dev/null
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -0,0 +1,408 @@
+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
+// mostly contains the implementation of EmitInstrWithCustomInserter().
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIISelLowering.h"
+#include "AMDIL.h"
+#include "AMDILIntrinsicInfo.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+SITargetLowering::SITargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
+{
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
+ addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
+
+ addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
+
+ computeRegisterProperties();
+
+ setOperationAction(ISD::AND, MVT::i1, Custom);
+
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::ADD, MVT::i32, Legal);
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // We need to custom lower loads from the USER_SGPR address space, so we can
+ // add the SGPRs as livein registers.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setTargetDAGCombine(ISD::SELECT_CC);
+
+ setTargetDAGCombine(ISD::SETCC);
+}
+
+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const
+{
+ const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
+ MachineBasicBlock::iterator I = MI;
+
+ if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
+ AppendS_WAITCNT(MI, *BB, llvm::next(I));
+ return BB;
+ }
+
+ switch (MI->getOpcode()) {
+ default:
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+
+ case AMDGPU::CLAMP_SI:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ // VSRC1-2 are unused, but we still need to fill all the
+ // operand slots, so we just reuse the VSRC0 operand
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // ABS
+ .addImm(1) // CLAMP
+ .addImm(0) // OMOD
+ .addImm(0); // NEG
+ MI->eraseFromParent();
+ break;
+
+ case AMDGPU::FABS_SI:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ // VSRC1-2 are unused, but we still need to fill all the
+ // operand slots, so we just reuse the VSRC0 operand
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(1))
+ .addImm(1) // ABS
+ .addImm(0) // CLAMP
+ .addImm(0) // OMOD
+ .addImm(0); // NEG
+ MI->eraseFromParent();
+ break;
+
+ case AMDGPU::FNEG_SI:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ // VSRC1-2 are unused, but we still need to fill all the
+ // operand slots, so we just reuse the VSRC0 operand
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // ABS
+ .addImm(0) // CLAMP
+ .addImm(0) // OMOD
+ .addImm(1); // NEG
+ MI->eraseFromParent();
+ break;
+
+ case AMDGPU::SI_INTERP:
+ LowerSI_INTERP(MI, *BB, I, MRI);
+ break;
+ case AMDGPU::SI_INTERP_CONST:
+ LowerSI_INTERP_CONST(MI, *BB, I);
+ break;
+ case AMDGPU::SI_V_CNDLT:
+ LowerSI_V_CNDLT(MI, *BB, I, MRI);
+ break;
+ }
+ return BB;
+}
+
+void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I) const
+{
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+}
+
+void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
+{
+ unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ MachineOperand dst = MI->getOperand(0);
+ MachineOperand iReg = MI->getOperand(1);
+ MachineOperand jReg = MI->getOperand(2);
+ MachineOperand attr_chan = MI->getOperand(3);
+ MachineOperand attr = MI->getOperand(4);
+ MachineOperand params = MI->getOperand(5);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32))
+ .addReg(AMDGPU::M0)
+ .addOperand(params);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
+ .addOperand(iReg)
+ .addOperand(attr_chan)
+ .addOperand(attr);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
+ .addOperand(dst)
+ .addReg(tmp)
+ .addOperand(jReg)
+ .addOperand(attr_chan)
+ .addOperand(attr);
+
+ MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
+ MachineBasicBlock &BB, MachineBasicBlock::iterator I) const
+{
+ MachineOperand dst = MI->getOperand(0);
+ MachineOperand attr_chan = MI->getOperand(1);
+ MachineOperand attr = MI->getOperand(2);
+ MachineOperand params = MI->getOperand(3);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32))
+ .addReg(AMDGPU::M0)
+ .addOperand(params);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
+ .addOperand(dst)
+ .addOperand(attr_chan)
+ .addOperand(attr);
+
+ MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
+{
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_LT_F32_e32),
+ AMDGPU::VCC)
+ .addOperand(MI->getOperand(1))
+ .addReg(AMDGPU::SREG_LIT_0);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::VCC)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(3));
+
+ MI->eraseFromParent();
+}
+
+EVT SITargetLowering::getSetCCResultType(EVT VT) const
+{
+ return MVT::i1;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::SI_vs_load_buffer_index:
+ return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
+ AMDGPU::VGPR0, VT);
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+/// Loweri1ContextSwitch - This function lowers i1 operations on the
+/// VCC register. In the VALU context, VCC is a one-bit register, but in the
+/// SALU context VCC is a 64-bit register (one bit per thread). Since only
+/// the SALU can perform operations on the VCC register, we need to promote
+/// the operand types from i1 to i64 in order for tablegen to be able to match
+/// this operation to the correct SALU instruction. We do this promotion by
+/// wrapping the operands in a CopyToReg node.
+///
+SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned VCCNode) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
+ DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
+ Op.getOperand(0)),
+ DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
+ Op.getOperand(1)));
+
+ return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
+}
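
As a sketch, for an AND (operand names are illustrative) the rewrite this
performs is:

  (and:i1 a, b)
    --> (VCC_BITCAST:i1 (and:i64 (VCC_BITCAST:i64 a), (VCC_BITCAST:i64 b)))

so that the 64-bit AND can be pattern-matched to a SALU instruction.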
+
+SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue CC = Op.getOperand(1);
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue JumpT = Op.getOperand(4);
+ SDValue CmpValue;
+ SDValue Result;
+ CmpValue = DAG.getNode(
+ ISD::SETCC,
+ Op.getDebugLoc(),
+ MVT::i1,
+ LHS, RHS,
+ CC);
+
+ Result = DAG.getNode(
+ AMDGPUISD::BRANCH_COND,
+ CmpValue.getDebugLoc(),
+ MVT::Other, Chain,
+ JumpT, CmpValue);
+ return Result;
+}
+
+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
+
+ assert(Ptr);
+
+ unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
+
+ // We only need to lower USER_SGPR address space loads
+ if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
+ return SDValue();
+ }
+
+ // Loads from the USER_SGPR address space can only have constant value
+ // pointers.
+ ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
+ assert(BasePtr);
+
+ unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
+ const TargetRegisterClass * dstClass;
+ switch (TypeDwordWidth) {
+ default:
+ assert(!"USER_SGPR value size not implemented");
+ return SDValue();
+ case 1:
+ dstClass = &AMDGPU::SReg_32RegClass;
+ break;
+ case 2:
+ dstClass = &AMDGPU::SReg_64RegClass;
+ break;
+ }
+ uint64_t Index = BasePtr->getZExtValue();
+ assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
+ unsigned SGPRIndex = Index / TypeDwordWidth;
+ unsigned Reg = dstClass->getRegister(SGPRIndex);
+
+ DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
+ VT));
+ return SDValue();
+}
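
A worked example, assuming (as the divisions above imply) that USER_SGPR
pointer constants are expressed in dword units and that getRegister()
enumerates the class in declaration order: an i64 load from constant
pointer 4 gives TypeDwordWidth = 2, passes the alignment assert
(4 % 2 == 0), and yields SGPRIndex = 2, i.e. the third member of SReg_64,
SGPR4_64, which SIGenRegisterInfo.pl above defines as the pair
[SGPR4, SGPR5].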
+
+SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::SELECT_CC: {
+ ConstantSDNode *True, *False;
+ // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
+ if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ && True->isAllOnesValue()
+ && False->isNullValue()
+ && VT == MVT::i1) {
+ return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(4));
+
+ }
+ break;
+ }
+ case ISD::SETCC: {
+ SDValue Arg0 = N->getOperand(0);
+ SDValue Arg1 = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ ConstantSDNode * C = NULL;
+ ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
+
+ // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
+ if (VT == MVT::i1
+ && Arg0.getOpcode() == ISD::SIGN_EXTEND
+ && Arg0.getOperand(0).getValueType() == MVT::i1
+ && (C = dyn_cast<ConstantSDNode>(Arg1))
+ && C->isNullValue()
+ && CCOp == ISD::SETNE) {
+ return SimplifySetCC(VT, Arg0.getOperand(0),
+ DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+#define NODE_NAME_CASE(node) case SIISD::node: return #node;
+
+const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
+ NODE_NAME_CASE(VCC_AND)
+ NODE_NAME_CASE(VCC_BITCAST)
+ }
+}
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
new file mode 100644
index 0000000..cf655a1
--- /dev/null
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -0,0 +1,57 @@
+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SIISELLOWERING_H
+#define SIISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+#include "SIInstrInfo.h"
+
+namespace llvm {
+
+class SITargetLowering : public AMDGPUTargetLowering
+{
+ const SIInstrInfo * TII;
+
+ /// AppendS_WAITCNT - Memory reads and writes are synchronized using the
+ /// S_WAITCNT instruction. This function takes the most conservative
+ /// approach and inserts an S_WAITCNT instruction after every read and
+ /// write.
+ void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I) const;
+ void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+ void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I) const;
+ void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+
+ SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
+ unsigned VCCNode) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+
+public:
+ SITargetLowering(TargetMachine &tm);
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
+ MachineBasicBlock * BB) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ virtual const char* getTargetNodeName(unsigned Opcode) const;
+};
+
+} // End namespace llvm
+
+#endif //SIISELLOWERING_H
diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td
new file mode 100644
index 0000000..8f56e21
--- /dev/null
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@@ -0,0 +1,131 @@
+//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction format definitions.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
+// encoding is the standard encoding, but instructions that make use of
+// any of the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
+//
+//===----------------------------------------------------------------------===//
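
For instance, V_MOV_B32_e32 and V_MOV_B32_e64 (both treated as moves by
SIInstrInfo::isMov() later in this patch) are the two encodings of the same
operation; the custom inserters in SIISelLowering.cpp use the _e64 form
precisely because FABS_SI, FNEG_SI and CLAMP_SI need the ABS/NEG/CLAMP
modifiers.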
+
+
+class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
+ : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
+
+class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
+ : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
+
+
+class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
+ : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
+
+class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
+ : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
+
+class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
+ : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
+
+class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
+ : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
+
+class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
+ : SOP2 <op, (outs VCCReg:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
+
+class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> :
+ VOP1 <
+ op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
+ >;
+
+multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
+ def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
+ def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
+
+ def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+
+ def _e64 : VOP3_64 <
+ {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> :
+ VOP2 <
+ op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
+ >;
+
+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
+
+ def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
+
+ def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
+ def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+
+ def _e64 : VOP3_64 <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
+ : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
+
+class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
+ : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
+
+class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> :
+ VOPC <
+ op, (ins arc:$src0, vrc:$src1), opName, pattern
+ >;
+
+multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {
+
+ def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>;
+
+ def _e64 : VOP3_32 <
+ {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
+
+ def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+
+ def _e64 : VOP3_64 <
+ {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
+ : SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
+
+class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
+ : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
+
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
new file mode 100644
index 0000000..8d0d155
--- /dev/null
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -0,0 +1,75 @@
+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIInstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
+{
+ return RI;
+}
+
+void
+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{
+
+ // If we are trying to copy to or from SCC, there is a bug somewhere else in
+ // the backend. While it may be theoretically possible to do this, it should
+ // never be necessary.
+ assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const
+{
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
+ MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
+ MachineInstrBuilder(MI).addImm(Imm);
+
+ return MI;
+}
+
+bool SIInstrInfo::isMov(unsigned Opcode) const
+{
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_IMM_F32:
+ case AMDGPU::V_MOV_IMM_I32:
+ case AMDGPU::S_MOV_IMM_I32:
+ return true;
+ }
+}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
new file mode 100644
index 0000000..a60fd8c
--- /dev/null
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -0,0 +1,61 @@
+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface definition for SIInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIINSTRINFO_H
+#define SIINSTRINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+namespace llvm {
+
+class SIInstrInfo : public AMDGPUInstrInfo {
+private:
+ const SIRegisterInfo RI;
+
+public:
+ explicit SIInstrInfo(AMDGPUTargetMachine &tm);
+
+ const SIRegisterInfo &getRegisterInfo() const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ /// getEncodingType - Returns the encoding type of this instruction.
+ unsigned getEncodingType(const MachineInstr &MI) const;
+
+ /// getEncodingBytes - Returns the size of this instruction's encoding in
+ /// bytes.
+ unsigned getEncodingBytes(const MachineInstr &MI) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+ virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
+ virtual bool isMov(unsigned Opcode) const;
+
+ };
+
+} // End namespace llvm
+
+namespace SIInstrFlags {
+ enum Flags {
+ // First 4 bits are the instruction encoding
+ NEED_WAIT = 1 << 4
+ };
+}
+
+#endif //SIINSTRINFO_H
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
new file mode 100644
index 0000000..81df55d
--- /dev/null
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -0,0 +1,512 @@
+//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI DAG Profiles
+//===----------------------------------------------------------------------===//
+def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
+]>;
+
+//===----------------------------------------------------------------------===//
+// SI DAG Nodes
+//===----------------------------------------------------------------------===//
+
+// and operation on 64-bit wide vcc
+def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// Special bitcast node for sharing VCC register between VALU and SALU
+def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
+>;
+
+class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<4> EncodingType = 0;
+ field bits<1> NeedWait = 0;
+
+ let TSFlags{3-0} = EncodingType;
+ let TSFlags{4} = NeedWait;
+
+}
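
(TSFlags{4} here is the bit that SIInstrInfo.h later exposes as
SIInstrFlags::NEED_WAIT = 1 << 4, which EmitInstrWithCustomInserter() in
SIISelLowering.cpp tests in order to append an S_WAITCNT after the
instruction.)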
+
+class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<32> Inst;
+}
+
+class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+}
+
+class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
+ let EncoderMethod = "encodeOperand";
+ let MIOperandInfo = opInfo;
+}
+
+def IMM16bit : ImmLeaf <
+ i16,
+ [{return isInt<16>(Imm);}]
+>;
+
+def IMM8bit : ImmLeaf <
+ i32,
+ [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
+>;
+
+def IMM12bit : ImmLeaf <
+ i16,
+ [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
+>;
+
+def IMM32bitIn64bit : ImmLeaf <
+ i64,
+ [{return isInt<32>(Imm);}]
+>;
+
+class GPR4Align <RegisterClass rc> : Operand <vAny> {
+ let EncoderMethod = "GPR4AlignEncode";
+ let MIOperandInfo = (ops rc:$reg);
+}
+
+class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
+ let EncoderMethod = "GPR2AlignEncode";
+ let MIOperandInfo = (ops rc:$reg);
+}
+
+def i32Literal : Operand <i32> {
+ let EncoderMethod = "i32LiteralEncode";
+}
+
+// i64Literal uses the same encoder method as i32Literal, because an
+// i64Literal is really an i32 literal with the top 32 bits all set to zero.
+def i64Literal : Operand <i64> {
+ let EncoderMethod = "i32LiteralEncode";
+}
+
+def SMRDmemrr : Operand<iPTR> {
+ let MIOperandInfo = (ops SReg_64, SReg_32);
+ let EncoderMethod = "GPR2AlignEncode";
+}
+
+def SMRDmemri : Operand<iPTR> {
+ let MIOperandInfo = (ops SReg_64, i32imm);
+ let EncoderMethod = "SMRDmemriEncode";
+}
+
+def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
+def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
+
+def EXP : Enc64<
+ (outs),
+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ [] > {
+
+ bits<4> EN;
+ bits<6> TGT;
+ bits<1> COMPR;
+ bits<1> DONE;
+ bits<1> VM;
+ bits<8> VSRC0;
+ bits<8> VSRC1;
+ bits<8> VSRC2;
+ bits<8> VSRC3;
+
+ let Inst{3-0} = EN;
+ let Inst{9-4} = TGT;
+ let Inst{10} = COMPR;
+ let Inst{11} = DONE;
+ let Inst{12} = VM;
+ let Inst{31-26} = 0x3e;
+ let Inst{39-32} = VSRC0;
+ let Inst{47-40} = VSRC1;
+ let Inst{55-48} = VSRC2;
+ let Inst{63-56} = VSRC3;
+ let EncodingType = 0; //SIInstrEncodingType::EXP
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+}
+
+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<4> DMASK;
+ bits<1> UNORM;
+ bits<1> GLC;
+ bits<1> DA;
+ bits<1> R128;
+ bits<1> TFE;
+ bits<1> LWE;
+ bits<1> SLC;
+ bits<8> VADDR;
+ bits<5> SRSRC;
+ bits<5> SSAMP;
+
+ let Inst{11-8} = DMASK;
+ let Inst{12} = UNORM;
+ let Inst{13} = GLC;
+ let Inst{14} = DA;
+ let Inst{15} = R128;
+ let Inst{16} = TFE;
+ let Inst{17} = LWE;
+ let Inst{24-18} = op;
+ let Inst{25} = SLC;
+ let Inst{31-26} = 0x3c;
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC;
+ let Inst{57-53} = SSAMP;
+
+ let EncodingType = 2; //SIInstrEncodingType::MIMG
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+}
+
+class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<4> DFMT;
+ bits<3> NFMT;
+ bits<8> VADDR;
+ bits<5> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{18-16} = op;
+ let Inst{22-19} = DFMT;
+ let Inst{25-23} = NFMT;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC;
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+ let EncodingType = 3; //SIInstrEncodingType::MTBUF
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+ let neverHasSideEffects = 1;
+}
+
+class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<1> LDS;
+ bits<8> VADDR;
+ bits<5> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{16} = LDS;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC;
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+ let EncodingType = 4; //SIInstrEncodingType::MUBUF
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+ let neverHasSideEffects = 1;
+}
+
+class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<15> PTR;
+ bits<8> OFFSET = PTR{7-0};
+ bits<1> IMM = PTR{8};
+ bits<6> SBASE = PTR{14-9};
+
+ let Inst{7-0} = OFFSET;
+ let Inst{8} = IMM;
+ let Inst{14-9} = SBASE;
+ let Inst{21-15} = SDST;
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+ let EncodingType = 5; //SIInstrEncodingType::SMRD
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+}
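
As a rough sketch (not part of the patch itself), the 32-bit SMRD word
described by the 'let Inst' assignments above could be packed in C++ as
follows; the function name and signature are purely illustrative:

  #include <cstdint>

  // Pack an SMRD word per the field layout of the SMRD class above.
  uint32_t packSMRD(uint32_t Offset, bool Imm, uint32_t SBase,
                    uint32_t SDst, uint32_t Op) {
    uint32_t Inst = 0;
    Inst |= (Offset & 0xff);        // Inst{7-0}   = OFFSET
    Inst |= (uint32_t)Imm << 8;     // Inst{8}     = IMM
    Inst |= (SBase & 0x3f) << 9;    // Inst{14-9}  = SBASE
    Inst |= (SDst & 0x7f) << 15;    // Inst{21-15} = SDST
    Inst |= (Op & 0x1f) << 22;      // Inst{26-22} = op
    Inst |= 0x18u << 27;            // Inst{31-27} = 0x18 (SMRD encoding)
    return Inst;
  }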
+
+class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = op;
+ let Inst{22-16} = SDST;
+ let Inst{31-23} = 0x17d; //encoding;
+ let EncodingType = 6; //SIInstrEncodingType::SOP1
+}
+
+class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = SDST;
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+ let EncodingType = 7; // SIInstrEncodingType::SOP2
+}
+
+class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17e;
+ let EncodingType = 8; // SIInstrEncodingType::SOPC
+
+ let DisableEncoding = "$dst";
+}
+
+class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins , asm, pattern> {
+
+ bits <7> SDST;
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = SDST;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; //encoding
+ let EncodingType = 9; // SIInstrEncodingType::SOPK
+}
+
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
+ (outs),
+ ins,
+ asm,
+ pattern > {
+
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+ let EncodingType = 10; // SIInstrEncodingType::SOPP
+}
+
+
+class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<8> VSRC;
+ bits<2> ATTRCHAN;
+ bits<6> ATTR;
+
+ let Inst{7-0} = VSRC;
+ let Inst{9-8} = ATTRCHAN;
+ let Inst{15-10} = ATTR;
+ let Inst{17-16} = op;
+ let Inst{25-18} = VDST;
+ let Inst{31-26} = 0x32; // encoding
+ let EncodingType = 11; // SIInstrEncodingType::VINTRP
+
+ let Uses = [M0];
+}
+
+class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = op;
+ let Inst{24-17} = VDST;
+ let Inst{31-25} = 0x3f; //encoding
+
+ let EncodingType = 12; // SIInstrEncodingType::VOP1
+ let PostEncoderMethod = "VOPPostEncode";
+}
+
+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = VDST;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+
+ let EncodingType = 13; // SIInstrEncodingType::VOP2
+ let PostEncoderMethod = "VOPPostEncode";
+}
+
+class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<3> ABS;
+ bits<1> CLAMP;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{10-8} = ABS;
+ let Inst{11} = CLAMP;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let EncodingType = 14; // SIInstrEncodingType::VOP3
+ let PostEncoderMethod = "VOPPostEncode";
+}
+
+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
+ Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
+
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e;
+
+ let EncodingType = 15; //SIInstrEncodingType::VOPC
+ let PostEncoderMethod = "VOPPostEncode";
+ let DisableEncoding = "$dst";
+}
+
+class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
+ op,
+ (outs VReg_128:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
+ GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
+ asm,
+ []
+>;
+
+class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
+ i1imm:$tfe, SReg_32:$soffset),
+ asm,
+ []> {
+ let mayLoad = 1;
+}
+
+class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
+ i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
+ asm,
+ []> {
+ let mayLoad = 1;
+}
+
+class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs),
+ (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
+ GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
+ asm,
+ []> {
+ let mayStore = 1;
+}
+
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
+ ValueType vt> {
+ def _IMM : SMRD <
+ op,
+ (outs dstClass:$dst),
+ (ins SMRDmemri:$src0),
+ asm,
+ [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
+ >;
+
+ def _SGPR : SMRD <
+ op,
+ (outs dstClass:$dst),
+ (ins SMRDmemrr:$src0),
+ asm,
+ [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
+ >;
+}
+
+multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
+ defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
+ defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
+}
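
So a single 'defm S_LOAD_DWORD : SMRD_32 <...>' (the instruction name is
purely illustrative) would expand to four defs: S_LOAD_DWORD_F32_IMM,
S_LOAD_DWORD_F32_SGPR, S_LOAD_DWORD_I32_IMM and S_LOAD_DWORD_I32_SGPR,
covering both result types and both addressing forms.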
+
+include "SIInstrFormats.td"
+include "SIInstructions.td"
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
new file mode 100644
index 0000000..f09d604
--- /dev/null
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -0,0 +1,1077 @@
+//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def isSI : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
+
+let Predicates = [isSI] in {
+
+def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
+def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
+def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
+def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
+def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
+def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
+////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
+////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
+////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
+////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
+//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
+def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
+//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
+//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
+def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
+def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
+def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
+////def S_ANDN2_SAVEEXEC_B64 : SOP1_ANDN2 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
+////def S_ORN2_SAVEEXEC_B64 : SOP1_ORN2 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
+def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
+def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
+def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
+
+/*
+This instruction is disabled for now until we can figure out how to teach
+the instruction selector to choose correctly between the S_CMP* and V_CMP*
+instructions.
+
+When this instruction is enabled, the code generator sometimes produces this
+invalid sequence:
+
+SCC = S_CMPK_EQ_I32 SGPR0, imm
+VCC = COPY SCC
+VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
+
+def S_CMPK_EQ_I32 : SOPK <
+ 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
+ "S_CMPK_EQ_I32",
+ [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
+>;
+*/
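+// In the meantime, i32 equality is selected through the VCC-writing vector
+// form instead; the V_CMP_EQ_I32 pattern below yields a sequence along the
+// lines of
+//
+//   VCC = V_CMP_EQ_I32 SGPR0, VGPR0
+//   VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
+//
+// with no SCC-to-VCC copy to go wrong.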
+
+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
+def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
+def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
+def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
+//def EXP : EXP_ <0x00000000, "EXP", []>;
+
+defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>;
+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32",
+ [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT))]
+>;
+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32",
+ [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ))]
+>;
+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32",
+ [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE))]
+>;
+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32",
+ [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT))]
+>;
+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32",
+ [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))]
+>;
+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32",
+ [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE))]
+>;
+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>;
+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>;
+defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>;
+defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>;
+defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>;
+defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>;
+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32",
+ [(set VCCReg:$dst, (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE))]
+>;
+defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>;
+defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>;
+defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>;
+defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>;
+defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>;
+defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>;
+defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>;
+defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>;
+defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>;
+defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>;
+defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>;
+defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>;
+defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>;
+defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>;
+defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>;
+defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>;
+defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>;
+defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>;
+defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>;
+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>;
+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>;
+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>;
+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>;
+defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>;
+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>;
+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>;
+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>;
+defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>;
+defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>;
+defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>;
+defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>;
+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>;
+defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>;
+defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>;
+defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>;
+defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>;
+defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>;
+defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>;
+defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>;
+defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>;
+defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>;
+defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>;
+defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>;
+defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>;
+defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>;
+defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>;
+defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>;
+defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>;
+defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>;
+defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>;
+defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>;
+defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>;
+defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>;
+defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>;
+defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>;
+defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>;
+defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>;
+defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>;
+defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>;
+defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>;
+defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>;
+defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>;
+defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>;
+defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>;
+defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>;
+defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>;
+defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>;
+defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>;
+defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>;
+defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>;
+defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>;
+defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>;
+defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>;
+defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>;
+defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>;
+defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>;
+defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>;
+defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>;
+defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>;
+defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>;
+defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>;
+defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>;
+defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>;
+defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>;
+defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>;
+defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>;
+defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>;
+defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>;
+defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>;
+defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>;
+defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>;
+defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>;
+defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>;
+defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>;
+defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>;
+defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>;
+defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>;
+defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>;
+defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>;
+defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>;
+defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>;
+defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>;
+defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>;
+defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>;
+defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>;
+defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>;
+defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>;
+defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>;
+defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>;
+defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>;
+defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>;
+defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>;
+defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>;
+defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>;
+defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>;
+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>;
+defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32",
+ [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETEQ))]
+>;
+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>;
+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>;
+defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32",
+ [(set VCCReg:$dst, (setcc (i32 AllReg_32:$src0), VReg_32:$src1, SETNE))]
+>;
+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>;
+defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>;
+defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>;
+defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>;
+defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>;
+defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>;
+defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>;
+defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>;
+defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>;
+defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>;
+defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>;
+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>;
+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>;
+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>;
+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>;
+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>;
+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>;
+defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>;
+defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>;
+defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>;
+defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>;
+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>;
+defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>;
+defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>;
+defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>;
+defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>;
+defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>;
+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>;
+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>;
+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>;
+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>;
+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>;
+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>;
+defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>;
+defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>;
+defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>;
+defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>;
+defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>;
+defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>;
+defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>;
+defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>;
+defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>;
+defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>;
+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>;
+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>;
+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>;
+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>;
+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>;
+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>;
+defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>;
+defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>;
+defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>;
+defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>;
+defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>;
+defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>;
+defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>;
+defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>;
+defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>;
+defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>;
+defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>;
+defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>;
+defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>;
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
+def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
+//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>;
+//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
+//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
+//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
+//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
+//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
+//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
+//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
+//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
+//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
+//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
+//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
+//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
+//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
+//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
+//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
+//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
+//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
+//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
+//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
+//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
+def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
+//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
+//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
+//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
+
+defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>;
+
+//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>;
+//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
+//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
+//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
+//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
+//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
+//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
+
+//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
+//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
+//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
+def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
+//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
+//def IMAGE_SAMPLE_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_D", 0x00000022>;
+//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
+//def IMAGE_SAMPLE_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_L", 0x00000024>;
+//def IMAGE_SAMPLE_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_B", 0x00000025>;
+//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
+//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
+//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>;
+//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
+//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
+//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
+//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>;
+//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>;
+//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
+//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
+//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
+//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
+//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
+//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
+//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
+//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
+//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
+//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
+//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
+//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
+//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
+//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
+//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
+//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
+//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
+//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
+//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
+//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
+//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
+//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
+//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
+//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
+//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
+//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
+//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
+//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
+//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
+//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
+//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
+//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
+//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
+//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
+//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
+//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
+//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
+//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
+//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
+//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
+//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
+//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
+//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
+//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
+//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
+//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
+//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
+//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
+//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
+//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
+//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
+//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+
+let neverHasSideEffects = 1 in {
+defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
+} // End neverHasSideEffects
+defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
+//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
+//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
+defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
+  [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))]
+>;
+//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+//defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", []>;
+defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
+////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
+//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
+//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
+//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
+//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
+//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
+//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
+//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
+//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
+//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
+defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", []>;
+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
+defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>;
+defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", []>;
+defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", []>;
+defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", []>;
+defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
+defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>;
+defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
+defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
+defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", []>;
+defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
+defm V_RSQ_LEGACY_F32 : VOP1_32 <
+ 0x0000002d, "V_RSQ_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))]
+>;
+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
+defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
+defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
+defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>;
+defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>;
+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
+defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
+defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
+defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
+defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
+//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
+defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
+defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
+//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
+defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
+//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
+defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
+defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
+defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
+
+def V_INTERP_P1_F32 : VINTRP <
+ 0x00000000,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr),
+ "V_INTERP_P1_F32",
+ []
+>;
+
+def V_INTERP_P2_F32 : VINTRP <
+ 0x00000001,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr),
+ "V_INTERP_P2_F32",
+ []> {
+
+ let Constraints = "$src0 = $dst";
+ let DisableEncoding = "$src0";
+
+}
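+// $src0 is tied to $dst because the P2 stage accumulates into the P1 result:
+// the same VGPR is read and written, so the tied source carries no encoding
+// bits of its own (hence DisableEncoding).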
+
+def V_INTERP_MOV_F32 : VINTRP <
+ 0x00000002,
+ (outs VReg_32:$dst),
+ (ins i32imm:$attr_chan, i32imm:$attr),
+ "V_INTERP_MOV_F32",
+ []> {
+ let VSRC = 0;
+}
+
+//def V_INTERP_MOV_F32 : VINTRP_32 <0x00000002, "V_INTERP_MOV_F32", []>;
+//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
+
+let isTerminator = 1 in {
+
+def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+ [(IL_retflag)]> {
+ let SIMM16 = 0;
+ let isBarrier = 1;
+ let hasCtrlDep = 1;
+}
+
+let isBranch = 1 in {
+def S_BRANCH : SOPP <
+ 0x00000002, (ins brtarget:$target), "S_BRANCH",
+ []
+>;
+
+let DisableEncoding = "$scc" in {
+def S_CBRANCH_SCC0 : SOPP <
+ 0x00000004, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC0", []
+>;
+def S_CBRANCH_SCC1 : SOPP <
+ 0x00000005, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC1",
+ []
+>;
+} // End DisableEncoding = "$scc"
+
+def S_CBRANCH_VCCZ : SOPP <
+ 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCZ",
+ []
+>;
+def S_CBRANCH_VCCNZ : SOPP <
+ 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCNZ",
+ []
+>;
+//def S_CBRANCH_EXECZ : SOPP_ <0x00000008, "S_CBRANCH_EXECZ", []>;
+//def S_CBRANCH_EXECNZ : SOPP_ <0x00000009, "S_CBRANCH_EXECNZ", []>;
+
+} // End isBranch = 1
+} // End isTerminator = 1
+
+//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
+def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
+ []
+>;
+//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
+//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
+//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
+//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
+//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
+//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+
+/* XXX: No VOP3 version of this instruction yet */
+def V_CNDMASK_B32 : VOP2 <0x00000000, (outs VReg_32:$dst),
+ (ins VCCReg:$vcc, AllReg_32:$src0, VReg_32:$src1), "V_CNDMASK_B32",
+ [(set (i32 VReg_32:$dst),
+ (select VCCReg:$vcc, AllReg_32:$src0, VReg_32:$src1))] > {
+
+ let DisableEncoding = "$vcc";
+}
+
+// f32 pattern for V_CNDMASK_B32
+def : Pat <
+ (f32 (select VCCReg:$vcc, AllReg_32:$src0, VReg_32:$src1)),
+ (V_CNDMASK_B32 VCCReg:$vcc, AllReg_32:$src0, VReg_32:$src1)
+>;
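+// V_CNDMASK_B32 just moves 32-bit lanes, so one opcode serves both the i32
+// select pattern on the instruction and the f32 Pat above; the value type
+// only matters to the selector, not to the hardware.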
+
+defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
+defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
+
+defm V_ADD_F32 : VOP2_32 <
+ 0x00000003, "V_ADD_F32",
+ [(set VReg_32:$dst, (fadd AllReg_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
+ [(set VReg_32:$dst, (fsub AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
+defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
+defm V_MUL_LEGACY_F32 : VOP2_32 <
+ 0x00000007, "V_MUL_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", []>;
+//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
+//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", []>;
+
+defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
+ [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
+defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>;
+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>;
+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>;
+defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
+defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
+defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
+defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
+//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
+//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
+//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
+defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32", []>;
+defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32", []>;
+defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>;
+defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>;
+defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>;
+defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>;
+defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
+////def V_CVT_PKRTZ_F16_F32 : VOP2_F16 <0x0000002f, "V_CVT_PKRTZ_F16_F32", []>;
+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+
+let neverHasSideEffects = 1 in {
+
+def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
+def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
+//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>;
+//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>;
+
+} // End neverHasSideEffects
+def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
+def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
+def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
+def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
+def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
+def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
+def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
+def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
+////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
+////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
+////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
+////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
+////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
+////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
+////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
+////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
+////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
+def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
+def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>;
+def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>;
+def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>;
+def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
+def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
+def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
+def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
+def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
+def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
+def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
+def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
+
+def S_CSELECT_B32 : SOP2 <
+ 0x0000000a, (outs SReg_32:$dst),
+ (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
+ [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
+>;
+
+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+
+// f32 pattern for S_CSELECT_B32
+def : Pat <
+ (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
+ (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
+>;
+
+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
+
+def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+ [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))]
+>;
+def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64",
+ [(set VCCReg:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))]
+>;
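+// S_AND_VCC shares opcode 0x0000000f and the "S_AND_B64" mnemonic with
+// S_AND_B64 above; it is the same hardware instruction, re-declared so the
+// selector can produce a VCC-typed result for the SIvcc_and node.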
+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
+////def S_ANDN2_B32 : SOP2_ANDN2 <0x00000014, "S_ANDN2_B32", []>;
+////def S_ANDN2_B64 : SOP2_ANDN2 <0x00000015, "S_ANDN2_B64", []>;
+////def S_ORN2_B32 : SOP2_ORN2 <0x00000016, "S_ORN2_B32", []>;
+////def S_ORN2_B64 : SOP2_ORN2 <0x00000017, "S_ORN2_B64", []>;
+def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
+def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
+def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
+def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
+def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
+def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
+def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
+def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
+def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
+def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
+def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
+def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
+def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
+def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
+def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
+def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+
+class V_MOV_IMM <Operand immType, SDNode immNode> : VOP1 <
+ 0x1,
+ (outs VReg_32:$dst),
+ (ins immType:$src0),
+ "V_MOV_IMM",
+ [(set VReg_32:$dst, (immNode:$src0))]
+>;
+
+def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
+def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
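+// Both variants reuse the V_MOV_B32 encoding (opcode 0x1, defined above);
+// they differ only in the immediate type the selector matches, so either way
+// the same 32-bit move is emitted with a literal source operand.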
+
+def S_MOV_IMM_I32 : SOP1 <
+ 0x3,
+ (outs SReg_32:$dst),
+ (ins i32Literal:$src0),
+ "S_MOV_IMM_I32",
+ [(set SReg_32:$dst, (imm:$src0))]
+>;
+
+// i64 immediates aren't really supported in hardware, but LLVM will use the i64
+// type for indices on load and store instructions. The pattern for
+// S_MOV_IMM_I64 will only match i64 immediates that fit into 32 bits,
+// which the hardware can handle.
+def S_MOV_IMM_I64 : SOP1 <
+ 0x3,
+ (outs SReg_64:$dst),
+ (ins i64Literal:$src0),
+ "S_MOV_IMM_I64 $dst, $src0",
+ [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
+>;
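+// IMM32bitIn64bit is defined earlier in this patch; conceptually it is an
+// immediate leaf that only accepts values representable in 32 bits, roughly
+// (a sketch, not the verbatim definition):
+//
+//   def IMM32bitIn64bit : PatLeaf <(imm), [{
+//     return isInt<32>(N->getSExtValue());
+//   }]>;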
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+def SET_M0 : InstSI <
+ (outs SReg_32:$dst),
+ (ins i32imm:$src0),
+ "SET_M0",
+ [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
+>;
+
+def CONFIG_WRITE : InstSI <
+ (outs i32imm:$reg),
+ (ins i32imm:$val),
+ "CONFIG_WRITE $reg, $val",
+ [] > {
+ field bits<32> Inst = 0;
+}
+
+def LOAD_CONST : AMDGPUShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "LOAD_CONST $dst, $src",
+ [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+>;
+
+let usesCustomInserter = 1 in {
+
+def SI_V_CNDLT : InstSI <
+ (outs VReg_32:$dst),
+ (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
+ "SI_V_CNDLT $dst, $src0, $src1, $src2",
+ [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))]
+>;
+
+def SI_INTERP : InstSI <
+ (outs VReg_32:$dst),
+ (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
+ "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
+ []
+>;
+
+def SI_INTERP_CONST : InstSI <
+ (outs VReg_32:$dst),
+ (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
+ "SI_INTERP_CONST $dst, $attr_chan, $attr, $params",
+ [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan,
+ imm:$attr, SReg_32:$params))]
+>;
+
+} // end usesCustomInserter
+
+// SI Pseudo branch instructions. These are used by the CFG structurizer pass
+// and should be lowered to ISA instructions prior to codegen.
+
+let isBranch = 1, isTerminator = 1 in {
+def SI_IF_NZ : InstSI <
+ (outs),
+ (ins brtarget:$target, VCCReg:$vcc),
+ "SI_BRANCH_NZ",
+ [(IL_brcond bb:$target, VCCReg:$vcc)]
+>;
+
+def SI_IF_Z : InstSI <
+ (outs),
+ (ins brtarget:$target, VCCReg:$vcc),
+ "SI_BRANCH_Z",
+ []
+>;
+} // end isBranch = 1, isTerminator = 1
+} // end IsCodeGenOnly, isPseudo
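+// A later pass is expected to rewrite SI_IF_NZ and SI_IF_Z into real
+// conditional branches before encoding; since both pseudos test VCC,
+// S_CBRANCH_VCCNZ and S_CBRANCH_VCCZ (defined above) are the natural
+// lowering targets.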
+
+/* int_SI_vs_load_input */
+def : Pat<
+ (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
+ VReg_32:$buf_idx_vgpr),
+ (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
+ VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
+ 0, 0, (i32 SREG_LIT_0))
+>;
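+// Reading the constant operands against the MUBUF operand list (assuming
+// MUBUF_Load_Helper orders its ins like the MTBUF helpers above, minus the
+// dfmt/nfmt fields): offset = $attr_offset, offen = 0, idxen = 1 (the
+// per-vertex index arrives in $buf_idx_vgpr), the remaining mode bits are
+// zero, and soffset is the SREG_LIT_0 zero register.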
+
+/* int_SI_export */
+def : Pat <
+ (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
+                 VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
+>;
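+// Note the reordering: the intrinsic takes (en, vm, done, tgt, compr) but
+// the EXP instruction wants (en, tgt, compr, done, vm); the four source
+// registers pass through unchanged.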
+
+/* int_SI_sample */
+def : Pat <
+ (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
+ (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
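+// $writemask presumably feeds the MIMG dmask field, selecting which result
+// channels are written; the seven zeroed immediates are the remaining MIMG
+// control bits (see MIMG_Load_Helper earlier in the patch).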
+
+def CLAMP_SI : CLAMP<VReg_32>;
+def FABS_SI : FABS<VReg_32>;
+def FNEG_SI : FNEG<VReg_32>;
+
+def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
+def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
+def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>;
+def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>;
+
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
+
+def : Vector_Build <v4f32, VReg_32>;
+def : Vector_Build <v4i32, SReg_32>;
+
+def : BitConvert <i32, f32, SReg_32>;
+def : BitConvert <i32, f32, VReg_32>;
+
+def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <f32, i32, VReg_32>;
+
+def : Pat <
+ (i64 (SIvcc_bitcast VCCReg:$vcc)),
+ (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64))
+>;
+
+def : Pat <
+ (i1 (SIvcc_bitcast SReg_64:$vcc)),
+ (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
+>;
+
+/********** ====================== **********/
+/********** Interpolation Patterns **********/
+/********** ====================== **********/
+
+def : Pat <
+ (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
+ (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
+ imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
+ (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
+ imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
+ (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
+ imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
+ (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
+ imm:$attr, SReg_32:$params)
+>;
+
+/********** ================== **********/
+/********** Intrinsic Patterns **********/
+/********** ================== **********/
+
+/* llvm.AMDGPU.pow */
+/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL; is this correct? */
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>;
+
+def : Pat <
+ (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1),
+ (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1))
+>;
+
+/********** ============= **********/
+/********** VOP3 Patterns **********/
+/********** ============= **********/
+
+def : Pat <(f32 (IL_mad AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2)),
+ (V_MAD_LEGACY_F32 AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2,
+ 0, 0, 0, 0)>;
+
+} // End isSI predicate
diff --git a/lib/Target/AMDGPU/SIIntrinsics.td b/lib/Target/AMDGPU/SIIntrinsics.td
new file mode 100644
index 0000000..6eadc94
--- /dev/null
+++ b/lib/Target/AMDGPU/SIIntrinsics.td
@@ -0,0 +1,35 @@
+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+
+let TargetPrefix = "SI", isTarget = 1 in {
+
+ def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+  /* XXX: We may need a separate intrinsic here for loading integer values */
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
+ def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
+
+ def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
+
+ /* Interpolation Intrinsics */
+
+ def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
+ class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+
+ def int_SI_fs_interp_linear_center : Interp;
+ def int_SI_fs_interp_linear_centroid : Interp;
+ def int_SI_fs_interp_persp_center : Interp;
+ def int_SI_fs_interp_persp_centroid : Interp;
+ def int_SI_fs_interp_constant : Interp;
+}
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
new file mode 100644
index 0000000..40ba76f
--- /dev/null
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
+ : MachineFunctionInfo(),
+ spi_ps_input_addr(0)
+ { }
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
new file mode 100644
index 0000000..46a021f
--- /dev/null
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -0,0 +1,37 @@
+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SIMachineFunctionInfo is used to keep track of the spi_ps_input_addr config
+// register, which tells the hardware which interpolation parameters to load.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIMACHINEFUNCTIONINFO_H_
+#define SIMACHINEFUNCTIONINFO_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class SIMachineFunctionInfo : public MachineFunctionInfo {
+
+  public:
+    SIMachineFunctionInfo(const MachineFunction &MF);
+    unsigned spi_ps_input_addr;
+
+};
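+
+// A minimal usage sketch (illustrative, not part of this patch): passes can
+// reach the field through MachineFunction::getInfo(), e.g.
+//   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+//   MFI->spi_ps_input_addr |= EnabledInputBit; // EnabledInputBit is hypothetical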
+
+} // End namespace llvm
+
+
+#endif // SIMACHINEFUNCTIONINFO_H_
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
new file mode 100644
index 0000000..3d6dc83
--- /dev/null
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -0,0 +1,50 @@
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SI implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIRegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const
+{
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
+{
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ return &AMDGPU::VReg_32RegClass;
+ default: return rc;
+ }
+}
+
+const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const
+{
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::VReg_32RegClass;
+ }
+}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
new file mode 100644
index 0000000..b571da9
--- /dev/null
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -0,0 +1,47 @@
+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface definition for SIRegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIREGISTERINFO_H_
+#define SIREGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct SIRegisterInfo : public AMDGPURegisterInfo
+{
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// getISARegClass - rc is an AMDIL reg class. This function returns the
+ /// SI register class that is equivalent to the given AMDIL register class.
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass * rc) const;
+
+ /// getCFGStructurizerRegClass - get the register class of the specified
+ /// type to use in the CFGStructurizer.
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+
+};
+
+} // End namespace llvm
+
+#endif // SIREGISTERINFO_H_
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
new file mode 100644
index 0000000..c02d71d
--- /dev/null
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -0,0 +1,887 @@
+
+let Namespace = "AMDGPU" in {
+ def low : SubRegIndex;
+ def high : SubRegIndex;
+
+ def sub0 : SubRegIndex;
+ def sub1 : SubRegIndex;
+ def sub2 : SubRegIndex;
+ def sub3 : SubRegIndex;
+ def sub4 : SubRegIndex;
+ def sub5 : SubRegIndex;
+ def sub6 : SubRegIndex;
+ def sub7 : SubRegIndex;
+}
+
+class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [low, high];
+ let HWEncoding = encoding;
+}
+
+class SI_128 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
+ let HWEncoding = encoding;
+}
+
+class SI_256 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
+ let HWEncoding = encoding;
+}
+
+class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
+
+class VGPR_32 <bits<16> num, string name> : SIReg<name, num>;
+
+class SGPR_64 <bits<16> num, string name, list<Register> subregs> :
+ SI_64 <name, subregs, num>;
+
+class VGPR_64 <bits<16> num, string name, list<Register> subregs> :
+ SI_64 <name, subregs, num>;
+
+class SGPR_128 <bits<16> num, string name, list<Register> subregs> :
+ SI_128 <name, subregs, num>;
+
+class VGPR_128 <bits<16> num, string name, list<Register> subregs> :
+ SI_128 <name, subregs, num>;
+
+class SGPR_256 <bits<16> num, string name, list<Register> subregs> :
+ SI_256 <name, subregs, num>;
+
+def VCC : SIReg<"VCC">;
+def SCC : SIReg<"SCC">;
+def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
+
+def M0 : SIReg <"M0", 124>;
+
+// Interpolation registers
+
+def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
+def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
+def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
+def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
+def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
+def PERSP_CENTROID_J : SIReg <"PERSP_CENTROID_J">;
+def PERSP_I_W : SIReg <"PERSP_I_W">;
+def PERSP_J_W : SIReg <"PERSP_J_W">;
+def PERSP_1_W : SIReg <"PERSP_1_W">;
+def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
+def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
+def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
+def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
+def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
+def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
+def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
+def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
+def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
+def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
+def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
+def FRONT_FACE : SIReg <"FRONT_FACE">;
+def ANCILLARY : SIReg <"ANCILLARY">;
+def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
+def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
+
+def SGPR0 : SGPR_32 <0, "SGPR0">;
+def SGPR1 : SGPR_32 <1, "SGPR1">;
+def SGPR2 : SGPR_32 <2, "SGPR2">;
+def SGPR3 : SGPR_32 <3, "SGPR3">;
+def SGPR4 : SGPR_32 <4, "SGPR4">;
+def SGPR5 : SGPR_32 <5, "SGPR5">;
+def SGPR6 : SGPR_32 <6, "SGPR6">;
+def SGPR7 : SGPR_32 <7, "SGPR7">;
+def SGPR8 : SGPR_32 <8, "SGPR8">;
+def SGPR9 : SGPR_32 <9, "SGPR9">;
+def SGPR10 : SGPR_32 <10, "SGPR10">;
+def SGPR11 : SGPR_32 <11, "SGPR11">;
+def SGPR12 : SGPR_32 <12, "SGPR12">;
+def SGPR13 : SGPR_32 <13, "SGPR13">;
+def SGPR14 : SGPR_32 <14, "SGPR14">;
+def SGPR15 : SGPR_32 <15, "SGPR15">;
+def SGPR16 : SGPR_32 <16, "SGPR16">;
+def SGPR17 : SGPR_32 <17, "SGPR17">;
+def SGPR18 : SGPR_32 <18, "SGPR18">;
+def SGPR19 : SGPR_32 <19, "SGPR19">;
+def SGPR20 : SGPR_32 <20, "SGPR20">;
+def SGPR21 : SGPR_32 <21, "SGPR21">;
+def SGPR22 : SGPR_32 <22, "SGPR22">;
+def SGPR23 : SGPR_32 <23, "SGPR23">;
+def SGPR24 : SGPR_32 <24, "SGPR24">;
+def SGPR25 : SGPR_32 <25, "SGPR25">;
+def SGPR26 : SGPR_32 <26, "SGPR26">;
+def SGPR27 : SGPR_32 <27, "SGPR27">;
+def SGPR28 : SGPR_32 <28, "SGPR28">;
+def SGPR29 : SGPR_32 <29, "SGPR29">;
+def SGPR30 : SGPR_32 <30, "SGPR30">;
+def SGPR31 : SGPR_32 <31, "SGPR31">;
+def SGPR32 : SGPR_32 <32, "SGPR32">;
+def SGPR33 : SGPR_32 <33, "SGPR33">;
+def SGPR34 : SGPR_32 <34, "SGPR34">;
+def SGPR35 : SGPR_32 <35, "SGPR35">;
+def SGPR36 : SGPR_32 <36, "SGPR36">;
+def SGPR37 : SGPR_32 <37, "SGPR37">;
+def SGPR38 : SGPR_32 <38, "SGPR38">;
+def SGPR39 : SGPR_32 <39, "SGPR39">;
+def SGPR40 : SGPR_32 <40, "SGPR40">;
+def SGPR41 : SGPR_32 <41, "SGPR41">;
+def SGPR42 : SGPR_32 <42, "SGPR42">;
+def SGPR43 : SGPR_32 <43, "SGPR43">;
+def SGPR44 : SGPR_32 <44, "SGPR44">;
+def SGPR45 : SGPR_32 <45, "SGPR45">;
+def SGPR46 : SGPR_32 <46, "SGPR46">;
+def SGPR47 : SGPR_32 <47, "SGPR47">;
+def SGPR48 : SGPR_32 <48, "SGPR48">;
+def SGPR49 : SGPR_32 <49, "SGPR49">;
+def SGPR50 : SGPR_32 <50, "SGPR50">;
+def SGPR51 : SGPR_32 <51, "SGPR51">;
+def SGPR52 : SGPR_32 <52, "SGPR52">;
+def SGPR53 : SGPR_32 <53, "SGPR53">;
+def SGPR54 : SGPR_32 <54, "SGPR54">;
+def SGPR55 : SGPR_32 <55, "SGPR55">;
+def SGPR56 : SGPR_32 <56, "SGPR56">;
+def SGPR57 : SGPR_32 <57, "SGPR57">;
+def SGPR58 : SGPR_32 <58, "SGPR58">;
+def SGPR59 : SGPR_32 <59, "SGPR59">;
+def SGPR60 : SGPR_32 <60, "SGPR60">;
+def SGPR61 : SGPR_32 <61, "SGPR61">;
+def SGPR62 : SGPR_32 <62, "SGPR62">;
+def SGPR63 : SGPR_32 <63, "SGPR63">;
+def SGPR64 : SGPR_32 <64, "SGPR64">;
+def SGPR65 : SGPR_32 <65, "SGPR65">;
+def SGPR66 : SGPR_32 <66, "SGPR66">;
+def SGPR67 : SGPR_32 <67, "SGPR67">;
+def SGPR68 : SGPR_32 <68, "SGPR68">;
+def SGPR69 : SGPR_32 <69, "SGPR69">;
+def SGPR70 : SGPR_32 <70, "SGPR70">;
+def SGPR71 : SGPR_32 <71, "SGPR71">;
+def SGPR72 : SGPR_32 <72, "SGPR72">;
+def SGPR73 : SGPR_32 <73, "SGPR73">;
+def SGPR74 : SGPR_32 <74, "SGPR74">;
+def SGPR75 : SGPR_32 <75, "SGPR75">;
+def SGPR76 : SGPR_32 <76, "SGPR76">;
+def SGPR77 : SGPR_32 <77, "SGPR77">;
+def SGPR78 : SGPR_32 <78, "SGPR78">;
+def SGPR79 : SGPR_32 <79, "SGPR79">;
+def SGPR80 : SGPR_32 <80, "SGPR80">;
+def SGPR81 : SGPR_32 <81, "SGPR81">;
+def SGPR82 : SGPR_32 <82, "SGPR82">;
+def SGPR83 : SGPR_32 <83, "SGPR83">;
+def SGPR84 : SGPR_32 <84, "SGPR84">;
+def SGPR85 : SGPR_32 <85, "SGPR85">;
+def SGPR86 : SGPR_32 <86, "SGPR86">;
+def SGPR87 : SGPR_32 <87, "SGPR87">;
+def SGPR88 : SGPR_32 <88, "SGPR88">;
+def SGPR89 : SGPR_32 <89, "SGPR89">;
+def SGPR90 : SGPR_32 <90, "SGPR90">;
+def SGPR91 : SGPR_32 <91, "SGPR91">;
+def SGPR92 : SGPR_32 <92, "SGPR92">;
+def SGPR93 : SGPR_32 <93, "SGPR93">;
+def SGPR94 : SGPR_32 <94, "SGPR94">;
+def SGPR95 : SGPR_32 <95, "SGPR95">;
+def SGPR96 : SGPR_32 <96, "SGPR96">;
+def SGPR97 : SGPR_32 <97, "SGPR97">;
+def SGPR98 : SGPR_32 <98, "SGPR98">;
+def SGPR99 : SGPR_32 <99, "SGPR99">;
+def SGPR100 : SGPR_32 <100, "SGPR100">;
+def SGPR101 : SGPR_32 <101, "SGPR101">;
+def SGPR102 : SGPR_32 <102, "SGPR102">;
+def SGPR103 : SGPR_32 <103, "SGPR103">;
+def VGPR0 : VGPR_32 <0, "VGPR0">;
+def VGPR1 : VGPR_32 <1, "VGPR1">;
+def VGPR2 : VGPR_32 <2, "VGPR2">;
+def VGPR3 : VGPR_32 <3, "VGPR3">;
+def VGPR4 : VGPR_32 <4, "VGPR4">;
+def VGPR5 : VGPR_32 <5, "VGPR5">;
+def VGPR6 : VGPR_32 <6, "VGPR6">;
+def VGPR7 : VGPR_32 <7, "VGPR7">;
+def VGPR8 : VGPR_32 <8, "VGPR8">;
+def VGPR9 : VGPR_32 <9, "VGPR9">;
+def VGPR10 : VGPR_32 <10, "VGPR10">;
+def VGPR11 : VGPR_32 <11, "VGPR11">;
+def VGPR12 : VGPR_32 <12, "VGPR12">;
+def VGPR13 : VGPR_32 <13, "VGPR13">;
+def VGPR14 : VGPR_32 <14, "VGPR14">;
+def VGPR15 : VGPR_32 <15, "VGPR15">;
+def VGPR16 : VGPR_32 <16, "VGPR16">;
+def VGPR17 : VGPR_32 <17, "VGPR17">;
+def VGPR18 : VGPR_32 <18, "VGPR18">;
+def VGPR19 : VGPR_32 <19, "VGPR19">;
+def VGPR20 : VGPR_32 <20, "VGPR20">;
+def VGPR21 : VGPR_32 <21, "VGPR21">;
+def VGPR22 : VGPR_32 <22, "VGPR22">;
+def VGPR23 : VGPR_32 <23, "VGPR23">;
+def VGPR24 : VGPR_32 <24, "VGPR24">;
+def VGPR25 : VGPR_32 <25, "VGPR25">;
+def VGPR26 : VGPR_32 <26, "VGPR26">;
+def VGPR27 : VGPR_32 <27, "VGPR27">;
+def VGPR28 : VGPR_32 <28, "VGPR28">;
+def VGPR29 : VGPR_32 <29, "VGPR29">;
+def VGPR30 : VGPR_32 <30, "VGPR30">;
+def VGPR31 : VGPR_32 <31, "VGPR31">;
+def VGPR32 : VGPR_32 <32, "VGPR32">;
+def VGPR33 : VGPR_32 <33, "VGPR33">;
+def VGPR34 : VGPR_32 <34, "VGPR34">;
+def VGPR35 : VGPR_32 <35, "VGPR35">;
+def VGPR36 : VGPR_32 <36, "VGPR36">;
+def VGPR37 : VGPR_32 <37, "VGPR37">;
+def VGPR38 : VGPR_32 <38, "VGPR38">;
+def VGPR39 : VGPR_32 <39, "VGPR39">;
+def VGPR40 : VGPR_32 <40, "VGPR40">;
+def VGPR41 : VGPR_32 <41, "VGPR41">;
+def VGPR42 : VGPR_32 <42, "VGPR42">;
+def VGPR43 : VGPR_32 <43, "VGPR43">;
+def VGPR44 : VGPR_32 <44, "VGPR44">;
+def VGPR45 : VGPR_32 <45, "VGPR45">;
+def VGPR46 : VGPR_32 <46, "VGPR46">;
+def VGPR47 : VGPR_32 <47, "VGPR47">;
+def VGPR48 : VGPR_32 <48, "VGPR48">;
+def VGPR49 : VGPR_32 <49, "VGPR49">;
+def VGPR50 : VGPR_32 <50, "VGPR50">;
+def VGPR51 : VGPR_32 <51, "VGPR51">;
+def VGPR52 : VGPR_32 <52, "VGPR52">;
+def VGPR53 : VGPR_32 <53, "VGPR53">;
+def VGPR54 : VGPR_32 <54, "VGPR54">;
+def VGPR55 : VGPR_32 <55, "VGPR55">;
+def VGPR56 : VGPR_32 <56, "VGPR56">;
+def VGPR57 : VGPR_32 <57, "VGPR57">;
+def VGPR58 : VGPR_32 <58, "VGPR58">;
+def VGPR59 : VGPR_32 <59, "VGPR59">;
+def VGPR60 : VGPR_32 <60, "VGPR60">;
+def VGPR61 : VGPR_32 <61, "VGPR61">;
+def VGPR62 : VGPR_32 <62, "VGPR62">;
+def VGPR63 : VGPR_32 <63, "VGPR63">;
+def VGPR64 : VGPR_32 <64, "VGPR64">;
+def VGPR65 : VGPR_32 <65, "VGPR65">;
+def VGPR66 : VGPR_32 <66, "VGPR66">;
+def VGPR67 : VGPR_32 <67, "VGPR67">;
+def VGPR68 : VGPR_32 <68, "VGPR68">;
+def VGPR69 : VGPR_32 <69, "VGPR69">;
+def VGPR70 : VGPR_32 <70, "VGPR70">;
+def VGPR71 : VGPR_32 <71, "VGPR71">;
+def VGPR72 : VGPR_32 <72, "VGPR72">;
+def VGPR73 : VGPR_32 <73, "VGPR73">;
+def VGPR74 : VGPR_32 <74, "VGPR74">;
+def VGPR75 : VGPR_32 <75, "VGPR75">;
+def VGPR76 : VGPR_32 <76, "VGPR76">;
+def VGPR77 : VGPR_32 <77, "VGPR77">;
+def VGPR78 : VGPR_32 <78, "VGPR78">;
+def VGPR79 : VGPR_32 <79, "VGPR79">;
+def VGPR80 : VGPR_32 <80, "VGPR80">;
+def VGPR81 : VGPR_32 <81, "VGPR81">;
+def VGPR82 : VGPR_32 <82, "VGPR82">;
+def VGPR83 : VGPR_32 <83, "VGPR83">;
+def VGPR84 : VGPR_32 <84, "VGPR84">;
+def VGPR85 : VGPR_32 <85, "VGPR85">;
+def VGPR86 : VGPR_32 <86, "VGPR86">;
+def VGPR87 : VGPR_32 <87, "VGPR87">;
+def VGPR88 : VGPR_32 <88, "VGPR88">;
+def VGPR89 : VGPR_32 <89, "VGPR89">;
+def VGPR90 : VGPR_32 <90, "VGPR90">;
+def VGPR91 : VGPR_32 <91, "VGPR91">;
+def VGPR92 : VGPR_32 <92, "VGPR92">;
+def VGPR93 : VGPR_32 <93, "VGPR93">;
+def VGPR94 : VGPR_32 <94, "VGPR94">;
+def VGPR95 : VGPR_32 <95, "VGPR95">;
+def VGPR96 : VGPR_32 <96, "VGPR96">;
+def VGPR97 : VGPR_32 <97, "VGPR97">;
+def VGPR98 : VGPR_32 <98, "VGPR98">;
+def VGPR99 : VGPR_32 <99, "VGPR99">;
+def VGPR100 : VGPR_32 <100, "VGPR100">;
+def VGPR101 : VGPR_32 <101, "VGPR101">;
+def VGPR102 : VGPR_32 <102, "VGPR102">;
+def VGPR103 : VGPR_32 <103, "VGPR103">;
+def VGPR104 : VGPR_32 <104, "VGPR104">;
+def VGPR105 : VGPR_32 <105, "VGPR105">;
+def VGPR106 : VGPR_32 <106, "VGPR106">;
+def VGPR107 : VGPR_32 <107, "VGPR107">;
+def VGPR108 : VGPR_32 <108, "VGPR108">;
+def VGPR109 : VGPR_32 <109, "VGPR109">;
+def VGPR110 : VGPR_32 <110, "VGPR110">;
+def VGPR111 : VGPR_32 <111, "VGPR111">;
+def VGPR112 : VGPR_32 <112, "VGPR112">;
+def VGPR113 : VGPR_32 <113, "VGPR113">;
+def VGPR114 : VGPR_32 <114, "VGPR114">;
+def VGPR115 : VGPR_32 <115, "VGPR115">;
+def VGPR116 : VGPR_32 <116, "VGPR116">;
+def VGPR117 : VGPR_32 <117, "VGPR117">;
+def VGPR118 : VGPR_32 <118, "VGPR118">;
+def VGPR119 : VGPR_32 <119, "VGPR119">;
+def VGPR120 : VGPR_32 <120, "VGPR120">;
+def VGPR121 : VGPR_32 <121, "VGPR121">;
+def VGPR122 : VGPR_32 <122, "VGPR122">;
+def VGPR123 : VGPR_32 <123, "VGPR123">;
+def VGPR124 : VGPR_32 <124, "VGPR124">;
+def VGPR125 : VGPR_32 <125, "VGPR125">;
+def VGPR126 : VGPR_32 <126, "VGPR126">;
+def VGPR127 : VGPR_32 <127, "VGPR127">;
+def VGPR128 : VGPR_32 <128, "VGPR128">;
+def VGPR129 : VGPR_32 <129, "VGPR129">;
+def VGPR130 : VGPR_32 <130, "VGPR130">;
+def VGPR131 : VGPR_32 <131, "VGPR131">;
+def VGPR132 : VGPR_32 <132, "VGPR132">;
+def VGPR133 : VGPR_32 <133, "VGPR133">;
+def VGPR134 : VGPR_32 <134, "VGPR134">;
+def VGPR135 : VGPR_32 <135, "VGPR135">;
+def VGPR136 : VGPR_32 <136, "VGPR136">;
+def VGPR137 : VGPR_32 <137, "VGPR137">;
+def VGPR138 : VGPR_32 <138, "VGPR138">;
+def VGPR139 : VGPR_32 <139, "VGPR139">;
+def VGPR140 : VGPR_32 <140, "VGPR140">;
+def VGPR141 : VGPR_32 <141, "VGPR141">;
+def VGPR142 : VGPR_32 <142, "VGPR142">;
+def VGPR143 : VGPR_32 <143, "VGPR143">;
+def VGPR144 : VGPR_32 <144, "VGPR144">;
+def VGPR145 : VGPR_32 <145, "VGPR145">;
+def VGPR146 : VGPR_32 <146, "VGPR146">;
+def VGPR147 : VGPR_32 <147, "VGPR147">;
+def VGPR148 : VGPR_32 <148, "VGPR148">;
+def VGPR149 : VGPR_32 <149, "VGPR149">;
+def VGPR150 : VGPR_32 <150, "VGPR150">;
+def VGPR151 : VGPR_32 <151, "VGPR151">;
+def VGPR152 : VGPR_32 <152, "VGPR152">;
+def VGPR153 : VGPR_32 <153, "VGPR153">;
+def VGPR154 : VGPR_32 <154, "VGPR154">;
+def VGPR155 : VGPR_32 <155, "VGPR155">;
+def VGPR156 : VGPR_32 <156, "VGPR156">;
+def VGPR157 : VGPR_32 <157, "VGPR157">;
+def VGPR158 : VGPR_32 <158, "VGPR158">;
+def VGPR159 : VGPR_32 <159, "VGPR159">;
+def VGPR160 : VGPR_32 <160, "VGPR160">;
+def VGPR161 : VGPR_32 <161, "VGPR161">;
+def VGPR162 : VGPR_32 <162, "VGPR162">;
+def VGPR163 : VGPR_32 <163, "VGPR163">;
+def VGPR164 : VGPR_32 <164, "VGPR164">;
+def VGPR165 : VGPR_32 <165, "VGPR165">;
+def VGPR166 : VGPR_32 <166, "VGPR166">;
+def VGPR167 : VGPR_32 <167, "VGPR167">;
+def VGPR168 : VGPR_32 <168, "VGPR168">;
+def VGPR169 : VGPR_32 <169, "VGPR169">;
+def VGPR170 : VGPR_32 <170, "VGPR170">;
+def VGPR171 : VGPR_32 <171, "VGPR171">;
+def VGPR172 : VGPR_32 <172, "VGPR172">;
+def VGPR173 : VGPR_32 <173, "VGPR173">;
+def VGPR174 : VGPR_32 <174, "VGPR174">;
+def VGPR175 : VGPR_32 <175, "VGPR175">;
+def VGPR176 : VGPR_32 <176, "VGPR176">;
+def VGPR177 : VGPR_32 <177, "VGPR177">;
+def VGPR178 : VGPR_32 <178, "VGPR178">;
+def VGPR179 : VGPR_32 <179, "VGPR179">;
+def VGPR180 : VGPR_32 <180, "VGPR180">;
+def VGPR181 : VGPR_32 <181, "VGPR181">;
+def VGPR182 : VGPR_32 <182, "VGPR182">;
+def VGPR183 : VGPR_32 <183, "VGPR183">;
+def VGPR184 : VGPR_32 <184, "VGPR184">;
+def VGPR185 : VGPR_32 <185, "VGPR185">;
+def VGPR186 : VGPR_32 <186, "VGPR186">;
+def VGPR187 : VGPR_32 <187, "VGPR187">;
+def VGPR188 : VGPR_32 <188, "VGPR188">;
+def VGPR189 : VGPR_32 <189, "VGPR189">;
+def VGPR190 : VGPR_32 <190, "VGPR190">;
+def VGPR191 : VGPR_32 <191, "VGPR191">;
+def VGPR192 : VGPR_32 <192, "VGPR192">;
+def VGPR193 : VGPR_32 <193, "VGPR193">;
+def VGPR194 : VGPR_32 <194, "VGPR194">;
+def VGPR195 : VGPR_32 <195, "VGPR195">;
+def VGPR196 : VGPR_32 <196, "VGPR196">;
+def VGPR197 : VGPR_32 <197, "VGPR197">;
+def VGPR198 : VGPR_32 <198, "VGPR198">;
+def VGPR199 : VGPR_32 <199, "VGPR199">;
+def VGPR200 : VGPR_32 <200, "VGPR200">;
+def VGPR201 : VGPR_32 <201, "VGPR201">;
+def VGPR202 : VGPR_32 <202, "VGPR202">;
+def VGPR203 : VGPR_32 <203, "VGPR203">;
+def VGPR204 : VGPR_32 <204, "VGPR204">;
+def VGPR205 : VGPR_32 <205, "VGPR205">;
+def VGPR206 : VGPR_32 <206, "VGPR206">;
+def VGPR207 : VGPR_32 <207, "VGPR207">;
+def VGPR208 : VGPR_32 <208, "VGPR208">;
+def VGPR209 : VGPR_32 <209, "VGPR209">;
+def VGPR210 : VGPR_32 <210, "VGPR210">;
+def VGPR211 : VGPR_32 <211, "VGPR211">;
+def VGPR212 : VGPR_32 <212, "VGPR212">;
+def VGPR213 : VGPR_32 <213, "VGPR213">;
+def VGPR214 : VGPR_32 <214, "VGPR214">;
+def VGPR215 : VGPR_32 <215, "VGPR215">;
+def VGPR216 : VGPR_32 <216, "VGPR216">;
+def VGPR217 : VGPR_32 <217, "VGPR217">;
+def VGPR218 : VGPR_32 <218, "VGPR218">;
+def VGPR219 : VGPR_32 <219, "VGPR219">;
+def VGPR220 : VGPR_32 <220, "VGPR220">;
+def VGPR221 : VGPR_32 <221, "VGPR221">;
+def VGPR222 : VGPR_32 <222, "VGPR222">;
+def VGPR223 : VGPR_32 <223, "VGPR223">;
+def VGPR224 : VGPR_32 <224, "VGPR224">;
+def VGPR225 : VGPR_32 <225, "VGPR225">;
+def VGPR226 : VGPR_32 <226, "VGPR226">;
+def VGPR227 : VGPR_32 <227, "VGPR227">;
+def VGPR228 : VGPR_32 <228, "VGPR228">;
+def VGPR229 : VGPR_32 <229, "VGPR229">;
+def VGPR230 : VGPR_32 <230, "VGPR230">;
+def VGPR231 : VGPR_32 <231, "VGPR231">;
+def VGPR232 : VGPR_32 <232, "VGPR232">;
+def VGPR233 : VGPR_32 <233, "VGPR233">;
+def VGPR234 : VGPR_32 <234, "VGPR234">;
+def VGPR235 : VGPR_32 <235, "VGPR235">;
+def VGPR236 : VGPR_32 <236, "VGPR236">;
+def VGPR237 : VGPR_32 <237, "VGPR237">;
+def VGPR238 : VGPR_32 <238, "VGPR238">;
+def VGPR239 : VGPR_32 <239, "VGPR239">;
+def VGPR240 : VGPR_32 <240, "VGPR240">;
+def VGPR241 : VGPR_32 <241, "VGPR241">;
+def VGPR242 : VGPR_32 <242, "VGPR242">;
+def VGPR243 : VGPR_32 <243, "VGPR243">;
+def VGPR244 : VGPR_32 <244, "VGPR244">;
+def VGPR245 : VGPR_32 <245, "VGPR245">;
+def VGPR246 : VGPR_32 <246, "VGPR246">;
+def VGPR247 : VGPR_32 <247, "VGPR247">;
+def VGPR248 : VGPR_32 <248, "VGPR248">;
+def VGPR249 : VGPR_32 <249, "VGPR249">;
+def VGPR250 : VGPR_32 <250, "VGPR250">;
+def VGPR251 : VGPR_32 <251, "VGPR251">;
+def VGPR252 : VGPR_32 <252, "VGPR252">;
+def VGPR253 : VGPR_32 <253, "VGPR253">;
+def VGPR254 : VGPR_32 <254, "VGPR254">;
+def VGPR255 : VGPR_32 <255, "VGPR255">;
+
+def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "SGPR%u", 0, 103), SREG_LIT_0, M0)
+>;
+
+def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "VGPR%u", 0, 255),
+ PERSP_SAMPLE_I, PERSP_SAMPLE_J,
+ PERSP_CENTER_I, PERSP_CENTER_J,
+ PERSP_CENTROID_I, PERSP_CENTROID_J,
+ PERSP_I_W, PERSP_J_W, PERSP_1_W,
+ LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
+ LINEAR_CENTER_I, LINEAR_CENTER_J,
+ LINEAR_CENTROID_I, LINEAR_CENTROID_J,
+ LINE_STIPPLE_TEX_COORD,
+ POS_X_FLOAT,
+ POS_Y_FLOAT,
+ POS_Z_FLOAT,
+ POS_W_FLOAT,
+ FRONT_FACE,
+ ANCILLARY,
+ SAMPLE_COVERAGE,
+ POS_FIXED_PT
+ )
+>;
+
+def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add VReg_32, SReg_32)
+>;
+
+def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
+def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
+
+def SGPR0_64 : SGPR_64 <0, "SGPR0_64", [ SGPR0,SGPR1]>;
+def SGPR2_64 : SGPR_64 <2, "SGPR2_64", [ SGPR2,SGPR3]>;
+def SGPR4_64 : SGPR_64 <4, "SGPR4_64", [ SGPR4,SGPR5]>;
+def SGPR6_64 : SGPR_64 <6, "SGPR6_64", [ SGPR6,SGPR7]>;
+def SGPR8_64 : SGPR_64 <8, "SGPR8_64", [ SGPR8,SGPR9]>;
+def SGPR10_64 : SGPR_64 <10, "SGPR10_64", [ SGPR10,SGPR11]>;
+def SGPR12_64 : SGPR_64 <12, "SGPR12_64", [ SGPR12,SGPR13]>;
+def SGPR14_64 : SGPR_64 <14, "SGPR14_64", [ SGPR14,SGPR15]>;
+def SGPR16_64 : SGPR_64 <16, "SGPR16_64", [ SGPR16,SGPR17]>;
+def SGPR18_64 : SGPR_64 <18, "SGPR18_64", [ SGPR18,SGPR19]>;
+def SGPR20_64 : SGPR_64 <20, "SGPR20_64", [ SGPR20,SGPR21]>;
+def SGPR22_64 : SGPR_64 <22, "SGPR22_64", [ SGPR22,SGPR23]>;
+def SGPR24_64 : SGPR_64 <24, "SGPR24_64", [ SGPR24,SGPR25]>;
+def SGPR26_64 : SGPR_64 <26, "SGPR26_64", [ SGPR26,SGPR27]>;
+def SGPR28_64 : SGPR_64 <28, "SGPR28_64", [ SGPR28,SGPR29]>;
+def SGPR30_64 : SGPR_64 <30, "SGPR30_64", [ SGPR30,SGPR31]>;
+def SGPR32_64 : SGPR_64 <32, "SGPR32_64", [ SGPR32,SGPR33]>;
+def SGPR34_64 : SGPR_64 <34, "SGPR34_64", [ SGPR34,SGPR35]>;
+def SGPR36_64 : SGPR_64 <36, "SGPR36_64", [ SGPR36,SGPR37]>;
+def SGPR38_64 : SGPR_64 <38, "SGPR38_64", [ SGPR38,SGPR39]>;
+def SGPR40_64 : SGPR_64 <40, "SGPR40_64", [ SGPR40,SGPR41]>;
+def SGPR42_64 : SGPR_64 <42, "SGPR42_64", [ SGPR42,SGPR43]>;
+def SGPR44_64 : SGPR_64 <44, "SGPR44_64", [ SGPR44,SGPR45]>;
+def SGPR46_64 : SGPR_64 <46, "SGPR46_64", [ SGPR46,SGPR47]>;
+def SGPR48_64 : SGPR_64 <48, "SGPR48_64", [ SGPR48,SGPR49]>;
+def SGPR50_64 : SGPR_64 <50, "SGPR50_64", [ SGPR50,SGPR51]>;
+def SGPR52_64 : SGPR_64 <52, "SGPR52_64", [ SGPR52,SGPR53]>;
+def SGPR54_64 : SGPR_64 <54, "SGPR54_64", [ SGPR54,SGPR55]>;
+def SGPR56_64 : SGPR_64 <56, "SGPR56_64", [ SGPR56,SGPR57]>;
+def SGPR58_64 : SGPR_64 <58, "SGPR58_64", [ SGPR58,SGPR59]>;
+def SGPR60_64 : SGPR_64 <60, "SGPR60_64", [ SGPR60,SGPR61]>;
+def SGPR62_64 : SGPR_64 <62, "SGPR62_64", [ SGPR62,SGPR63]>;
+def SGPR64_64 : SGPR_64 <64, "SGPR64_64", [ SGPR64,SGPR65]>;
+def SGPR66_64 : SGPR_64 <66, "SGPR66_64", [ SGPR66,SGPR67]>;
+def SGPR68_64 : SGPR_64 <68, "SGPR68_64", [ SGPR68,SGPR69]>;
+def SGPR70_64 : SGPR_64 <70, "SGPR70_64", [ SGPR70,SGPR71]>;
+def SGPR72_64 : SGPR_64 <72, "SGPR72_64", [ SGPR72,SGPR73]>;
+def SGPR74_64 : SGPR_64 <74, "SGPR74_64", [ SGPR74,SGPR75]>;
+def SGPR76_64 : SGPR_64 <76, "SGPR76_64", [ SGPR76,SGPR77]>;
+def SGPR78_64 : SGPR_64 <78, "SGPR78_64", [ SGPR78,SGPR79]>;
+def SGPR80_64 : SGPR_64 <80, "SGPR80_64", [ SGPR80,SGPR81]>;
+def SGPR82_64 : SGPR_64 <82, "SGPR82_64", [ SGPR82,SGPR83]>;
+def SGPR84_64 : SGPR_64 <84, "SGPR84_64", [ SGPR84,SGPR85]>;
+def SGPR86_64 : SGPR_64 <86, "SGPR86_64", [ SGPR86,SGPR87]>;
+def SGPR88_64 : SGPR_64 <88, "SGPR88_64", [ SGPR88,SGPR89]>;
+def SGPR90_64 : SGPR_64 <90, "SGPR90_64", [ SGPR90,SGPR91]>;
+def SGPR92_64 : SGPR_64 <92, "SGPR92_64", [ SGPR92,SGPR93]>;
+def SGPR94_64 : SGPR_64 <94, "SGPR94_64", [ SGPR94,SGPR95]>;
+def SGPR96_64 : SGPR_64 <96, "SGPR96_64", [ SGPR96,SGPR97]>;
+def SGPR98_64 : SGPR_64 <98, "SGPR98_64", [ SGPR98,SGPR99]>;
+def SGPR100_64 : SGPR_64 <100, "SGPR100_64", [ SGPR100,SGPR101]>;
+def SGPR102_64 : SGPR_64 <102, "SGPR102_64", [ SGPR102,SGPR103]>;
+def SReg_64 : RegisterClass<"AMDGPU", [i64], 64,
+ (add SGPR0_64
+, SGPR2_64, SGPR4_64, SGPR6_64, SGPR8_64, SGPR10_64
+, SGPR12_64, SGPR14_64, SGPR16_64, SGPR18_64, SGPR20_64
+, SGPR22_64, SGPR24_64, SGPR26_64, SGPR28_64, SGPR30_64
+, SGPR32_64, SGPR34_64, SGPR36_64, SGPR38_64, SGPR40_64
+, SGPR42_64, SGPR44_64, SGPR46_64, SGPR48_64, SGPR50_64
+, SGPR52_64, SGPR54_64, SGPR56_64, SGPR58_64, SGPR60_64
+, SGPR62_64, SGPR64_64, SGPR66_64, SGPR68_64, SGPR70_64
+, SGPR72_64, SGPR74_64, SGPR76_64, SGPR78_64, SGPR80_64
+, SGPR82_64, SGPR84_64, SGPR86_64, SGPR88_64, SGPR90_64
+, SGPR92_64, SGPR94_64, SGPR96_64, SGPR98_64, SGPR100_64
+, SGPR102_64, VCC)
+>;
+def SGPR0_128 : SGPR_128 <0, "SGPR0_128", [ SGPR0,SGPR1,SGPR2,SGPR3]>;
+def SGPR4_128 : SGPR_128 <4, "SGPR4_128", [ SGPR4,SGPR5,SGPR6,SGPR7]>;
+def SGPR8_128 : SGPR_128 <8, "SGPR8_128", [ SGPR8,SGPR9,SGPR10,SGPR11]>;
+def SGPR12_128 : SGPR_128 <12, "SGPR12_128", [ SGPR12,SGPR13,SGPR14,SGPR15]>;
+def SGPR16_128 : SGPR_128 <16, "SGPR16_128", [ SGPR16,SGPR17,SGPR18,SGPR19]>;
+def SGPR20_128 : SGPR_128 <20, "SGPR20_128", [ SGPR20,SGPR21,SGPR22,SGPR23]>;
+def SGPR24_128 : SGPR_128 <24, "SGPR24_128", [ SGPR24,SGPR25,SGPR26,SGPR27]>;
+def SGPR28_128 : SGPR_128 <28, "SGPR28_128", [ SGPR28,SGPR29,SGPR30,SGPR31]>;
+def SGPR32_128 : SGPR_128 <32, "SGPR32_128", [ SGPR32,SGPR33,SGPR34,SGPR35]>;
+def SGPR36_128 : SGPR_128 <36, "SGPR36_128", [ SGPR36,SGPR37,SGPR38,SGPR39]>;
+def SGPR40_128 : SGPR_128 <40, "SGPR40_128", [ SGPR40,SGPR41,SGPR42,SGPR43]>;
+def SGPR44_128 : SGPR_128 <44, "SGPR44_128", [ SGPR44,SGPR45,SGPR46,SGPR47]>;
+def SGPR48_128 : SGPR_128 <48, "SGPR48_128", [ SGPR48,SGPR49,SGPR50,SGPR51]>;
+def SGPR52_128 : SGPR_128 <52, "SGPR52_128", [ SGPR52,SGPR53,SGPR54,SGPR55]>;
+def SGPR56_128 : SGPR_128 <56, "SGPR56_128", [ SGPR56,SGPR57,SGPR58,SGPR59]>;
+def SGPR60_128 : SGPR_128 <60, "SGPR60_128", [ SGPR60,SGPR61,SGPR62,SGPR63]>;
+def SGPR64_128 : SGPR_128 <64, "SGPR64_128", [ SGPR64,SGPR65,SGPR66,SGPR67]>;
+def SGPR68_128 : SGPR_128 <68, "SGPR68_128", [ SGPR68,SGPR69,SGPR70,SGPR71]>;
+def SGPR72_128 : SGPR_128 <72, "SGPR72_128", [ SGPR72,SGPR73,SGPR74,SGPR75]>;
+def SGPR76_128 : SGPR_128 <76, "SGPR76_128", [ SGPR76,SGPR77,SGPR78,SGPR79]>;
+def SGPR80_128 : SGPR_128 <80, "SGPR80_128", [ SGPR80,SGPR81,SGPR82,SGPR83]>;
+def SGPR84_128 : SGPR_128 <84, "SGPR84_128", [ SGPR84,SGPR85,SGPR86,SGPR87]>;
+def SGPR88_128 : SGPR_128 <88, "SGPR88_128", [ SGPR88,SGPR89,SGPR90,SGPR91]>;
+def SGPR92_128 : SGPR_128 <92, "SGPR92_128", [ SGPR92,SGPR93,SGPR94,SGPR95]>;
+def SGPR96_128 : SGPR_128 <96, "SGPR96_128", [ SGPR96,SGPR97,SGPR98,SGPR99]>;
+def SGPR100_128 : SGPR_128 <100, "SGPR100_128", [ SGPR100,SGPR101,SGPR102,SGPR103]>;
+def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add SGPR0_128
+, SGPR4_128, SGPR8_128, SGPR12_128, SGPR16_128, SGPR20_128
+, SGPR24_128, SGPR28_128, SGPR32_128, SGPR36_128, SGPR40_128
+, SGPR44_128, SGPR48_128, SGPR52_128, SGPR56_128, SGPR60_128
+, SGPR64_128, SGPR68_128, SGPR72_128, SGPR76_128, SGPR80_128
+, SGPR84_128, SGPR88_128, SGPR92_128, SGPR96_128, SGPR100_128
+)
+>;
+def SGPR0_256 : SGPR_256 <0, "SGPR0_256", [ SGPR0,SGPR1,SGPR2,SGPR3,SGPR4,SGPR5,SGPR6,SGPR7]>;
+def SGPR8_256 : SGPR_256 <8, "SGPR8_256", [ SGPR8,SGPR9,SGPR10,SGPR11,SGPR12,SGPR13,SGPR14,SGPR15]>;
+def SGPR16_256 : SGPR_256 <16, "SGPR16_256", [ SGPR16,SGPR17,SGPR18,SGPR19,SGPR20,SGPR21,SGPR22,SGPR23]>;
+def SGPR24_256 : SGPR_256 <24, "SGPR24_256", [ SGPR24,SGPR25,SGPR26,SGPR27,SGPR28,SGPR29,SGPR30,SGPR31]>;
+def SGPR32_256 : SGPR_256 <32, "SGPR32_256", [ SGPR32,SGPR33,SGPR34,SGPR35,SGPR36,SGPR37,SGPR38,SGPR39]>;
+def SGPR40_256 : SGPR_256 <40, "SGPR40_256", [ SGPR40,SGPR41,SGPR42,SGPR43,SGPR44,SGPR45,SGPR46,SGPR47]>;
+def SGPR48_256 : SGPR_256 <48, "SGPR48_256", [ SGPR48,SGPR49,SGPR50,SGPR51,SGPR52,SGPR53,SGPR54,SGPR55]>;
+def SGPR56_256 : SGPR_256 <56, "SGPR56_256", [ SGPR56,SGPR57,SGPR58,SGPR59,SGPR60,SGPR61,SGPR62,SGPR63]>;
+def SGPR64_256 : SGPR_256 <64, "SGPR64_256", [ SGPR64,SGPR65,SGPR66,SGPR67,SGPR68,SGPR69,SGPR70,SGPR71]>;
+def SGPR72_256 : SGPR_256 <72, "SGPR72_256", [ SGPR72,SGPR73,SGPR74,SGPR75,SGPR76,SGPR77,SGPR78,SGPR79]>;
+def SGPR80_256 : SGPR_256 <80, "SGPR80_256", [ SGPR80,SGPR81,SGPR82,SGPR83,SGPR84,SGPR85,SGPR86,SGPR87]>;
+def SGPR88_256 : SGPR_256 <88, "SGPR88_256", [ SGPR88,SGPR89,SGPR90,SGPR91,SGPR92,SGPR93,SGPR94,SGPR95]>;
+def SGPR96_256 : SGPR_256 <96, "SGPR96_256", [ SGPR96,SGPR97,SGPR98,SGPR99,SGPR100,SGPR101,SGPR102,SGPR103]>;
+def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256,
+ (add SGPR0_256
+, SGPR8_256, SGPR16_256, SGPR24_256, SGPR32_256, SGPR40_256
+, SGPR48_256, SGPR56_256, SGPR64_256, SGPR72_256, SGPR80_256
+, SGPR88_256, SGPR96_256)
+>;
+def VGPR0_64 : VGPR_64 <0, "VGPR0_64", [ VGPR0,VGPR1]>;
+def VGPR2_64 : VGPR_64 <2, "VGPR2_64", [ VGPR2,VGPR3]>;
+def VGPR4_64 : VGPR_64 <4, "VGPR4_64", [ VGPR4,VGPR5]>;
+def VGPR6_64 : VGPR_64 <6, "VGPR6_64", [ VGPR6,VGPR7]>;
+def VGPR8_64 : VGPR_64 <8, "VGPR8_64", [ VGPR8,VGPR9]>;
+def VGPR10_64 : VGPR_64 <10, "VGPR10_64", [ VGPR10,VGPR11]>;
+def VGPR12_64 : VGPR_64 <12, "VGPR12_64", [ VGPR12,VGPR13]>;
+def VGPR14_64 : VGPR_64 <14, "VGPR14_64", [ VGPR14,VGPR15]>;
+def VGPR16_64 : VGPR_64 <16, "VGPR16_64", [ VGPR16,VGPR17]>;
+def VGPR18_64 : VGPR_64 <18, "VGPR18_64", [ VGPR18,VGPR19]>;
+def VGPR20_64 : VGPR_64 <20, "VGPR20_64", [ VGPR20,VGPR21]>;
+def VGPR22_64 : VGPR_64 <22, "VGPR22_64", [ VGPR22,VGPR23]>;
+def VGPR24_64 : VGPR_64 <24, "VGPR24_64", [ VGPR24,VGPR25]>;
+def VGPR26_64 : VGPR_64 <26, "VGPR26_64", [ VGPR26,VGPR27]>;
+def VGPR28_64 : VGPR_64 <28, "VGPR28_64", [ VGPR28,VGPR29]>;
+def VGPR30_64 : VGPR_64 <30, "VGPR30_64", [ VGPR30,VGPR31]>;
+def VGPR32_64 : VGPR_64 <32, "VGPR32_64", [ VGPR32,VGPR33]>;
+def VGPR34_64 : VGPR_64 <34, "VGPR34_64", [ VGPR34,VGPR35]>;
+def VGPR36_64 : VGPR_64 <36, "VGPR36_64", [ VGPR36,VGPR37]>;
+def VGPR38_64 : VGPR_64 <38, "VGPR38_64", [ VGPR38,VGPR39]>;
+def VGPR40_64 : VGPR_64 <40, "VGPR40_64", [ VGPR40,VGPR41]>;
+def VGPR42_64 : VGPR_64 <42, "VGPR42_64", [ VGPR42,VGPR43]>;
+def VGPR44_64 : VGPR_64 <44, "VGPR44_64", [ VGPR44,VGPR45]>;
+def VGPR46_64 : VGPR_64 <46, "VGPR46_64", [ VGPR46,VGPR47]>;
+def VGPR48_64 : VGPR_64 <48, "VGPR48_64", [ VGPR48,VGPR49]>;
+def VGPR50_64 : VGPR_64 <50, "VGPR50_64", [ VGPR50,VGPR51]>;
+def VGPR52_64 : VGPR_64 <52, "VGPR52_64", [ VGPR52,VGPR53]>;
+def VGPR54_64 : VGPR_64 <54, "VGPR54_64", [ VGPR54,VGPR55]>;
+def VGPR56_64 : VGPR_64 <56, "VGPR56_64", [ VGPR56,VGPR57]>;
+def VGPR58_64 : VGPR_64 <58, "VGPR58_64", [ VGPR58,VGPR59]>;
+def VGPR60_64 : VGPR_64 <60, "VGPR60_64", [ VGPR60,VGPR61]>;
+def VGPR62_64 : VGPR_64 <62, "VGPR62_64", [ VGPR62,VGPR63]>;
+def VGPR64_64 : VGPR_64 <64, "VGPR64_64", [ VGPR64,VGPR65]>;
+def VGPR66_64 : VGPR_64 <66, "VGPR66_64", [ VGPR66,VGPR67]>;
+def VGPR68_64 : VGPR_64 <68, "VGPR68_64", [ VGPR68,VGPR69]>;
+def VGPR70_64 : VGPR_64 <70, "VGPR70_64", [ VGPR70,VGPR71]>;
+def VGPR72_64 : VGPR_64 <72, "VGPR72_64", [ VGPR72,VGPR73]>;
+def VGPR74_64 : VGPR_64 <74, "VGPR74_64", [ VGPR74,VGPR75]>;
+def VGPR76_64 : VGPR_64 <76, "VGPR76_64", [ VGPR76,VGPR77]>;
+def VGPR78_64 : VGPR_64 <78, "VGPR78_64", [ VGPR78,VGPR79]>;
+def VGPR80_64 : VGPR_64 <80, "VGPR80_64", [ VGPR80,VGPR81]>;
+def VGPR82_64 : VGPR_64 <82, "VGPR82_64", [ VGPR82,VGPR83]>;
+def VGPR84_64 : VGPR_64 <84, "VGPR84_64", [ VGPR84,VGPR85]>;
+def VGPR86_64 : VGPR_64 <86, "VGPR86_64", [ VGPR86,VGPR87]>;
+def VGPR88_64 : VGPR_64 <88, "VGPR88_64", [ VGPR88,VGPR89]>;
+def VGPR90_64 : VGPR_64 <90, "VGPR90_64", [ VGPR90,VGPR91]>;
+def VGPR92_64 : VGPR_64 <92, "VGPR92_64", [ VGPR92,VGPR93]>;
+def VGPR94_64 : VGPR_64 <94, "VGPR94_64", [ VGPR94,VGPR95]>;
+def VGPR96_64 : VGPR_64 <96, "VGPR96_64", [ VGPR96,VGPR97]>;
+def VGPR98_64 : VGPR_64 <98, "VGPR98_64", [ VGPR98,VGPR99]>;
+def VGPR100_64 : VGPR_64 <100, "VGPR100_64", [ VGPR100,VGPR101]>;
+def VGPR102_64 : VGPR_64 <102, "VGPR102_64", [ VGPR102,VGPR103]>;
+def VGPR104_64 : VGPR_64 <104, "VGPR104_64", [ VGPR104,VGPR105]>;
+def VGPR106_64 : VGPR_64 <106, "VGPR106_64", [ VGPR106,VGPR107]>;
+def VGPR108_64 : VGPR_64 <108, "VGPR108_64", [ VGPR108,VGPR109]>;
+def VGPR110_64 : VGPR_64 <110, "VGPR110_64", [ VGPR110,VGPR111]>;
+def VGPR112_64 : VGPR_64 <112, "VGPR112_64", [ VGPR112,VGPR113]>;
+def VGPR114_64 : VGPR_64 <114, "VGPR114_64", [ VGPR114,VGPR115]>;
+def VGPR116_64 : VGPR_64 <116, "VGPR116_64", [ VGPR116,VGPR117]>;
+def VGPR118_64 : VGPR_64 <118, "VGPR118_64", [ VGPR118,VGPR119]>;
+def VGPR120_64 : VGPR_64 <120, "VGPR120_64", [ VGPR120,VGPR121]>;
+def VGPR122_64 : VGPR_64 <122, "VGPR122_64", [ VGPR122,VGPR123]>;
+def VGPR124_64 : VGPR_64 <124, "VGPR124_64", [ VGPR124,VGPR125]>;
+def VGPR126_64 : VGPR_64 <126, "VGPR126_64", [ VGPR126,VGPR127]>;
+def VGPR128_64 : VGPR_64 <128, "VGPR128_64", [ VGPR128,VGPR129]>;
+def VGPR130_64 : VGPR_64 <130, "VGPR130_64", [ VGPR130,VGPR131]>;
+def VGPR132_64 : VGPR_64 <132, "VGPR132_64", [ VGPR132,VGPR133]>;
+def VGPR134_64 : VGPR_64 <134, "VGPR134_64", [ VGPR134,VGPR135]>;
+def VGPR136_64 : VGPR_64 <136, "VGPR136_64", [ VGPR136,VGPR137]>;
+def VGPR138_64 : VGPR_64 <138, "VGPR138_64", [ VGPR138,VGPR139]>;
+def VGPR140_64 : VGPR_64 <140, "VGPR140_64", [ VGPR140,VGPR141]>;
+def VGPR142_64 : VGPR_64 <142, "VGPR142_64", [ VGPR142,VGPR143]>;
+def VGPR144_64 : VGPR_64 <144, "VGPR144_64", [ VGPR144,VGPR145]>;
+def VGPR146_64 : VGPR_64 <146, "VGPR146_64", [ VGPR146,VGPR147]>;
+def VGPR148_64 : VGPR_64 <148, "VGPR148_64", [ VGPR148,VGPR149]>;
+def VGPR150_64 : VGPR_64 <150, "VGPR150_64", [ VGPR150,VGPR151]>;
+def VGPR152_64 : VGPR_64 <152, "VGPR152_64", [ VGPR152,VGPR153]>;
+def VGPR154_64 : VGPR_64 <154, "VGPR154_64", [ VGPR154,VGPR155]>;
+def VGPR156_64 : VGPR_64 <156, "VGPR156_64", [ VGPR156,VGPR157]>;
+def VGPR158_64 : VGPR_64 <158, "VGPR158_64", [ VGPR158,VGPR159]>;
+def VGPR160_64 : VGPR_64 <160, "VGPR160_64", [ VGPR160,VGPR161]>;
+def VGPR162_64 : VGPR_64 <162, "VGPR162_64", [ VGPR162,VGPR163]>;
+def VGPR164_64 : VGPR_64 <164, "VGPR164_64", [ VGPR164,VGPR165]>;
+def VGPR166_64 : VGPR_64 <166, "VGPR166_64", [ VGPR166,VGPR167]>;
+def VGPR168_64 : VGPR_64 <168, "VGPR168_64", [ VGPR168,VGPR169]>;
+def VGPR170_64 : VGPR_64 <170, "VGPR170_64", [ VGPR170,VGPR171]>;
+def VGPR172_64 : VGPR_64 <172, "VGPR172_64", [ VGPR172,VGPR173]>;
+def VGPR174_64 : VGPR_64 <174, "VGPR174_64", [ VGPR174,VGPR175]>;
+def VGPR176_64 : VGPR_64 <176, "VGPR176_64", [ VGPR176,VGPR177]>;
+def VGPR178_64 : VGPR_64 <178, "VGPR178_64", [ VGPR178,VGPR179]>;
+def VGPR180_64 : VGPR_64 <180, "VGPR180_64", [ VGPR180,VGPR181]>;
+def VGPR182_64 : VGPR_64 <182, "VGPR182_64", [ VGPR182,VGPR183]>;
+def VGPR184_64 : VGPR_64 <184, "VGPR184_64", [ VGPR184,VGPR185]>;
+def VGPR186_64 : VGPR_64 <186, "VGPR186_64", [ VGPR186,VGPR187]>;
+def VGPR188_64 : VGPR_64 <188, "VGPR188_64", [ VGPR188,VGPR189]>;
+def VGPR190_64 : VGPR_64 <190, "VGPR190_64", [ VGPR190,VGPR191]>;
+def VGPR192_64 : VGPR_64 <192, "VGPR192_64", [ VGPR192,VGPR193]>;
+def VGPR194_64 : VGPR_64 <194, "VGPR194_64", [ VGPR194,VGPR195]>;
+def VGPR196_64 : VGPR_64 <196, "VGPR196_64", [ VGPR196,VGPR197]>;
+def VGPR198_64 : VGPR_64 <198, "VGPR198_64", [ VGPR198,VGPR199]>;
+def VGPR200_64 : VGPR_64 <200, "VGPR200_64", [ VGPR200,VGPR201]>;
+def VGPR202_64 : VGPR_64 <202, "VGPR202_64", [ VGPR202,VGPR203]>;
+def VGPR204_64 : VGPR_64 <204, "VGPR204_64", [ VGPR204,VGPR205]>;
+def VGPR206_64 : VGPR_64 <206, "VGPR206_64", [ VGPR206,VGPR207]>;
+def VGPR208_64 : VGPR_64 <208, "VGPR208_64", [ VGPR208,VGPR209]>;
+def VGPR210_64 : VGPR_64 <210, "VGPR210_64", [ VGPR210,VGPR211]>;
+def VGPR212_64 : VGPR_64 <212, "VGPR212_64", [ VGPR212,VGPR213]>;
+def VGPR214_64 : VGPR_64 <214, "VGPR214_64", [ VGPR214,VGPR215]>;
+def VGPR216_64 : VGPR_64 <216, "VGPR216_64", [ VGPR216,VGPR217]>;
+def VGPR218_64 : VGPR_64 <218, "VGPR218_64", [ VGPR218,VGPR219]>;
+def VGPR220_64 : VGPR_64 <220, "VGPR220_64", [ VGPR220,VGPR221]>;
+def VGPR222_64 : VGPR_64 <222, "VGPR222_64", [ VGPR222,VGPR223]>;
+def VGPR224_64 : VGPR_64 <224, "VGPR224_64", [ VGPR224,VGPR225]>;
+def VGPR226_64 : VGPR_64 <226, "VGPR226_64", [ VGPR226,VGPR227]>;
+def VGPR228_64 : VGPR_64 <228, "VGPR228_64", [ VGPR228,VGPR229]>;
+def VGPR230_64 : VGPR_64 <230, "VGPR230_64", [ VGPR230,VGPR231]>;
+def VGPR232_64 : VGPR_64 <232, "VGPR232_64", [ VGPR232,VGPR233]>;
+def VGPR234_64 : VGPR_64 <234, "VGPR234_64", [ VGPR234,VGPR235]>;
+def VGPR236_64 : VGPR_64 <236, "VGPR236_64", [ VGPR236,VGPR237]>;
+def VGPR238_64 : VGPR_64 <238, "VGPR238_64", [ VGPR238,VGPR239]>;
+def VGPR240_64 : VGPR_64 <240, "VGPR240_64", [ VGPR240,VGPR241]>;
+def VGPR242_64 : VGPR_64 <242, "VGPR242_64", [ VGPR242,VGPR243]>;
+def VGPR244_64 : VGPR_64 <244, "VGPR244_64", [ VGPR244,VGPR245]>;
+def VGPR246_64 : VGPR_64 <246, "VGPR246_64", [ VGPR246,VGPR247]>;
+def VGPR248_64 : VGPR_64 <248, "VGPR248_64", [ VGPR248,VGPR249]>;
+def VGPR250_64 : VGPR_64 <250, "VGPR250_64", [ VGPR250,VGPR251]>;
+def VGPR252_64 : VGPR_64 <252, "VGPR252_64", [ VGPR252,VGPR253]>;
+def VGPR254_64 : VGPR_64 <254, "VGPR254_64", [ VGPR254,VGPR255]>;
+def VReg_64 : RegisterClass<"AMDGPU", [i64], 64,
+ (add VGPR0_64
+, VGPR2_64, VGPR4_64, VGPR6_64, VGPR8_64, VGPR10_64
+, VGPR12_64, VGPR14_64, VGPR16_64, VGPR18_64, VGPR20_64
+, VGPR22_64, VGPR24_64, VGPR26_64, VGPR28_64, VGPR30_64
+, VGPR32_64, VGPR34_64, VGPR36_64, VGPR38_64, VGPR40_64
+, VGPR42_64, VGPR44_64, VGPR46_64, VGPR48_64, VGPR50_64
+, VGPR52_64, VGPR54_64, VGPR56_64, VGPR58_64, VGPR60_64
+, VGPR62_64, VGPR64_64, VGPR66_64, VGPR68_64, VGPR70_64
+, VGPR72_64, VGPR74_64, VGPR76_64, VGPR78_64, VGPR80_64
+, VGPR82_64, VGPR84_64, VGPR86_64, VGPR88_64, VGPR90_64
+, VGPR92_64, VGPR94_64, VGPR96_64, VGPR98_64, VGPR100_64
+, VGPR102_64, VGPR104_64, VGPR106_64, VGPR108_64, VGPR110_64
+, VGPR112_64, VGPR114_64, VGPR116_64, VGPR118_64, VGPR120_64
+, VGPR122_64, VGPR124_64, VGPR126_64, VGPR128_64, VGPR130_64
+, VGPR132_64, VGPR134_64, VGPR136_64, VGPR138_64, VGPR140_64
+, VGPR142_64, VGPR144_64, VGPR146_64, VGPR148_64, VGPR150_64
+, VGPR152_64, VGPR154_64, VGPR156_64, VGPR158_64, VGPR160_64
+, VGPR162_64, VGPR164_64, VGPR166_64, VGPR168_64, VGPR170_64
+, VGPR172_64, VGPR174_64, VGPR176_64, VGPR178_64, VGPR180_64
+, VGPR182_64, VGPR184_64, VGPR186_64, VGPR188_64, VGPR190_64
+, VGPR192_64, VGPR194_64, VGPR196_64, VGPR198_64, VGPR200_64
+, VGPR202_64, VGPR204_64, VGPR206_64, VGPR208_64, VGPR210_64
+, VGPR212_64, VGPR214_64, VGPR216_64, VGPR218_64, VGPR220_64
+, VGPR222_64, VGPR224_64, VGPR226_64, VGPR228_64, VGPR230_64
+, VGPR232_64, VGPR234_64, VGPR236_64, VGPR238_64, VGPR240_64
+, VGPR242_64, VGPR244_64, VGPR246_64, VGPR248_64, VGPR250_64
+, VGPR252_64, VGPR254_64)
+>;
+def VGPR0_128 : VGPR_128 <0, "VGPR0_128", [ VGPR0,VGPR1,VGPR2,VGPR3]>;
+def VGPR4_128 : VGPR_128 <4, "VGPR4_128", [ VGPR4,VGPR5,VGPR6,VGPR7]>;
+def VGPR8_128 : VGPR_128 <8, "VGPR8_128", [ VGPR8,VGPR9,VGPR10,VGPR11]>;
+def VGPR12_128 : VGPR_128 <12, "VGPR12_128", [ VGPR12,VGPR13,VGPR14,VGPR15]>;
+def VGPR16_128 : VGPR_128 <16, "VGPR16_128", [ VGPR16,VGPR17,VGPR18,VGPR19]>;
+def VGPR20_128 : VGPR_128 <20, "VGPR20_128", [ VGPR20,VGPR21,VGPR22,VGPR23]>;
+def VGPR24_128 : VGPR_128 <24, "VGPR24_128", [ VGPR24,VGPR25,VGPR26,VGPR27]>;
+def VGPR28_128 : VGPR_128 <28, "VGPR28_128", [ VGPR28,VGPR29,VGPR30,VGPR31]>;
+def VGPR32_128 : VGPR_128 <32, "VGPR32_128", [ VGPR32,VGPR33,VGPR34,VGPR35]>;
+def VGPR36_128 : VGPR_128 <36, "VGPR36_128", [ VGPR36,VGPR37,VGPR38,VGPR39]>;
+def VGPR40_128 : VGPR_128 <40, "VGPR40_128", [ VGPR40,VGPR41,VGPR42,VGPR43]>;
+def VGPR44_128 : VGPR_128 <44, "VGPR44_128", [ VGPR44,VGPR45,VGPR46,VGPR47]>;
+def VGPR48_128 : VGPR_128 <48, "VGPR48_128", [ VGPR48,VGPR49,VGPR50,VGPR51]>;
+def VGPR52_128 : VGPR_128 <52, "VGPR52_128", [ VGPR52,VGPR53,VGPR54,VGPR55]>;
+def VGPR56_128 : VGPR_128 <56, "VGPR56_128", [ VGPR56,VGPR57,VGPR58,VGPR59]>;
+def VGPR60_128 : VGPR_128 <60, "VGPR60_128", [ VGPR60,VGPR61,VGPR62,VGPR63]>;
+def VGPR64_128 : VGPR_128 <64, "VGPR64_128", [ VGPR64,VGPR65,VGPR66,VGPR67]>;
+def VGPR68_128 : VGPR_128 <68, "VGPR68_128", [ VGPR68,VGPR69,VGPR70,VGPR71]>;
+def VGPR72_128 : VGPR_128 <72, "VGPR72_128", [ VGPR72,VGPR73,VGPR74,VGPR75]>;
+def VGPR76_128 : VGPR_128 <76, "VGPR76_128", [ VGPR76,VGPR77,VGPR78,VGPR79]>;
+def VGPR80_128 : VGPR_128 <80, "VGPR80_128", [ VGPR80,VGPR81,VGPR82,VGPR83]>;
+def VGPR84_128 : VGPR_128 <84, "VGPR84_128", [ VGPR84,VGPR85,VGPR86,VGPR87]>;
+def VGPR88_128 : VGPR_128 <88, "VGPR88_128", [ VGPR88,VGPR89,VGPR90,VGPR91]>;
+def VGPR92_128 : VGPR_128 <92, "VGPR92_128", [ VGPR92,VGPR93,VGPR94,VGPR95]>;
+def VGPR96_128 : VGPR_128 <96, "VGPR96_128", [ VGPR96,VGPR97,VGPR98,VGPR99]>;
+def VGPR100_128 : VGPR_128 <100, "VGPR100_128", [ VGPR100,VGPR101,VGPR102,VGPR103]>;
+def VGPR104_128 : VGPR_128 <104, "VGPR104_128", [ VGPR104,VGPR105,VGPR106,VGPR107]>;
+def VGPR108_128 : VGPR_128 <108, "VGPR108_128", [ VGPR108,VGPR109,VGPR110,VGPR111]>;
+def VGPR112_128 : VGPR_128 <112, "VGPR112_128", [ VGPR112,VGPR113,VGPR114,VGPR115]>;
+def VGPR116_128 : VGPR_128 <116, "VGPR116_128", [ VGPR116,VGPR117,VGPR118,VGPR119]>;
+def VGPR120_128 : VGPR_128 <120, "VGPR120_128", [ VGPR120,VGPR121,VGPR122,VGPR123]>;
+def VGPR124_128 : VGPR_128 <124, "VGPR124_128", [ VGPR124,VGPR125,VGPR126,VGPR127]>;
+def VGPR128_128 : VGPR_128 <128, "VGPR128_128", [ VGPR128,VGPR129,VGPR130,VGPR131]>;
+def VGPR132_128 : VGPR_128 <132, "VGPR132_128", [ VGPR132,VGPR133,VGPR134,VGPR135]>;
+def VGPR136_128 : VGPR_128 <136, "VGPR136_128", [ VGPR136,VGPR137,VGPR138,VGPR139]>;
+def VGPR140_128 : VGPR_128 <140, "VGPR140_128", [ VGPR140,VGPR141,VGPR142,VGPR143]>;
+def VGPR144_128 : VGPR_128 <144, "VGPR144_128", [ VGPR144,VGPR145,VGPR146,VGPR147]>;
+def VGPR148_128 : VGPR_128 <148, "VGPR148_128", [ VGPR148,VGPR149,VGPR150,VGPR151]>;
+def VGPR152_128 : VGPR_128 <152, "VGPR152_128", [ VGPR152,VGPR153,VGPR154,VGPR155]>;
+def VGPR156_128 : VGPR_128 <156, "VGPR156_128", [ VGPR156,VGPR157,VGPR158,VGPR159]>;
+def VGPR160_128 : VGPR_128 <160, "VGPR160_128", [ VGPR160,VGPR161,VGPR162,VGPR163]>;
+def VGPR164_128 : VGPR_128 <164, "VGPR164_128", [ VGPR164,VGPR165,VGPR166,VGPR167]>;
+def VGPR168_128 : VGPR_128 <168, "VGPR168_128", [ VGPR168,VGPR169,VGPR170,VGPR171]>;
+def VGPR172_128 : VGPR_128 <172, "VGPR172_128", [ VGPR172,VGPR173,VGPR174,VGPR175]>;
+def VGPR176_128 : VGPR_128 <176, "VGPR176_128", [ VGPR176,VGPR177,VGPR178,VGPR179]>;
+def VGPR180_128 : VGPR_128 <180, "VGPR180_128", [ VGPR180,VGPR181,VGPR182,VGPR183]>;
+def VGPR184_128 : VGPR_128 <184, "VGPR184_128", [ VGPR184,VGPR185,VGPR186,VGPR187]>;
+def VGPR188_128 : VGPR_128 <188, "VGPR188_128", [ VGPR188,VGPR189,VGPR190,VGPR191]>;
+def VGPR192_128 : VGPR_128 <192, "VGPR192_128", [ VGPR192,VGPR193,VGPR194,VGPR195]>;
+def VGPR196_128 : VGPR_128 <196, "VGPR196_128", [ VGPR196,VGPR197,VGPR198,VGPR199]>;
+def VGPR200_128 : VGPR_128 <200, "VGPR200_128", [ VGPR200,VGPR201,VGPR202,VGPR203]>;
+def VGPR204_128 : VGPR_128 <204, "VGPR204_128", [ VGPR204,VGPR205,VGPR206,VGPR207]>;
+def VGPR208_128 : VGPR_128 <208, "VGPR208_128", [ VGPR208,VGPR209,VGPR210,VGPR211]>;
+def VGPR212_128 : VGPR_128 <212, "VGPR212_128", [ VGPR212,VGPR213,VGPR214,VGPR215]>;
+def VGPR216_128 : VGPR_128 <216, "VGPR216_128", [ VGPR216,VGPR217,VGPR218,VGPR219]>;
+def VGPR220_128 : VGPR_128 <220, "VGPR220_128", [ VGPR220,VGPR221,VGPR222,VGPR223]>;
+def VGPR224_128 : VGPR_128 <224, "VGPR224_128", [ VGPR224,VGPR225,VGPR226,VGPR227]>;
+def VGPR228_128 : VGPR_128 <228, "VGPR228_128", [ VGPR228,VGPR229,VGPR230,VGPR231]>;
+def VGPR232_128 : VGPR_128 <232, "VGPR232_128", [ VGPR232,VGPR233,VGPR234,VGPR235]>;
+def VGPR236_128 : VGPR_128 <236, "VGPR236_128", [ VGPR236,VGPR237,VGPR238,VGPR239]>;
+def VGPR240_128 : VGPR_128 <240, "VGPR240_128", [ VGPR240,VGPR241,VGPR242,VGPR243]>;
+def VGPR244_128 : VGPR_128 <244, "VGPR244_128", [ VGPR244,VGPR245,VGPR246,VGPR247]>;
+def VGPR248_128 : VGPR_128 <248, "VGPR248_128", [ VGPR248,VGPR249,VGPR250,VGPR251]>;
+def VGPR252_128 : VGPR_128 <252, "VGPR252_128", [ VGPR252,VGPR253,VGPR254,VGPR255]>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128,
+ (add VGPR0_128
+, VGPR4_128, VGPR8_128, VGPR12_128, VGPR16_128, VGPR20_128
+, VGPR24_128, VGPR28_128, VGPR32_128, VGPR36_128, VGPR40_128
+, VGPR44_128, VGPR48_128, VGPR52_128, VGPR56_128, VGPR60_128
+, VGPR64_128, VGPR68_128, VGPR72_128, VGPR76_128, VGPR80_128
+, VGPR84_128, VGPR88_128, VGPR92_128, VGPR96_128, VGPR100_128
+, VGPR104_128, VGPR108_128, VGPR112_128, VGPR116_128, VGPR120_128
+, VGPR124_128, VGPR128_128, VGPR132_128, VGPR136_128, VGPR140_128
+, VGPR144_128, VGPR148_128, VGPR152_128, VGPR156_128, VGPR160_128
+, VGPR164_128, VGPR168_128, VGPR172_128, VGPR176_128, VGPR180_128
+, VGPR184_128, VGPR188_128, VGPR192_128, VGPR196_128, VGPR200_128
+, VGPR204_128, VGPR208_128, VGPR212_128, VGPR216_128, VGPR220_128
+, VGPR224_128, VGPR228_128, VGPR232_128, VGPR236_128, VGPR240_128
+, VGPR244_128, VGPR248_128, VGPR252_128)
+>;
+
+def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
+ (add SGPR0_64
+,SGPR2_64,SGPR4_64,SGPR6_64,SGPR8_64,SGPR10_64
+,SGPR12_64,SGPR14_64,SGPR16_64,SGPR18_64,SGPR20_64
+,SGPR22_64,SGPR24_64,SGPR26_64,SGPR28_64,SGPR30_64
+,SGPR32_64,SGPR34_64,SGPR36_64,SGPR38_64,SGPR40_64
+,SGPR42_64,SGPR44_64,SGPR46_64,SGPR48_64,SGPR50_64
+,SGPR52_64,SGPR54_64,SGPR56_64,SGPR58_64,SGPR60_64
+,SGPR62_64,SGPR64_64,SGPR66_64,SGPR68_64,SGPR70_64
+,SGPR72_64,SGPR74_64,SGPR76_64,SGPR78_64,SGPR80_64
+,SGPR82_64,SGPR84_64,SGPR86_64,SGPR88_64,SGPR90_64
+,SGPR92_64,SGPR94_64,SGPR96_64,SGPR98_64,SGPR100_64
+,SGPR102_64,VCC, VGPR0_64
+,VGPR2_64,VGPR4_64,VGPR6_64,VGPR8_64,VGPR10_64
+,VGPR12_64,VGPR14_64,VGPR16_64,VGPR18_64,VGPR20_64
+,VGPR22_64,VGPR24_64,VGPR26_64,VGPR28_64,VGPR30_64
+,VGPR32_64,VGPR34_64,VGPR36_64,VGPR38_64,VGPR40_64
+,VGPR42_64,VGPR44_64,VGPR46_64,VGPR48_64,VGPR50_64
+,VGPR52_64,VGPR54_64,VGPR56_64,VGPR58_64,VGPR60_64
+,VGPR62_64,VGPR64_64,VGPR66_64,VGPR68_64,VGPR70_64
+,VGPR72_64,VGPR74_64,VGPR76_64,VGPR78_64,VGPR80_64
+,VGPR82_64,VGPR84_64,VGPR86_64,VGPR88_64,VGPR90_64
+,VGPR92_64,VGPR94_64,VGPR96_64,VGPR98_64,VGPR100_64
+,VGPR102_64,VGPR104_64,VGPR106_64,VGPR108_64,VGPR110_64
+,VGPR112_64,VGPR114_64,VGPR116_64,VGPR118_64,VGPR120_64
+,VGPR122_64,VGPR124_64,VGPR126_64,VGPR128_64,VGPR130_64
+,VGPR132_64,VGPR134_64,VGPR136_64,VGPR138_64,VGPR140_64
+,VGPR142_64,VGPR144_64,VGPR146_64,VGPR148_64,VGPR150_64
+,VGPR152_64,VGPR154_64,VGPR156_64,VGPR158_64,VGPR160_64
+,VGPR162_64,VGPR164_64,VGPR166_64,VGPR168_64,VGPR170_64
+,VGPR172_64,VGPR174_64,VGPR176_64,VGPR178_64,VGPR180_64
+,VGPR182_64,VGPR184_64,VGPR186_64,VGPR188_64,VGPR190_64
+,VGPR192_64,VGPR194_64,VGPR196_64,VGPR198_64,VGPR200_64
+,VGPR202_64,VGPR204_64,VGPR206_64,VGPR208_64,VGPR210_64
+,VGPR212_64,VGPR214_64,VGPR216_64,VGPR218_64,VGPR220_64
+,VGPR222_64,VGPR224_64,VGPR226_64,VGPR228_64,VGPR230_64
+,VGPR232_64,VGPR234_64,VGPR236_64,VGPR238_64,VGPR240_64
+,VGPR242_64,VGPR244_64,VGPR246_64,VGPR248_64,VGPR250_64
+,VGPR252_64,VGPR254_64)
+>;
+
diff --git a/lib/Target/AMDGPU/SISchedule.td b/lib/Target/AMDGPU/SISchedule.td
new file mode 100644
index 0000000..28b65b8
--- /dev/null
+++ b/lib/Target/AMDGPU/SISchedule.td
@@ -0,0 +1,15 @@
+//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This is just a placeholder for now.
+//
+//===----------------------------------------------------------------------===//
+
+
+def SI_Itin : ProcessorItineraries <[], [], []>;
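+
+// A populated itinerary would declare functional units and per-class stage
+// lists. An illustrative sketch only (ALU and SomeItinClass are hypothetical
+// names, not defined by this patch):
+//   def ALU : FuncUnit;
+//   def SI_Itin : ProcessorItineraries <[ALU], [],
+//     [InstrItinData<SomeItinClass, [InstrStage<1, [ALU]>]>]>;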
diff --git a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
new file mode 100644
index 0000000..380e7de
--- /dev/null
+++ b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- TargetInfo/AMDGPUTargetInfo.cpp - AMDGPU target registration --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file registers the AMDGPU (R600) target with the LLVM TargetRegistry.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+/// The target for the AMDGPU backend
+Target llvm::TheAMDGPUTarget;
+
+/// Extern function to initialize the targets for the AMDGPU backend
+extern "C" void LLVMInitializeAMDGPUTargetInfo() {
+ RegisterTarget<Triple::r600, false>
+ R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
+}
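+
+// Once registered, the target is selectable by name; an illustrative
+// invocation would be: llc -march=r600 input.ll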
diff --git a/lib/Target/AMDGPU/TargetInfo/CMakeLists.txt b/lib/Target/AMDGPU/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..17ad123
--- /dev/null
+++ b/lib/Target/AMDGPU/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAMDGPUInfo
+ AMDGPUTargetInfo.cpp
+ )
+
+add_dependencies(LLVMAMDGPUInfo AMDGPUCommonTableGen intrinsics_gen)
diff --git a/lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt b/lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000..3dfca10
--- /dev/null
+++ b/lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AMDGPUInfo
+parent = AMDGPU
+required_libraries = MC Support
+add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/TargetInfo/Makefile b/lib/Target/AMDGPU/TargetInfo/Makefile
new file mode 100644
index 0000000..1b23287
--- /dev/null
+++ b/lib/Target/AMDGPU/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AMDGPU/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAMDGPUInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 8995080..97c8796 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
+subdirectories = AMDGPU ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
--
1.7.7.6