[llvm-commits] [PATCH 1/6] AMDIL: Add core backend files for R600/SI codegen
Tom Stellard
tstellar at gmail.com
Wed Apr 25 12:30:53 PDT 2012
---
lib/Target/AMDIL/AMDGPU.h | 47 +
lib/Target/AMDIL/AMDGPUISelLowering.cpp | 31 +
lib/Target/AMDIL/AMDGPUISelLowering.h | 35 +
lib/Target/AMDIL/AMDGPUInstrInfo.cpp | 116 +
lib/Target/AMDIL/AMDGPUInstrInfo.h | 59 +
lib/Target/AMDIL/AMDGPURegisterInfo.cpp | 24 +
lib/Target/AMDIL/AMDGPURegisterInfo.h | 38 +
lib/Target/AMDIL/AMDGPUTargetMachine.cpp | 161 +
lib/Target/AMDIL/AMDGPUTargetMachine.h | 62 +
lib/Target/AMDIL/AMDGPUUtil.cpp | 126 +
lib/Target/AMDIL/AMDGPUUtil.h | 49 +
lib/Target/AMDIL/AMDIL.h | 280 +
lib/Target/AMDIL/AMDIL7XXDevice.cpp | 128 +
lib/Target/AMDIL/AMDIL7XXDevice.h | 71 +
lib/Target/AMDIL/AMDILDevice.cpp | 137 +
lib/Target/AMDIL/AMDILDevice.h | 116 +
lib/Target/AMDIL/AMDILDeviceInfo.cpp | 87 +
lib/Target/AMDIL/AMDILDeviceInfo.h | 89 +
lib/Target/AMDIL/AMDILDevices.h | 19 +
lib/Target/AMDIL/AMDILEvergreenDevice.cpp | 183 +
lib/Target/AMDIL/AMDILEvergreenDevice.h | 87 +
lib/Target/AMDIL/AMDILFrameLowering.cpp | 53 +
lib/Target/AMDIL/AMDILFrameLowering.h | 46 +
lib/Target/AMDIL/AMDILISelDAGToDAG.cpp | 457 ++
lib/Target/AMDIL/AMDILISelLowering.cpp | 5576 ++++++++++++++++++++
lib/Target/AMDIL/AMDILISelLowering.h | 527 ++
lib/Target/AMDIL/AMDILInstrInfo.cpp | 709 +++
lib/Target/AMDIL/AMDILInstrInfo.h | 175 +
lib/Target/AMDIL/AMDILIntrinsicInfo.cpp | 190 +
lib/Target/AMDIL/AMDILIntrinsicInfo.h | 49 +
lib/Target/AMDIL/AMDILNIDevice.cpp | 71 +
lib/Target/AMDIL/AMDILNIDevice.h | 59 +
lib/Target/AMDIL/AMDILRegisterInfo.cpp | 200 +
lib/Target/AMDIL/AMDILRegisterInfo.h | 91 +
lib/Target/AMDIL/AMDILSIDevice.cpp | 49 +
lib/Target/AMDIL/AMDILSIDevice.h | 45 +
lib/Target/AMDIL/AMDILSubtarget.cpp | 177 +
lib/Target/AMDIL/AMDILSubtarget.h | 75 +
lib/Target/AMDIL/AMDILTargetMachine.cpp | 182 +
lib/Target/AMDIL/AMDILTargetMachine.h | 72 +
lib/Target/AMDIL/AMDILUtilityFunctions.cpp | 683 +++
lib/Target/AMDIL/AMDILUtilityFunctions.h | 362 ++
lib/Target/AMDIL/CMakeLists.txt | 61 +
lib/Target/AMDIL/LLVMBuild.txt | 32 +
lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.cpp | 107 +
lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.h | 30 +
.../AMDIL/MCTargetDesc/AMDILMCTargetDesc.cpp | 66 +
lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.h | 36 +
lib/Target/AMDIL/MCTargetDesc/CMakeLists.txt | 7 +
lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt | 23 +
lib/Target/AMDIL/MCTargetDesc/Makefile | 16 +
lib/Target/AMDIL/Makefile | 23 +
lib/Target/AMDIL/R600ISelLowering.cpp | 116 +
lib/Target/AMDIL/R600ISelLowering.h | 43 +
lib/Target/AMDIL/R600InstrInfo.cpp | 123 +
lib/Target/AMDIL/R600InstrInfo.h | 74 +
lib/Target/AMDIL/R600RegisterInfo.cpp | 102 +
lib/Target/AMDIL/R600RegisterInfo.h | 44 +
lib/Target/AMDIL/SIISelLowering.cpp | 151 +
lib/Target/AMDIL/SIISelLowering.h | 44 +
lib/Target/AMDIL/SIInstrInfo.cpp | 135 +
lib/Target/AMDIL/SIInstrInfo.h | 92 +
lib/Target/AMDIL/SIMachineFunctionInfo.cpp | 22 +
lib/Target/AMDIL/SIMachineFunctionInfo.h | 35 +
lib/Target/AMDIL/SIRegisterInfo.cpp | 66 +
lib/Target/AMDIL/SIRegisterInfo.h | 46 +
lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp | 32 +
lib/Target/AMDIL/TargetInfo/CMakeLists.txt | 7 +
lib/Target/AMDIL/TargetInfo/LLVMBuild.txt | 23 +
lib/Target/AMDIL/TargetInfo/Makefile | 15 +
70 files changed, 13364 insertions(+), 0 deletions(-)
create mode 100644 lib/Target/AMDIL/AMDGPU.h
create mode 100644 lib/Target/AMDIL/AMDGPUISelLowering.cpp
create mode 100644 lib/Target/AMDIL/AMDGPUISelLowering.h
create mode 100644 lib/Target/AMDIL/AMDGPUInstrInfo.cpp
create mode 100644 lib/Target/AMDIL/AMDGPUInstrInfo.h
create mode 100644 lib/Target/AMDIL/AMDGPURegisterInfo.cpp
create mode 100644 lib/Target/AMDIL/AMDGPURegisterInfo.h
create mode 100644 lib/Target/AMDIL/AMDGPUTargetMachine.cpp
create mode 100644 lib/Target/AMDIL/AMDGPUTargetMachine.h
create mode 100644 lib/Target/AMDIL/AMDGPUUtil.cpp
create mode 100644 lib/Target/AMDIL/AMDGPUUtil.h
create mode 100644 lib/Target/AMDIL/AMDIL.h
create mode 100644 lib/Target/AMDIL/AMDIL7XXDevice.cpp
create mode 100644 lib/Target/AMDIL/AMDIL7XXDevice.h
create mode 100644 lib/Target/AMDIL/AMDILDevice.cpp
create mode 100644 lib/Target/AMDIL/AMDILDevice.h
create mode 100644 lib/Target/AMDIL/AMDILDeviceInfo.cpp
create mode 100644 lib/Target/AMDIL/AMDILDeviceInfo.h
create mode 100644 lib/Target/AMDIL/AMDILDevices.h
create mode 100644 lib/Target/AMDIL/AMDILEvergreenDevice.cpp
create mode 100644 lib/Target/AMDIL/AMDILEvergreenDevice.h
create mode 100644 lib/Target/AMDIL/AMDILFrameLowering.cpp
create mode 100644 lib/Target/AMDIL/AMDILFrameLowering.h
create mode 100644 lib/Target/AMDIL/AMDILISelDAGToDAG.cpp
create mode 100644 lib/Target/AMDIL/AMDILISelLowering.cpp
create mode 100644 lib/Target/AMDIL/AMDILISelLowering.h
create mode 100644 lib/Target/AMDIL/AMDILInstrInfo.cpp
create mode 100644 lib/Target/AMDIL/AMDILInstrInfo.h
create mode 100644 lib/Target/AMDIL/AMDILIntrinsicInfo.cpp
create mode 100644 lib/Target/AMDIL/AMDILIntrinsicInfo.h
create mode 100644 lib/Target/AMDIL/AMDILNIDevice.cpp
create mode 100644 lib/Target/AMDIL/AMDILNIDevice.h
create mode 100644 lib/Target/AMDIL/AMDILRegisterInfo.cpp
create mode 100644 lib/Target/AMDIL/AMDILRegisterInfo.h
create mode 100644 lib/Target/AMDIL/AMDILSIDevice.cpp
create mode 100644 lib/Target/AMDIL/AMDILSIDevice.h
create mode 100644 lib/Target/AMDIL/AMDILSubtarget.cpp
create mode 100644 lib/Target/AMDIL/AMDILSubtarget.h
create mode 100644 lib/Target/AMDIL/AMDILTargetMachine.cpp
create mode 100644 lib/Target/AMDIL/AMDILTargetMachine.h
create mode 100644 lib/Target/AMDIL/AMDILUtilityFunctions.cpp
create mode 100644 lib/Target/AMDIL/AMDILUtilityFunctions.h
create mode 100644 lib/Target/AMDIL/CMakeLists.txt
create mode 100644 lib/Target/AMDIL/LLVMBuild.txt
create mode 100644 lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.cpp
create mode 100644 lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.h
create mode 100644 lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.cpp
create mode 100644 lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.h
create mode 100644 lib/Target/AMDIL/MCTargetDesc/CMakeLists.txt
create mode 100644 lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt
create mode 100644 lib/Target/AMDIL/MCTargetDesc/Makefile
create mode 100644 lib/Target/AMDIL/Makefile
create mode 100644 lib/Target/AMDIL/R600ISelLowering.cpp
create mode 100644 lib/Target/AMDIL/R600ISelLowering.h
create mode 100644 lib/Target/AMDIL/R600InstrInfo.cpp
create mode 100644 lib/Target/AMDIL/R600InstrInfo.h
create mode 100644 lib/Target/AMDIL/R600RegisterInfo.cpp
create mode 100644 lib/Target/AMDIL/R600RegisterInfo.h
create mode 100644 lib/Target/AMDIL/SIISelLowering.cpp
create mode 100644 lib/Target/AMDIL/SIISelLowering.h
create mode 100644 lib/Target/AMDIL/SIInstrInfo.cpp
create mode 100644 lib/Target/AMDIL/SIInstrInfo.h
create mode 100644 lib/Target/AMDIL/SIMachineFunctionInfo.cpp
create mode 100644 lib/Target/AMDIL/SIMachineFunctionInfo.h
create mode 100644 lib/Target/AMDIL/SIRegisterInfo.cpp
create mode 100644 lib/Target/AMDIL/SIRegisterInfo.h
create mode 100644 lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp
create mode 100644 lib/Target/AMDIL/TargetInfo/CMakeLists.txt
create mode 100644 lib/Target/AMDIL/TargetInfo/LLVMBuild.txt
create mode 100644 lib/Target/AMDIL/TargetInfo/Makefile
diff --git a/lib/Target/AMDIL/AMDGPU.h b/lib/Target/AMDIL/AMDGPU.h
new file mode 100644
index 0000000..eff002a
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPU.h
@@ -0,0 +1,47 @@
+//===-- AMDGPU.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_H
+#define AMDGPU_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class AMDGPUTargetMachine;
+
+ FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
+ FunctionPass *createR600LowerShaderInstructionsPass(TargetMachine &tm);
+ FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm);
+
+ FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
+ FunctionPass *createSIInitMachineFunctionInfoPass(TargetMachine &tm);
+ FunctionPass *createSILowerShaderInstructionsPass(TargetMachine &tm);
+ FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm);
+ FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+
+ FunctionPass *createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm);
+ FunctionPass *createAMDGPULowerShaderInstructionsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDGPUDelimitInstGroupsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+
+ FunctionPass *createAMDGPUFixRegClassesPass(TargetMachine &tm);
+
+} /* End namespace llvm */
+#endif /* AMDGPU_H */
diff --git a/lib/Target/AMDIL/AMDGPUISelLowering.cpp b/lib/Target/AMDIL/AMDGPUISelLowering.cpp
new file mode 100644
index 0000000..2c1052f
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUISelLowering.cpp
@@ -0,0 +1,31 @@
+//===-- AMDGPUISelLowering.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPUUtil.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
+ AMDILTargetLowering(TM)
+{
+}
+
+void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
+ MachineFunction * MF, MachineRegisterInfo & MRI,
+ const struct TargetInstrInfo * TII, unsigned reg) const
+{
+ AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg());
+}
+
diff --git a/lib/Target/AMDIL/AMDGPUISelLowering.h b/lib/Target/AMDIL/AMDGPUISelLowering.h
new file mode 100644
index 0000000..3c5beb1
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUISelLowering.h
@@ -0,0 +1,35 @@
+//===-- AMDGPUISelLowering.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUISELLOWERING_H
+#define AMDGPUISELLOWERING_H
+
+#include "AMDILISelLowering.h"
+
+namespace llvm {
+
+class AMDGPUTargetLowering : public AMDILTargetLowering
+{
+protected:
+ void addLiveIn(MachineInstr * MI, MachineFunction * MF,
+ MachineRegisterInfo & MRI, const struct TargetInstrInfo * TII,
+ unsigned reg) const;
+
+public:
+ AMDGPUTargetLowering(TargetMachine &TM);
+
+};
+
+} /* End namespace llvm */
+
+#endif /* AMDGPUISELLOWERING_H */
diff --git a/lib/Target/AMDIL/AMDGPUInstrInfo.cpp b/lib/Target/AMDIL/AMDGPUInstrInfo.cpp
new file mode 100644
index 0000000..4742283
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUInstrInfo.cpp
@@ -0,0 +1,116 @@
+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the TargetInstrInfo class that is
+// common to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm)
+ : AMDILInstrInfo(tm), TM(tm)
+{
+ const AMDILDevice * dev = TM.getSubtarget<AMDILSubtarget>().device();
+ for (unsigned i = 0; i < AMDIL::INSTRUCTION_LIST_END; i++) {
+ const MCInstrDesc & instDesc = get(i);
+ uint32_t instGen = (instDesc.TSFlags >> 40) & 0x7;
+ uint32_t inst = (instDesc.TSFlags >> 48) & 0xffff;
+ if (inst == 0) {
+ continue;
+ }
+ switch (instGen) {
+ case AMDGPUInstrInfo::R600_CAYMAN:
+ if (dev->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ continue;
+ }
+ break;
+ case AMDGPUInstrInfo::R600:
+ if (dev->getGeneration() != AMDILDeviceInfo::HD4XXX) {
+ continue;
+ }
+ break;
+ case AMDGPUInstrInfo::EG_CAYMAN:
+ if (dev->getGeneration() < AMDILDeviceInfo::HD5XXX
+ || dev->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ continue;
+ }
+ break;
+ case AMDGPUInstrInfo::CAYMAN:
+ if (dev->getDeviceFlag() != OCL_DEVICE_CAYMAN) {
+ continue;
+ }
+ break;
+ case AMDGPUInstrInfo::SI:
+ if (dev->getGeneration() != AMDILDeviceInfo::HD7XXX) {
+ continue;
+ }
+ break;
+ default:
+ abort();
+ break;
+ }
+
+ unsigned amdilOpcode = GetRealAMDILOpcode(inst);
+ amdilToISA[amdilOpcode] = instDesc.Opcode;
+ }
+}
+
+MachineInstr * AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const
+{
+ MachineInstrBuilder newInstr;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AMDGPURegisterInfo & RI = getRegisterInfo();
+ unsigned ISAOpcode = getISAOpcode(MI.getOpcode());
+
+ /* Create the new instruction */
+ newInstr = BuildMI(MF, DL, TM.getInstrInfo()->get(ISAOpcode));
+
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ MachineOperand &MO = MI.getOperand(i);
+ /* Convert dst regclass to one that is supported by the ISA */
+ if (MO.isReg() && MO.isDef()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
+
+ assert(newRegClass);
+
+ MRI.setRegClass(MO.getReg(), newRegClass);
+ }
+ }
+ /* Add the operand to the new instruction */
+ newInstr.addOperand(MO);
+ }
+
+ return newInstr;
+}
+
+unsigned AMDGPUInstrInfo::getISAOpcode(unsigned opcode) const
+{
+ if (amdilToISA.count(opcode) == 0) {
+ return opcode;
+ } else {
+ return amdilToISA.find(opcode)->second;
+ }
+}
+
+bool AMDGPUInstrInfo::isRegPreload(const MachineInstr &MI) const
+{
+ return (get(MI.getOpcode()).TSFlags >> AMDGPU_TFLAG_SHIFTS::PRELOAD_REG) & 0x1;
+}
+
+#include "AMDGPUInstrEnums.include"
diff --git a/lib/Target/AMDIL/AMDGPUInstrInfo.h b/lib/Target/AMDIL/AMDGPUInstrInfo.h
new file mode 100644
index 0000000..fa009bc
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUInstrInfo.h
@@ -0,0 +1,59 @@
+//===-- AMDGPUInstrInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTRUCTIONINFO_H_
+#define AMDGPUINSTRUCTIONINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDILInstrInfo.h"
+
+#include <map>
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class AMDGPUInstrInfo : public AMDILInstrInfo {
+ private:
+ AMDGPUTargetMachine & TM;
+ std::map<unsigned, unsigned> amdilToISA;
+
+ public:
+ explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
+
+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+
+ virtual unsigned getISAOpcode(unsigned AMDILopcode) const;
+
+ virtual MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+ bool isRegPreload(const MachineInstr &MI) const;
+
+ #include "AMDGPUInstrEnums.h.include"
+ };
+
+} // End llvm namespace
+
+/* AMDGPU target flags are stored in bits 32-39 */
+namespace AMDGPU_TFLAG_SHIFTS {
+ enum TFLAGS {
+ PRELOAD_REG = 32
+ };
+}
+
+
+#endif // AMDGPUINSTRINFO_H_
diff --git a/lib/Target/AMDIL/AMDGPURegisterInfo.cpp b/lib/Target/AMDIL/AMDGPURegisterInfo.cpp
new file mode 100644
index 0000000..162a491
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPURegisterInfo.cpp
@@ -0,0 +1,24 @@
+//===-- AMDGPURegisterInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDILRegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
diff --git a/lib/Target/AMDIL/AMDGPURegisterInfo.h b/lib/Target/AMDIL/AMDGPURegisterInfo.h
new file mode 100644
index 0000000..f4492e9
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPURegisterInfo.h
@@ -0,0 +1,38 @@
+//===-- AMDGPURegisterInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUREGISTERINFO_H_
+#define AMDGPUREGISTERINFO_H_
+
+#include "AMDILRegisterInfo.h"
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class TargetInstrInfo;
+
+ struct AMDGPURegisterInfo : public AMDILRegisterInfo
+ {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
+
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass * rc) const = 0;
+ };
+} // End namespace llvm
+
+#endif // AMDIDSAREGISTERINFO_H_
diff --git a/lib/Target/AMDIL/AMDGPUTargetMachine.cpp b/lib/Target/AMDIL/AMDGPUTargetMachine.cpp
new file mode 100644
index 0000000..313349c
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUTargetMachine.cpp
@@ -0,0 +1,161 @@
+//===-- AMDGPUTargetMachine.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "AMDILTargetMachine.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "R600KernelParameters.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel
+)
+:
+ AMDILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ Subtarget(TT, CPU, FS),
+ mDump(false)
+
+{
+ /* TLInfo uses InstrInfo so it must be initialized after. */
+ if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ InstrInfo = new R600InstrInfo(*this);
+ TLInfo = new R600TargetLowering(*this);
+ } else {
+ InstrInfo = new SIInstrInfo(*this);
+ TLInfo = new SITargetLowering(*this);
+ }
+}
+
+AMDGPUTargetMachine::~AMDGPUTargetMachine()
+{
+}
+
+bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify) {
+ /* XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
+ * only using it to access addPassesToGenerateCode() */
+ bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
+ DisableVerify);
+ assert(fail);
+
+ const AMDILSubtarget &STM = getSubtarget<AMDILSubtarget>();
+ std::string gpu = STM.getDeviceName();
+ if (gpu == "SI") {
+ PM.add(createSICodeEmitterPass(Out));
+ } else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ PM.add(createR600CodeEmitterPass(Out));
+ } else {
+ abort();
+ return true;
+ }
+ PM.add(createGCInfoDeleter());
+
+ return false;
+}
+
+namespace {
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // End of anonymous namespace
+
+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AMDGPUPassConfig(this, PM);
+}
+
+bool
+AMDGPUPassConfig::addPreISel()
+{
+ const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
+ if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ PM.add(createR600KernelParametersPass(
+ getAMDGPUTargetMachine().getTargetData()));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addInstSelector() {
+ PM.add(createAMDILPeepholeOpt(*TM));
+ PM.add(createAMDILISelDag(getAMDGPUTargetMachine()));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreRegAlloc() {
+ const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
+
+ PM.add(createAMDGPUReorderPreloadInstructionsPass(*TM));
+ if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ PM.add(createR600LowerShaderInstructionsPass(*TM));
+ PM.add(createR600LowerInstructionsPass(*TM));
+ } else {
+ PM.add(createSILowerShaderInstructionsPass(*TM));
+ PM.add(createSIAssignInterpRegsPass(*TM));
+ }
+ PM.add(createAMDGPULowerInstructionsPass(*TM));
+ PM.add(createAMDGPUConvertToISAPass(*TM));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPostRegAlloc() {
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreSched2() {
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreEmitPass() {
+ const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
+ PM.add(createAMDILCFGPreparationPass(*TM));
+ PM.add(createAMDILCFGStructurizerPass(*TM));
+ if (ST.device()->getGeneration() == AMDILDeviceInfo::HD7XXX) {
+ PM.add(createSIPropagateImmReadsPass(*TM));
+ }
+
+ return false;
+}
+
diff --git a/lib/Target/AMDIL/AMDGPUTargetMachine.h b/lib/Target/AMDIL/AMDGPUTargetMachine.h
new file mode 100644
index 0000000..d4165b0
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUTargetMachine.h
@@ -0,0 +1,62 @@
+//===-- AMDGPUTargetMachine.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_TARGET_MACHINE_H
+#define AMDGPU_TARGET_MACHINE_H
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDILTargetMachine.h"
+#include "R600ISelLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
+
+class AMDGPUTargetMachine : public AMDILTargetMachine {
+ AMDILSubtarget Subtarget;
+ const AMDGPUInstrInfo * InstrInfo;
+ AMDGPUTargetLowering * TLInfo;
+ AMDILGlobalManager *mGM;
+ AMDILKernelManager *mKM;
+ bool mDump;
+
+public:
+ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~AMDGPUTargetMachine();
+ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
+ virtual const AMDILSubtarget *getSubtargetImpl() const {return &Subtarget; }
+ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ virtual AMDGPUTargetLowering * getTargetLowering() const {
+ return TLInfo;
+ }
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify);
+public:
+ void dumpCode() { mDump = true; }
+ bool shouldDumpCode() const { return mDump; }
+};
+
+} /* End namespace llvm */
+
+#endif /* AMDGPU_TARGET_MACHINE_H */
diff --git a/lib/Target/AMDIL/AMDGPUUtil.cpp b/lib/Target/AMDIL/AMDGPUUtil.cpp
new file mode 100644
index 0000000..a504543
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUUtil.cpp
@@ -0,0 +1,126 @@
+//===-- AMDGPUUtil.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUUtil.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+/* Some instructions act as place holders to emulate operations that the GPU
+ * hardware does automatically. This function can be used to check if
+ * an opcode falls into this category. */
+bool llvm::isPlaceHolderOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ default: return false;
+ case AMDIL::EXPORT_REG:
+ case AMDIL::RETURN:
+ case AMDIL::LOAD_INPUT:
+ case AMDIL::LAST:
+ case AMDIL::RESERVE_REG:
+ return true;
+ }
+}
+
+bool llvm::isTransOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+
+ case AMDIL::COS_f32:
+ case AMDIL::COS_r600:
+ case AMDIL::COS_eg:
+ case AMDIL::RSQ_f32:
+ case AMDIL::FTOI:
+ case AMDIL::ITOF:
+ case AMDIL::MULLIT:
+ case AMDIL::MUL_LIT_r600:
+ case AMDIL::MUL_LIT_eg:
+ case AMDIL::SHR_i32:
+ case AMDIL::SIN_f32:
+ case AMDIL::EXP_f32:
+ case AMDIL::EXP_IEEE_r600:
+ case AMDIL::EXP_IEEE_eg:
+ case AMDIL::LOG_CLAMPED_r600:
+ case AMDIL::LOG_IEEE_r600:
+ case AMDIL::LOG_CLAMPED_eg:
+ case AMDIL::LOG_IEEE_eg:
+ case AMDIL::LOG_f32:
+ return true;
+ }
+}
+
+bool llvm::isTexOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDIL::TEX_SAMPLE:
+ case AMDIL::TEX_SAMPLE_C:
+ case AMDIL::TEX_SAMPLE_L:
+ case AMDIL::TEX_SAMPLE_C_L:
+ case AMDIL::TEX_SAMPLE_LB:
+ case AMDIL::TEX_SAMPLE_C_LB:
+ case AMDIL::TEX_SAMPLE_G:
+ case AMDIL::TEX_SAMPLE_C_G:
+ return true;
+ }
+}
+
+bool llvm::isReductionOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDIL::DOT4_r600:
+ case AMDIL::DOT4_eg:
+ return true;
+ }
+}
+
+bool llvm::isFCOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDIL::BREAK_LOGICALZ_f32:
+ case AMDIL::BREAK_LOGICALNZ_i32:
+ case AMDIL::BREAK_LOGICALZ_i32:
+ case AMDIL::CONTINUE_LOGICALNZ_f32:
+ case AMDIL::IF_LOGICALNZ_i32:
+ case AMDIL::IF_LOGICALZ_f32:
+ case AMDIL::ELSE:
+ case AMDIL::ENDIF:
+ case AMDIL::ENDLOOP:
+ case AMDIL::IF_LOGICALNZ_f32:
+ case AMDIL::WHILELOOP:
+ return true;
+ }
+}
+
+void AMDGPU::utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
+ const struct TargetInstrInfo * TII, unsigned physReg, unsigned virtReg)
+{
+ if (!MRI.isLiveIn(physReg)) {
+ MRI.addLiveIn(physReg, virtReg);
+ BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), virtReg)
+ .addReg(physReg);
+ } else {
+ MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
+ }
+}
diff --git a/lib/Target/AMDIL/AMDGPUUtil.h b/lib/Target/AMDIL/AMDGPUUtil.h
new file mode 100644
index 0000000..299146e
--- /dev/null
+++ b/lib/Target/AMDIL/AMDGPUUtil.h
@@ -0,0 +1,49 @@
+//===-- AMDGPUUtil.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_UTIL_H
+#define AMDGPU_UTIL_H
+
+#include "AMDGPURegisterInfo.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class AMDILMachineFunctionInfo;
+
+class TargetMachine;
+class TargetRegisterInfo;
+
+bool isPlaceHolderOpcode(unsigned opcode);
+
+bool isTransOp(unsigned opcode);
+bool isTexOp(unsigned opcode);
+bool isReductionOp(unsigned opcode);
+bool isFCOp(unsigned opcode);
+
+/* XXX: Move these to AMDGPUInstrInfo.h */
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+
+} /* End namespace llvm */
+
+namespace AMDGPU {
+
+void utilAddLiveIn(llvm::MachineFunction * MF, llvm::MachineRegisterInfo & MRI,
+ const struct llvm::TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);
+
+} // End namespace AMDGPU
+
+#endif /* AMDGPU_UTIL_H */
diff --git a/lib/Target/AMDIL/AMDIL.h b/lib/Target/AMDIL/AMDIL.h
new file mode 100644
index 0000000..317ea12
--- /dev/null
+++ b/lib/Target/AMDIL/AMDIL.h
@@ -0,0 +1,280 @@
+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AMDIL back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H_
+#define AMDIL_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define AMDIL_MAJOR_VERSION 2
+#define AMDIL_MINOR_VERSION 0
+#define AMDIL_REVISION_NUMBER 74
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The next two values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+// SC->CAL version matchings.
+#define CAL_VERSION_SC_150 1700
+#define CAL_VERSION_SC_149 1700
+#define CAL_VERSION_SC_148 1525
+#define CAL_VERSION_SC_147 1525
+#define CAL_VERSION_SC_146 1525
+#define CAL_VERSION_SC_145 1451
+#define CAL_VERSION_SC_144 1451
+#define CAL_VERSION_SC_143 1441
+#define CAL_VERSION_SC_142 1441
+#define CAL_VERSION_SC_141 1420
+#define CAL_VERSION_SC_140 1400
+#define CAL_VERSION_SC_139 1387
+#define CAL_VERSION_SC_138 1387
+#define CAL_APPEND_BUFFER_SUPPORT 1340
+#define CAL_VERSION_SC_137 1331
+#define CAL_VERSION_SC_136 982
+#define CAL_VERSION_SC_135 950
+#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
+
+/// The number of function ID's that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+#define AMDIL_OPT_LEVEL_DECL
+#define AMDIL_OPT_LEVEL_VAR
+#define AMDIL_OPT_LEVEL_VAR_NO_COMMA
+
+namespace llvm {
+class AMDILInstrPrinter;
+class AMDILTargetMachine;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+/// Instruction selection passes.
+FunctionPass*
+ createAMDILISelDag(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+
+/// Pre regalloc passes.
+FunctionPass*
+ createAMDILMachinePeephole(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+
+/// Pre emit passes.
+FunctionPass*
+ createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+
+extern Target TheAMDILTarget;
+extern Target TheAMDGPUTarget;
+} // end namespace llvm;
+
+#define GET_REGINFO_ENUM
+#include "AMDILGenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "AMDILGenInstrInfo.inc"
+
+/// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a seperate piece of memory that is unique from other
+/// memory locations.
+namespace AMDILAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, // Address space for private memory.
+ GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, // Address space for constant memory.
+ LOCAL_ADDRESS = 3, // Address space for local memory.
+ REGION_ADDRESS = 4, // Address space for region memory.
+ ADDRESS_NONE = 5, // Address space for unknown memory.
+ PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
+ PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
+ LAST_ADDRESS = 8
+};
+
+// We are piggybacking on the CommentFlag enum in MachineInstr.h to
+// set bits in AsmPrinterFlags of the MachineInstruction. We will
+// start at bit 16 and allocate down while LLVM will start at bit
+// 1 and allocate up.
+
+// This union/struct combination is an easy way to read out the
+// exact bits that are needed.
+typedef union ResourceRec {
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned short isImage : 1; // Reserved for future use/llvm.
+ unsigned short ResourceID : 10; // Flag to specify the resourece ID for
+ // the op.
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a
+ // conflict.
+ unsigned short ByteStore : 1; // Flag to specify if the op is a byte
+ // store op.
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable.
+#else
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable.
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short ByteStore : 1; // Flag to specify if the op is byte
+ // store op.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has
+ // a conflict.
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction.
+ unsigned short ResourceID : 10; // Flag to specify the resource ID for
+ // the op.
+ unsigned short isImage : 1; // Reserved for future use.
+#endif
+ } bits;
+ unsigned short u16all;
+} InstrResEnc;
+
+} // namespace AMDILAS
+
+// The OpSwizzle encodes a subset of all possible
+// swizzle combinations into a number of bits using
+// only the combinations utilized by the backend.
+// The lower 128 are for source swizzles and the
+// upper 128 or for destination swizzles.
+// The valid mappings can be found in the
+// getSrcSwizzle and getDstSwizzle functions of
+// AMDILUtilityFunctions.cpp.
+typedef union SwizzleRec {
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned char dst : 1;
+ unsigned char swizzle : 7;
+#else
+ unsigned char swizzle : 7;
+ unsigned char dst : 1;
+#endif
+ } bits;
+ unsigned char u8all;
+} OpSwizzle;
+// Enums corresponding to AMDIL condition codes for IL. These
+// values must be kept in sync with the ones in the .td file.
+namespace AMDILCC {
+enum CondCodes {
+ // AMDIL specific condition codes. These correspond to the IL_CC_*
+ // in AMDILInstrInfo.td and must be kept in the same order.
+ IL_CC_D_EQ = 0, // DEQ instruction.
+ IL_CC_D_GE = 1, // DGE instruction.
+ IL_CC_D_LT = 2, // DLT instruction.
+ IL_CC_D_NE = 3, // DNE instruction.
+ IL_CC_F_EQ = 4, // EQ instruction.
+ IL_CC_F_GE = 5, // GE instruction.
+ IL_CC_F_LT = 6, // LT instruction.
+ IL_CC_F_NE = 7, // NE instruction.
+ IL_CC_I_EQ = 8, // IEQ instruction.
+ IL_CC_I_GE = 9, // IGE instruction.
+ IL_CC_I_LT = 10, // ILT instruction.
+ IL_CC_I_NE = 11, // INE instruction.
+ IL_CC_U_GE = 12, // UGE instruction.
+ IL_CC_U_LT = 13, // ULE instruction.
+ // Pseudo IL Comparison instructions here.
+ IL_CC_F_GT = 14, // GT instruction.
+ IL_CC_U_GT = 15,
+ IL_CC_I_GT = 16,
+ IL_CC_D_GT = 17,
+ IL_CC_F_LE = 18, // LE instruction
+ IL_CC_U_LE = 19,
+ IL_CC_I_LE = 20,
+ IL_CC_D_LE = 21,
+ IL_CC_F_UNE = 22,
+ IL_CC_F_UEQ = 23,
+ IL_CC_F_ULT = 24,
+ IL_CC_F_UGT = 25,
+ IL_CC_F_ULE = 26,
+ IL_CC_F_UGE = 27,
+ IL_CC_F_ONE = 28,
+ IL_CC_F_OEQ = 29,
+ IL_CC_F_OLT = 30,
+ IL_CC_F_OGT = 31,
+ IL_CC_F_OLE = 32,
+ IL_CC_F_OGE = 33,
+ IL_CC_D_UNE = 34,
+ IL_CC_D_UEQ = 35,
+ IL_CC_D_ULT = 36,
+ IL_CC_D_UGT = 37,
+ IL_CC_D_ULE = 38,
+ IL_CC_D_UGE = 39,
+ IL_CC_D_ONE = 40,
+ IL_CC_D_OEQ = 41,
+ IL_CC_D_OLT = 42,
+ IL_CC_D_OGT = 43,
+ IL_CC_D_OLE = 44,
+ IL_CC_D_OGE = 45,
+ IL_CC_U_EQ = 46,
+ IL_CC_U_NE = 47,
+ IL_CC_F_O = 48,
+ IL_CC_D_O = 49,
+ IL_CC_F_UO = 50,
+ IL_CC_D_UO = 51,
+ IL_CC_L_LE = 52,
+ IL_CC_L_GE = 53,
+ IL_CC_L_EQ = 54,
+ IL_CC_L_NE = 55,
+ IL_CC_L_LT = 56,
+ IL_CC_L_GT = 57,
+ IL_CC_UL_LE = 58,
+ IL_CC_UL_GE = 59,
+ IL_CC_UL_EQ = 60,
+ IL_CC_UL_NE = 61,
+ IL_CC_UL_LT = 62,
+ IL_CC_UL_GT = 63,
+ COND_ERROR = 64
+};
+
+} // end namespace AMDILCC
+} // end namespace llvm
+#endif // AMDIL_H_
diff --git a/lib/Target/AMDIL/AMDIL7XXDevice.cpp b/lib/Target/AMDIL/AMDIL7XXDevice.cpp
new file mode 100644
index 0000000..e1b6b8f
--- /dev/null
+++ b/lib/Target/AMDIL/AMDIL7XXDevice.cpp
@@ -0,0 +1,128 @@
+//===-- AMDIL7XXDevice.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+
+using namespace llvm;
+
+AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
+{
+ setCaps();
+ std::string name = mSTM->getDeviceName();
+ if (name == "rv710") {
+ mDeviceFlag = OCL_DEVICE_RV710;
+ } else if (name == "rv730") {
+ mDeviceFlag = OCL_DEVICE_RV730;
+ } else {
+ mDeviceFlag = OCL_DEVICE_RV770;
+ }
+}
+
+AMDIL7XXDevice::~AMDIL7XXDevice()
+{
+}
+
+void AMDIL7XXDevice::setCaps()
+{
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+size_t AMDIL7XXDevice::getMaxLDSSize() const
+{
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_700;
+ }
+ return 0;
+}
+
+size_t AMDIL7XXDevice::getWavefrontSize() const
+{
+ return AMDILDevice::HalfWavefrontSize;
+}
+
+uint32_t AMDIL7XXDevice::getGeneration() const
+{
+ return AMDILDeviceInfo::HD4XXX;
+}
+
+uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
+{
+ switch (DeviceID) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case GLOBAL_ID:
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ case ARENA_UAV_ID:
+ break;
+ case LDS_ID:
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ }
+ break;
+ case SCRATCH_ID:
+ if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ }
+ break;
+ case GDS_ID:
+ assert(0 && "GDS UAV ID is not supported on this chip");
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ }
+ break;
+ };
+
+ return 0;
+}
+
+uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
+{
+ return 1;
+}
+
+AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
+{
+ setCaps();
+}
+
+AMDIL770Device::~AMDIL770Device()
+{
+}
+
+void AMDIL770Device::setCaps()
+{
+ if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+ mSWBits.set(AMDILDeviceInfo::FMA);
+ mHWBits.set(AMDILDeviceInfo::DoubleOps);
+ }
+ mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+ mHWBits.reset(AMDILDeviceInfo::LongOps);
+ mSWBits.set(AMDILDeviceInfo::LongOps);
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+size_t AMDIL770Device::getWavefrontSize() const
+{
+ return AMDILDevice::WavefrontSize;
+}
+
+AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
+{
+}
+
+AMDIL710Device::~AMDIL710Device()
+{
+}
+
+size_t AMDIL710Device::getWavefrontSize() const
+{
+ return AMDILDevice::QuarterWavefrontSize;
+}
diff --git a/lib/Target/AMDIL/AMDIL7XXDevice.h b/lib/Target/AMDIL/AMDIL7XXDevice.h
new file mode 100644
index 0000000..4d8d47a
--- /dev/null
+++ b/lib/Target/AMDIL/AMDIL7XXDevice.h
@@ -0,0 +1,71 @@
+//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL7XXDEVICEIMPL_H_
+#define _AMDIL7XXDEVICEIMPL_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+class AMDILSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
+// devices are derived from this class. The AMDIL7XX device will only
+// support the minimal features that are required to be considered OpenCL 1.0
+// compliant and nothing more.
+class AMDIL7XXDevice : public AMDILDevice {
+public:
+ AMDIL7XXDevice(AMDILSubtarget *ST);
+ virtual ~AMDIL7XXDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
+ virtual uint32_t getMaxNumUAVs() const;
+
+protected:
+ virtual void setCaps();
+}; // AMDIL7XXDevice
+
+// The AMDIL770Device class represents the RV770 chip and it's
+// derivative cards. The difference between this device and the base
+// class is this device device adds support for double precision
+// and has a larger wavefront size.
+class AMDIL770Device : public AMDIL7XXDevice {
+public:
+ AMDIL770Device(AMDILSubtarget *ST);
+ virtual ~AMDIL770Device();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDIL770Device
+
+// The AMDIL710Device class derives from the 7XX base class, but this
+// class is a smaller derivative, so we need to overload some of the
+// functions in order to correctly specify this information.
+class AMDIL710Device : public AMDIL7XXDevice {
+public:
+ AMDIL710Device(AMDILSubtarget *ST);
+ virtual ~AMDIL710Device();
+ virtual size_t getWavefrontSize() const;
+}; // AMDIL710Device
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
diff --git a/lib/Target/AMDIL/AMDILDevice.cpp b/lib/Target/AMDIL/AMDILDevice.cpp
new file mode 100644
index 0000000..aa6d8af
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILDevice.cpp
@@ -0,0 +1,137 @@
+//===-- AMDILDevice.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+
+using namespace llvm;
+// Default implementation for all of the classes.
+AMDILDevice::AMDILDevice(AMDILSubtarget *ST) : mSTM(ST)
+{
+ mHWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+ mSWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+ setCaps();
+ mDeviceFlag = OCL_DEVICE_ALL;
+}
+
+AMDILDevice::~AMDILDevice()
+{
+ mHWBits.clear();
+ mSWBits.clear();
+}
+
+size_t AMDILDevice::getMaxGDSSize() const
+{
+ return 0;
+}
+
+uint32_t
+AMDILDevice::getDeviceFlag() const
+{
+ return mDeviceFlag;
+}
+
+size_t AMDILDevice::getMaxNumCBs() const
+{
+ if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ return HW_MAX_NUM_CB;
+ }
+
+ return 0;
+}
+
+size_t AMDILDevice::getMaxCBSize() const
+{
+ if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ return MAX_CB_SIZE;
+ }
+
+ return 0;
+}
+
+size_t AMDILDevice::getMaxScratchSize() const
+{
+ return 65536;
+}
+
+uint32_t AMDILDevice::getStackAlignment() const
+{
+ return 16;
+}
+
+void AMDILDevice::setCaps()
+{
+ mSWBits.set(AMDILDeviceInfo::HalfOps);
+ mSWBits.set(AMDILDeviceInfo::ByteOps);
+ mSWBits.set(AMDILDeviceInfo::ShortOps);
+ mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+ if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) {
+ mSWBits.set(AMDILDeviceInfo::NoInline);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) {
+ mSWBits.set(AMDILDeviceInfo::MacroDB);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::ConstantMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::ConstantMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::PrivateMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::PrivateMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) {
+ mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+ }
+ mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
+ mSWBits.set(AMDILDeviceInfo::LongOps);
+}
+
+AMDILDeviceInfo::ExecutionMode
+AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const
+{
+ if (mHWBits[Caps]) {
+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDILDeviceInfo::Hardware;
+ }
+
+ if (mSWBits[Caps]) {
+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDILDeviceInfo::Software;
+ }
+
+ return AMDILDeviceInfo::Unsupported;
+
+}
+
+bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported;
+}
+
+bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware;
+}
+
+bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDILDeviceInfo::Software;
+}
+
+std::string
+AMDILDevice::getDataLayout() const
+{
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n8:16:32:64");
+}
diff --git a/lib/Target/AMDIL/AMDILDevice.h b/lib/Target/AMDIL/AMDILDevice.h
new file mode 100644
index 0000000..706dd82
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILDevice.h
@@ -0,0 +1,116 @@
+//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEIMPL_H_
+#define _AMDILDEVICEIMPL_H_
+#include "AMDIL.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+ class AMDILSubtarget;
+ class MCStreamer;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDILDevice {
+public:
+ AMDILDevice(AMDILSubtarget *ST);
+ virtual ~AMDILDevice();
+
+ // Enum values for the various memory types.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ } IO_TYPE_IDS;
+
+ // Returns the max LDS size that the hardware supports. Size is in
+ // bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ // Returns the max GDS size that the hardware supports if the GDS is
+ // supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ // Returns the max number of hardware constant address spaces that
+ // are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ // Returns the max number of bytes a single hardware constant buffer
+ // can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ // Returns the max number of bytes allowed by the hardware scratch
+ // buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ // Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ // Returns the number of work-items that exist in a single hardware
+ // wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ // Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ // Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ // Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+ // Get the max number of UAV's for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+
+ // API utilizing more detailed capabilities of each family of
+ // cards. If a capability is supported, then either usesHardware or
+ // usesSoftware returned true. If usesHardware returned true, then
+ // usesSoftware must return false for the same capability. Hardware
+ // execution means that the feature is done natively by the hardware
+ // and is not emulated by the softare. Software execution means
+ // that the feature could be done in the hardware, but there is
+ // software that emulates it with possibly using the hardware for
+ // support since the hardware does not fully comply with OpenCL
+ // specs.
+ bool isSupported(AMDILDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
+ virtual std::string getDataLayout() const;
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ virtual void setCaps();
+ llvm::BitVector mHWBits;
+ llvm::BitVector mSWBits;
+ AMDILSubtarget *mSTM;
+ uint32_t mDeviceFlag;
+private:
+ AMDILDeviceInfo::ExecutionMode
+ getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
+}; // AMDILDevice
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
diff --git a/lib/Target/AMDIL/AMDILDeviceInfo.cpp b/lib/Target/AMDIL/AMDILDeviceInfo.cpp
new file mode 100644
index 0000000..89b8312
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILDeviceInfo.cpp
@@ -0,0 +1,87 @@
+//===-- AMDILDeviceInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDILSubtarget.h"
+
+using namespace llvm;
+namespace llvm {
+ AMDILDevice*
+getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
+{
+ if (deviceName.c_str()[2] == '7') {
+ switch (deviceName.c_str()[3]) {
+ case '1':
+ return new AMDIL710Device(ptr);
+ case '7':
+ return new AMDIL770Device(ptr);
+ default:
+ return new AMDIL7XXDevice(ptr);
+ };
+ } else if (deviceName == "cypress") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILCypressDevice(ptr);
+ } else if (deviceName == "juniper") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILEvergreenDevice(ptr);
+ } else if (deviceName == "redwood") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILRedwoodDevice(ptr);
+ } else if (deviceName == "cedar") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILCedarDevice(ptr);
+ } else if (deviceName == "barts"
+ || deviceName == "turks") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILNIDevice(ptr);
+ } else if (deviceName == "cayman") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILCaymanDevice(ptr);
+ } else if (deviceName == "caicos") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILNIDevice(ptr);
+ } else if (deviceName == "SI") {
+ return new AMDILSIDevice(ptr);
+ } else {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDIL7XXDevice(ptr);
+ }
+}
+}
diff --git a/lib/Target/AMDIL/AMDILDeviceInfo.h b/lib/Target/AMDIL/AMDILDeviceInfo.h
new file mode 100644
index 0000000..c4acf91
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILDeviceInfo.h
@@ -0,0 +1,89 @@
+//===-- AMDILDeviceInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEINFO_H_
+#define _AMDILDEVICEINFO_H_
+
+
+#include <string>
+
+namespace llvm
+{
+ class AMDILDevice;
+ class AMDILSubtarget;
+ namespace AMDILDeviceInfo
+ {
+ // Each Capabilities can be executed using a hardware instruction,
+ // emulated with a sequence of software instructions, or not
+ // supported at all.
+ enum ExecutionMode {
+ Unsupported = 0, // Unsupported feature on the card(Default value)
+ Software, // This is the execution mode that is set if the
+ // feature is emulated in software
+ Hardware // This execution mode is set if the feature exists
+ // natively in hardware
+ };
+
+ // Any changes to this needs to have a corresponding update to the
+ // twiki page GPUMetadataABI
+ enum Caps {
+ HalfOps = 0x1, // Half float is supported or not.
+ DoubleOps = 0x2, // Double is supported or not.
+ ByteOps = 0x3, // Byte(char) is support or not.
+ ShortOps = 0x4, // Short is supported or not.
+ LongOps = 0x5, // Long is supported or not.
+ Images = 0x6, // Images are supported or not.
+ ByteStores = 0x7, // ByteStores available(!HD4XXX).
+ ConstantMem = 0x8, // Constant/CB memory.
+ LocalMem = 0x9, // Local/LDS memory.
+ PrivateMem = 0xA, // Scratch/Private/Stack memory.
+ RegionMem = 0xB, // OCL GDS Memory Extension.
+ FMA = 0xC, // Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
+ Reserved0 = 0xF, // ReservedFlag
+ NoAlias = 0x10, // Cached loads.
+ Signed24BitOps = 0x11, // Peephole Optimization.
+ // Debug mode implies that no hardware features or optimizations
+ // are performned and that all memory access go through a single
+ // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12, // Debug mode is enabled.
+ CachedMem = 0x13, // Cached mem is available or not.
+ BarrierDetect = 0x14, // Detect duplicate barriers.
+ Reserved1 = 0x15, // Reserved flag
+ ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, // Flag to specify if Tmr register is supported.
+ NoInline = 0x19, // Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
+ // If more capabilities are required, then
+ // this number needs to be increased.
+ // All capabilities must come before this
+ // number.
+ MaxNumberCapabilities = 0x20
+ };
+ // These have to be in order with the older generations
+ // having the lower number enumerations.
+ enum Generation {
+ HD4XXX = 0, // 7XX based devices.
+ HD5XXX, // Evergreen based devices.
+ HD6XXX, // NI/Evergreen+ based devices.
+ HD7XXX,
+ HDTEST, // Experimental feature testing device.
+ HDNUMGEN
+ };
+
+
+ } // namespace AMDILDeviceInfo
+ llvm::AMDILDevice*
+ getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
+} // namespace llvm
+#endif // _AMDILDEVICEINFO_H_
diff --git a/lib/Target/AMDIL/AMDILDevices.h b/lib/Target/AMDIL/AMDILDevices.h
new file mode 100644
index 0000000..3fc5fa0
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILDevices.h
@@ -0,0 +1,19 @@
+//===-- AMDILDevices.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef __AMDIL_DEVICES_H_
+#define __AMDIL_DEVICES_H_
+// Include all of the device specific header files
+// This file is for Internal use only!
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSIDevice.h"
+
+#endif // _AMDIL_DEVICES_H_
diff --git a/lib/Target/AMDIL/AMDILEvergreenDevice.cpp b/lib/Target/AMDIL/AMDILEvergreenDevice.cpp
new file mode 100644
index 0000000..eb20beb
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILEvergreenDevice.cpp
@@ -0,0 +1,183 @@
+//===-- AMDILEvergreenDevice.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
+: AMDILDevice(ST) {
+ setCaps();
+ std::string name = ST->getDeviceName();
+ if (name == "cedar") {
+ mDeviceFlag = OCL_DEVICE_CEDAR;
+ } else if (name == "redwood") {
+ mDeviceFlag = OCL_DEVICE_REDWOOD;
+ } else if (name == "cypress") {
+ mDeviceFlag = OCL_DEVICE_CYPRESS;
+ } else {
+ mDeviceFlag = OCL_DEVICE_JUNIPER;
+ }
+}
+
+AMDILEvergreenDevice::~AMDILEvergreenDevice() {
+}
+
+size_t AMDILEvergreenDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+size_t AMDILEvergreenDevice::getMaxGDSSize() const {
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const {
+ return 12;
+}
+
+uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const {
+ switch(id) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+ return GLOBAL_RETURN_RAW_UAV_ID;
+ } else {
+ return DEFAULT_RAW_UAV_ID;
+ }
+ case GLOBAL_ID:
+ case ARENA_UAV_ID:
+ return DEFAULT_ARENA_UAV_ID;
+ case LDS_ID:
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case GDS_ID:
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case SCRATCH_ID:
+ if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ };
+ return 0;
+}
+
+size_t AMDILEvergreenDevice::getWavefrontSize() const {
+ return AMDILDevice::WavefrontSize;
+}
+
+uint32_t AMDILEvergreenDevice::getGeneration() const {
+ return AMDILDeviceInfo::HD5XXX;
+}
+
+void AMDILEvergreenDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDILDeviceInfo::ArenaUAV);
+ if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
+ mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
+ }
+ mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) {
+ mHWBits.set(AMDILDeviceInfo::ByteStores);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+ mSWBits.set(AMDILDeviceInfo::RegionMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::LocalMem);
+ mHWBits.set(AMDILDeviceInfo::RegionMem);
+ }
+ mHWBits.set(AMDILDeviceInfo::Images);
+ if (mSTM->isOverride(AMDILDeviceInfo::NoAlias)) {
+ mHWBits.set(AMDILDeviceInfo::NoAlias);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+ mHWBits.set(AMDILDeviceInfo::CachedMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) {
+ mHWBits.set(AMDILDeviceInfo::MultiUAV);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_SC_136) {
+ mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDILDeviceInfo::ArenaVectors);
+ } else {
+ mSWBits.set(AMDILDeviceInfo::ArenaVectors);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_SC_137) {
+ mHWBits.set(AMDILDeviceInfo::LongOps);
+ mSWBits.reset(AMDILDeviceInfo::LongOps);
+ }
+ mHWBits.set(AMDILDeviceInfo::TmrReg);
+}
+
+AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDILCypressDevice::~AMDILCypressDevice() {
+}
+
+void AMDILCypressDevice::setCaps() {
+ if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDILDeviceInfo::DoubleOps);
+ mHWBits.set(AMDILDeviceInfo::FMA);
+ }
+}
+
+
+AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDILCedarDevice::~AMDILCedarDevice() {
+}
+
+void AMDILCedarDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILCedarDevice::getWavefrontSize() const {
+ return AMDILDevice::QuarterWavefrontSize;
+}
+
+AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDILRedwoodDevice::~AMDILRedwoodDevice()
+{
+}
+
+void AMDILRedwoodDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILRedwoodDevice::getWavefrontSize() const {
+ return AMDILDevice::HalfWavefrontSize;
+}
diff --git a/lib/Target/AMDIL/AMDILEvergreenDevice.h b/lib/Target/AMDIL/AMDILEvergreenDevice.h
new file mode 100644
index 0000000..2639ab8
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILEvergreenDevice.h
@@ -0,0 +1,87 @@
+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILEVERGREENDEVICE_H_
+#define _AMDILEVERGREENDEVICE_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+ class AMDILSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+// The AMDILEvergreenDevice is the base device class for all of the Evergreen
+// series of cards. This class contains information required to differentiate
+// the Evergreen device from the generic AMDILDevice. This device represents
+// that capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDILEvergreenDevice : public AMDILDevice {
+public:
+ AMDILEvergreenDevice(AMDILSubtarget *ST);
+ virtual ~AMDILEvergreenDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getMaxGDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getMaxNumUAVs() const;
+ virtual uint32_t getResourceID(uint32_t) const;
+protected:
+ virtual void setCaps();
+}; // AMDILEvergreenDevice
+
+// The AMDILCypressDevice is similiar to the AMDILEvergreenDevice, except it has
+// support for double precision operations. This device is used to represent
+// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+// and HD59XX cards.
+class AMDILCypressDevice : public AMDILEvergreenDevice {
+public:
+ AMDILCypressDevice(AMDILSubtarget *ST);
+ virtual ~AMDILCypressDevice();
+private:
+ virtual void setCaps();
+}; // AMDILCypressDevice
+
+
+// The AMDILCedarDevice is the class that represents all of the 'Cedar' based
+// devices. This class differs from the base AMDILEvergreenDevice in that the
+// device is a ~quarter of the 'Juniper'. These are commercially known as the
+// HD54XX and HD53XX series of cards.
+class AMDILCedarDevice : public AMDILEvergreenDevice {
+public:
+ AMDILCedarDevice(AMDILSubtarget *ST);
+ virtual ~AMDILCedarDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDILCedarDevice
+
+// The AMDILRedwoodDevice is the class the represents all of the 'Redwood' based
+// devices. This class differs from the base class, in that these devices are
+// considered about half of a 'Juniper' device. These are commercially known as
+// the HD55XX and HD56XX series of cards.
+class AMDILRedwoodDevice : public AMDILEvergreenDevice {
+public:
+ AMDILRedwoodDevice(AMDILSubtarget *ST);
+ virtual ~AMDILRedwoodDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDILRedwoodDevice
+
+} // namespace llvm
+#endif // _AMDILEVERGREENDEVICE_H_
diff --git a/lib/Target/AMDIL/AMDILFrameLowering.cpp b/lib/Target/AMDIL/AMDILFrameLowering.cpp
new file mode 100644
index 0000000..87eca87
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILFrameLowering.cpp
@@ -0,0 +1,53 @@
+//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on a AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl)
+{
+}
+
+AMDILFrameLowering::~AMDILFrameLowering()
+{
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index.
+int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI);
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
+{
+ NumEntries = 0;
+ return 0;
+}
+void
+AMDILFrameLowering::emitPrologue(MachineFunction &MF) const
+{
+}
+void
+AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+}
+bool
+AMDILFrameLowering::hasFP(const MachineFunction &MF) const
+{
+ return false;
+}
diff --git a/lib/Target/AMDIL/AMDILFrameLowering.h b/lib/Target/AMDIL/AMDILFrameLowering.h
new file mode 100644
index 0000000..b1d919e
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILFrameLowering.h
@@ -0,0 +1,46 @@
+//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on a AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILFRAME_LOWERING_H_
+#define _AMDILFRAME_LOWERING_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+/// Information about the stack frame layout on the AMDIL targets. It holds
+/// the direction of the stack growth, the known stack alignment on entry to
+/// each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+
+namespace llvm {
+ class AMDILFrameLowering : public TargetFrameLowering {
+ public:
+ AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
+ TransAl = 1);
+ virtual ~AMDILFrameLowering();
+ virtual int getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const;
+ virtual const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+ }; // class AMDILFrameLowering
+} // namespace llvm
+#endif // _AMDILFRAME_LOWERING_H_
diff --git a/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp b/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp
new file mode 100644
index 0000000..ff04d9d
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp
@@ -0,0 +1,457 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
+// //for SelectionDAG operations.
+//
+namespace {
+class AMDILDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDILSubtarget &Subtarget;
+public:
+ AMDILDAGToDAGISel(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+ virtual ~AMDILDAGToDAGISel();
+ inline SDValue getSmallIPtrImm(unsigned Imm);
+
+ SDNode *Select(SDNode *N);
+ // Complex pattern selectors
+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+ static bool isGlobalStore(const StoreSDNode *N);
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ static bool isCPLoad(const LoadSDNode *N);
+ static bool isConstantLoad(const LoadSDNode *N, int cbID);
+ static bool isGlobalLoad(const LoadSDNode *N);
+ static bool isPrivateLoad(const LoadSDNode *N);
+ static bool isLocalLoad(const LoadSDNode *N);
+ static bool isRegionLoad(const LoadSDNode *N);
+
+ virtual const char *getPassName() const;
+private:
+ SDNode *xformAtomicInst(SDNode *N);
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDILGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+// createAMDILISelDag - This pass converts a legalized DAG into a AMDIL-specific
+// DAG, ready for instruction scheduling.
+//
+FunctionPass *llvm::createAMDILISelDag(AMDILTargetMachine &TM
+ AMDIL_OPT_LEVEL_DECL) {
+ return new AMDILDAGToDAGISel(TM AMDIL_OPT_LEVEL_VAR);
+}
+
+AMDILDAGToDAGISel::AMDILDAGToDAGISel(AMDILTargetMachine &TM
+ AMDIL_OPT_LEVEL_DECL)
+ : SelectionDAGISel(TM AMDIL_OPT_LEVEL_VAR), Subtarget(TM.getSubtarget<AMDILSubtarget>())
+{
+}
+
+AMDILDAGToDAGISel::~AMDILDAGToDAGISel() {
+}
+
+SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+}
+
+bool AMDILDAGToDAGISel::SelectADDRParam(
+ SDValue Addr, SDValue& R1, SDValue& R2) {
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ return true;
+}
+
+bool AMDILDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+ return SelectADDRParam(Addr, R1, R2);
+}
+
+
+bool AMDILDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ return true;
+}
+
+SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ switch (Opc) {
+ default: break;
+ case ISD::FrameIndex:
+ {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
+ unsigned int FI = FIN->getIndex();
+ EVT OpVT = N->getValueType(0);
+ unsigned int NewOpc = AMDIL::MOVE_i32;
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
+ }
+ }
+ break;
+ }
+ // For all atomic instructions, we need to add a constant
+ // operand that stores the resource ID in the instruction
+ if (Opc > AMDILISD::ADDADDR && Opc < AMDILISD::APPEND_ALLOC) {
+ N = xformAtomicInst(N);
+ }
+ return SelectCode(N);
+}
+
+bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+ return (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
+}
+
+bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
+ if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
+ return true;
+ }
+ MachineMemOperand *MMO = N->getMemOperand();
+ const Value *V = MMO->getValue();
+ const Value *BV = getBasePointerValue(V);
+ if (MMO
+ && MMO->getValue()
+ && ((V && dyn_cast<GlobalValue>(V))
+ || (BV && dyn_cast<GlobalValue>(
+ getBasePointerValue(MMO->getValue()))))) {
+ return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
+ } else {
+ return false;
+ }
+}
+
+bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
+ MachineMemOperand *MMO = N->getMemOperand();
+ if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+ if (MMO) {
+ const Value *V = MMO->getValue();
+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
+ if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
+ if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+ // Check to make sure we are not a constant pool load or a constant load
+ // that is marked as a private load
+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
+ return false;
+ }
+ }
+ if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS))
+ {
+ return true;
+ }
+ return false;
+}
+
+const char *AMDILDAGToDAGISel::getPassName() const {
+ return "AMDIL DAG->DAG Pattern Instruction Selection";
+}
+
+SDNode*
+AMDILDAGToDAGISel::xformAtomicInst(SDNode *N)
+{
+ uint32_t addVal = 1;
+ bool addOne = false;
+ // bool bitCastToInt = (N->getValueType(0) == MVT::f32);
+ unsigned opc = N->getOpcode();
+ switch (opc) {
+ default: return N;
+ case AMDILISD::ATOM_G_ADD:
+ case AMDILISD::ATOM_G_AND:
+ case AMDILISD::ATOM_G_MAX:
+ case AMDILISD::ATOM_G_UMAX:
+ case AMDILISD::ATOM_G_MIN:
+ case AMDILISD::ATOM_G_UMIN:
+ case AMDILISD::ATOM_G_OR:
+ case AMDILISD::ATOM_G_SUB:
+ case AMDILISD::ATOM_G_RSUB:
+ case AMDILISD::ATOM_G_XCHG:
+ case AMDILISD::ATOM_G_XOR:
+ case AMDILISD::ATOM_G_ADD_NORET:
+ case AMDILISD::ATOM_G_AND_NORET:
+ case AMDILISD::ATOM_G_MAX_NORET:
+ case AMDILISD::ATOM_G_UMAX_NORET:
+ case AMDILISD::ATOM_G_MIN_NORET:
+ case AMDILISD::ATOM_G_UMIN_NORET:
+ case AMDILISD::ATOM_G_OR_NORET:
+ case AMDILISD::ATOM_G_SUB_NORET:
+ case AMDILISD::ATOM_G_RSUB_NORET:
+ case AMDILISD::ATOM_G_XCHG_NORET:
+ case AMDILISD::ATOM_G_XOR_NORET:
+ case AMDILISD::ATOM_L_ADD:
+ case AMDILISD::ATOM_L_AND:
+ case AMDILISD::ATOM_L_MAX:
+ case AMDILISD::ATOM_L_UMAX:
+ case AMDILISD::ATOM_L_MIN:
+ case AMDILISD::ATOM_L_UMIN:
+ case AMDILISD::ATOM_L_OR:
+ case AMDILISD::ATOM_L_SUB:
+ case AMDILISD::ATOM_L_RSUB:
+ case AMDILISD::ATOM_L_XCHG:
+ case AMDILISD::ATOM_L_XOR:
+ case AMDILISD::ATOM_L_ADD_NORET:
+ case AMDILISD::ATOM_L_AND_NORET:
+ case AMDILISD::ATOM_L_MAX_NORET:
+ case AMDILISD::ATOM_L_UMAX_NORET:
+ case AMDILISD::ATOM_L_MIN_NORET:
+ case AMDILISD::ATOM_L_UMIN_NORET:
+ case AMDILISD::ATOM_L_OR_NORET:
+ case AMDILISD::ATOM_L_SUB_NORET:
+ case AMDILISD::ATOM_L_RSUB_NORET:
+ case AMDILISD::ATOM_L_XCHG_NORET:
+ case AMDILISD::ATOM_L_XOR_NORET:
+ case AMDILISD::ATOM_R_ADD:
+ case AMDILISD::ATOM_R_AND:
+ case AMDILISD::ATOM_R_MAX:
+ case AMDILISD::ATOM_R_UMAX:
+ case AMDILISD::ATOM_R_MIN:
+ case AMDILISD::ATOM_R_UMIN:
+ case AMDILISD::ATOM_R_OR:
+ case AMDILISD::ATOM_R_SUB:
+ case AMDILISD::ATOM_R_RSUB:
+ case AMDILISD::ATOM_R_XCHG:
+ case AMDILISD::ATOM_R_XOR:
+ case AMDILISD::ATOM_R_ADD_NORET:
+ case AMDILISD::ATOM_R_AND_NORET:
+ case AMDILISD::ATOM_R_MAX_NORET:
+ case AMDILISD::ATOM_R_UMAX_NORET:
+ case AMDILISD::ATOM_R_MIN_NORET:
+ case AMDILISD::ATOM_R_UMIN_NORET:
+ case AMDILISD::ATOM_R_OR_NORET:
+ case AMDILISD::ATOM_R_SUB_NORET:
+ case AMDILISD::ATOM_R_RSUB_NORET:
+ case AMDILISD::ATOM_R_XCHG_NORET:
+ case AMDILISD::ATOM_R_XOR_NORET:
+ case AMDILISD::ATOM_G_CMPXCHG:
+ case AMDILISD::ATOM_G_CMPXCHG_NORET:
+ case AMDILISD::ATOM_L_CMPXCHG:
+ case AMDILISD::ATOM_L_CMPXCHG_NORET:
+ case AMDILISD::ATOM_R_CMPXCHG:
+ case AMDILISD::ATOM_R_CMPXCHG_NORET:
+ break;
+ case AMDILISD::ATOM_G_DEC:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_G_SUB;
+ }
+ break;
+ case AMDILISD::ATOM_G_INC:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_G_ADD;
+ }
+ break;
+ case AMDILISD::ATOM_G_DEC_NORET:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_G_SUB_NORET;
+ }
+ break;
+ case AMDILISD::ATOM_G_INC_NORET:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_G_ADD_NORET;
+ }
+ break;
+ case AMDILISD::ATOM_L_DEC:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_L_SUB;
+ }
+ break;
+ case AMDILISD::ATOM_L_INC:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_L_ADD;
+ }
+ break;
+ case AMDILISD::ATOM_L_DEC_NORET:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_L_SUB_NORET;
+ }
+ break;
+ case AMDILISD::ATOM_L_INC_NORET:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_L_ADD_NORET;
+ }
+ break;
+ case AMDILISD::ATOM_R_DEC:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_R_SUB;
+ }
+ break;
+ case AMDILISD::ATOM_R_INC:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_R_ADD;
+ }
+ break;
+ case AMDILISD::ATOM_R_DEC_NORET:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_R_SUB;
+ }
+ break;
+ case AMDILISD::ATOM_R_INC_NORET:
+ addOne = true;
+ if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+ addVal = (uint32_t)-1;
+ } else {
+ opc = AMDILISD::ATOM_R_ADD_NORET;
+ }
+ break;
+ }
+ // The largest we can have is a cmpxchg w/ a return value and an output chain.
+ // The cmpxchg function has 3 inputs and a single output along with an
+ // output change and a target constant, giving a total of 6.
+ SDValue Ops[12];
+ unsigned x = 0;
+ unsigned y = N->getNumOperands();
+ for (x = 0; x < y; ++x) {
+ Ops[x] = N->getOperand(x);
+ }
+ if (addOne) {
+ Ops[x++] = SDValue(SelectCode(CurDAG->getConstant(addVal, MVT::i32).getNode()), 0);
+ }
+ Ops[x++] = CurDAG->getTargetConstant(0, MVT::i32);
+ SDVTList Tys = N->getVTList();
+ MemSDNode *MemNode = dyn_cast<MemSDNode>(N);
+ assert(MemNode && "Atomic should be of MemSDNode type!");
+ N = CurDAG->getMemIntrinsicNode(opc, N->getDebugLoc(), Tys, Ops, x,
+ MemNode->getMemoryVT(), MemNode->getMemOperand()).getNode();
+ return N;
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
diff --git a/lib/Target/AMDIL/AMDILISelLowering.cpp b/lib/Target/AMDIL/AMDILISelLowering.cpp
new file mode 100644
index 0000000..6179d11
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILISelLowering.cpp
@@ -0,0 +1,5576 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILISelLowering.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+#define ISDBITCAST ISD::BITCAST
+#define MVTGLUE MVT::Glue
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDILGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions Begin
+//===----------------------------------------------------------------------===//
+ static SDValue
+getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
+{
+ DebugLoc DL = Src.getDebugLoc();
+ EVT svt = Src.getValueType().getScalarType();
+ EVT dvt = Dst.getValueType().getScalarType();
+ if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
+ if (dvt.bitsGT(svt)) {
+ Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
+ } else if (svt.bitsLT(svt)) {
+ Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
+ DAG.getConstant(1, MVT::i32));
+ }
+ } else if (svt.isInteger() && dvt.isInteger()) {
+ if (!svt.bitsEq(dvt)) {
+ Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+ } else {
+ Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
+ }
+ } else if (svt.isInteger()) {
+ unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
+ if (!svt.bitsEq(dvt)) {
+ if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
+ Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+ } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
+ Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
+ } else {
+ assert(0 && "We only support 32 and 64bit fp types");
+ }
+ }
+ Src = DAG.getNode(opcode, DL, dvt, Src);
+ } else if (dvt.isInteger()) {
+ unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
+ if (svt.getSimpleVT().SimpleTy == MVT::f32) {
+ Src = DAG.getNode(opcode, DL, MVT::i32, Src);
+ } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
+ Src = DAG.getNode(opcode, DL, MVT::i64, Src);
+ } else {
+ assert(0 && "We only support 32 and 64bit fp types");
+ }
+ Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+ }
+ return Src;
+}
+// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
+// condition.
+ static AMDILCC::CondCodes
+CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
+{
+ switch (CC) {
+ default:
+ {
+ errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
+ assert(0 && "Unknown condition code!");
+ }
+ case ISD::SETO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_O;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_O;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UO;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UO;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_NE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_NE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_EQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_EQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UNE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UNE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UEQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETONE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ONE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ONE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOEQ:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OEQ;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ };
+}
+
+ static unsigned int
+translateToOpcode(uint64_t CCCode, unsigned int regClass)
+{
+ switch (CCCode) {
+ case AMDILCC::IL_CC_D_EQ:
+ case AMDILCC::IL_CC_D_OEQ:
+ if (regClass == AMDIL::GPRV2F64RegClassID) {
+ return (unsigned int)AMDIL::DEQ_v2f64;
+ } else {
+ return (unsigned int)AMDIL::DEQ;
+ }
+ case AMDILCC::IL_CC_D_LE:
+ case AMDILCC::IL_CC_D_OLE:
+ case AMDILCC::IL_CC_D_ULE:
+ case AMDILCC::IL_CC_D_GE:
+ case AMDILCC::IL_CC_D_OGE:
+ case AMDILCC::IL_CC_D_UGE:
+ return (unsigned int)AMDIL::DGE;
+ case AMDILCC::IL_CC_D_LT:
+ case AMDILCC::IL_CC_D_OLT:
+ case AMDILCC::IL_CC_D_ULT:
+ case AMDILCC::IL_CC_D_GT:
+ case AMDILCC::IL_CC_D_OGT:
+ case AMDILCC::IL_CC_D_UGT:
+ return (unsigned int)AMDIL::DLT;
+ case AMDILCC::IL_CC_D_NE:
+ case AMDILCC::IL_CC_D_UNE:
+ return (unsigned int)AMDIL::DNE;
+ case AMDILCC::IL_CC_F_EQ:
+ case AMDILCC::IL_CC_F_OEQ:
+ return (unsigned int)AMDIL::FEQ;
+ case AMDILCC::IL_CC_F_LE:
+ case AMDILCC::IL_CC_F_ULE:
+ case AMDILCC::IL_CC_F_OLE:
+ case AMDILCC::IL_CC_F_GE:
+ case AMDILCC::IL_CC_F_UGE:
+ case AMDILCC::IL_CC_F_OGE:
+ return (unsigned int)AMDIL::FGE;
+ case AMDILCC::IL_CC_F_LT:
+ case AMDILCC::IL_CC_F_OLT:
+ case AMDILCC::IL_CC_F_ULT:
+ case AMDILCC::IL_CC_F_GT:
+ case AMDILCC::IL_CC_F_OGT:
+ case AMDILCC::IL_CC_F_UGT:
+ if (regClass == AMDIL::GPRV2F32RegClassID) {
+ return (unsigned int)AMDIL::FLT_v2f32;
+ } else if (regClass == AMDIL::GPRV4F32RegClassID) {
+ return (unsigned int)AMDIL::FLT_v4f32;
+ } else {
+ return (unsigned int)AMDIL::FLT;
+ }
+ case AMDILCC::IL_CC_F_NE:
+ case AMDILCC::IL_CC_F_UNE:
+ return (unsigned int)AMDIL::FNE;
+ case AMDILCC::IL_CC_I_EQ:
+ case AMDILCC::IL_CC_U_EQ:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::IEQ;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRV2I8RegClassID
+ || regClass == AMDIL::GPRV2I16RegClassID) {
+ return (unsigned int)AMDIL::IEQ_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRV4I8RegClassID
+ || regClass == AMDIL::GPRV4I16RegClassID) {
+ return (unsigned int)AMDIL::IEQ_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_L_EQ:
+ case AMDILCC::IL_CC_UL_EQ:
+ return (unsigned int)AMDIL::LEQ;
+ case AMDILCC::IL_CC_I_GE:
+ case AMDILCC::IL_CC_I_LE:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::IGE;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::IGE_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::IGE_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_I_LT:
+ case AMDILCC::IL_CC_I_GT:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ILT;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ILT_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ILT_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_L_GE:
+ return (unsigned int)AMDIL::LGE;
+ case AMDILCC::IL_CC_L_LE:
+ return (unsigned int)AMDIL::LLE;
+ case AMDILCC::IL_CC_L_LT:
+ return (unsigned int)AMDIL::LLT;
+ case AMDILCC::IL_CC_L_GT:
+ return (unsigned int)AMDIL::LGT;
+ case AMDILCC::IL_CC_I_NE:
+ case AMDILCC::IL_CC_U_NE:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::INE;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::INE_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::INE_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_U_GE:
+ case AMDILCC::IL_CC_U_LE:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGE;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGE_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGE_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_L_NE:
+ case AMDILCC::IL_CC_UL_NE:
+ return (unsigned int)AMDIL::LNE;
+ case AMDILCC::IL_CC_UL_GE:
+ return (unsigned int)AMDIL::ULGE;
+ case AMDILCC::IL_CC_UL_LE:
+ return (unsigned int)AMDIL::ULLE;
+ case AMDILCC::IL_CC_U_LT:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ULT;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ULT_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::ULT_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_U_GT:
+ if (regClass == AMDIL::GPRI32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGT;
+ } else if (regClass == AMDIL::GPRV2I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGT_v2i32;
+ } else if (regClass == AMDIL::GPRV4I32RegClassID
+ || regClass == AMDIL::GPRI8RegClassID
+ || regClass == AMDIL::GPRI16RegClassID) {
+ return (unsigned int)AMDIL::UGT_v4i32;
+ } else {
+ assert(!"Unknown reg class!");
+ }
+ case AMDILCC::IL_CC_UL_LT:
+ return (unsigned int)AMDIL::ULLT;
+ case AMDILCC::IL_CC_UL_GT:
+ return (unsigned int)AMDIL::ULGT;
+ case AMDILCC::IL_CC_F_UEQ:
+ case AMDILCC::IL_CC_D_UEQ:
+ case AMDILCC::IL_CC_F_ONE:
+ case AMDILCC::IL_CC_D_ONE:
+ case AMDILCC::IL_CC_F_O:
+ case AMDILCC::IL_CC_F_UO:
+ case AMDILCC::IL_CC_D_O:
+ case AMDILCC::IL_CC_D_UO:
+ // we don't care
+ return 0;
+
+ }
+ errs()<<"Opcode: "<<CCCode<<"\n";
+ assert(0 && "Unknown opcode retrieved");
+ return 0;
+}
+SDValue
+AMDILTargetLowering::LowerMemArgument(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) const
+{
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
+ getTargetMachine().Options.GuaranteedTailCallOpt;
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can
+ // be changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable. Since they
+ // could be overwritten by lowering of arguments in case of a tail call.
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal())
+ return FIN;
+ return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+}
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction generation functions
+//===----------------------------------------------------------------------===//
+uint32_t
+AMDILTargetLowering::addExtensionInstructions(
+ uint32_t reg, bool signedShift,
+ unsigned int simpleVT) const
+{
+ int shiftSize = 0;
+ uint32_t LShift, RShift;
+ switch(simpleVT)
+ {
+ default:
+ return reg;
+ case AMDIL::GPRI8RegClassID:
+ shiftSize = 24;
+ LShift = AMDIL::SHL_i8;
+ if (signedShift) {
+ RShift = AMDIL::SHR_i8;
+ } else {
+ RShift = AMDIL::USHR_i8;
+ }
+ break;
+ case AMDIL::GPRV2I8RegClassID:
+ shiftSize = 24;
+ LShift = AMDIL::SHL_v2i8;
+ if (signedShift) {
+ RShift = AMDIL::SHR_v2i8;
+ } else {
+ RShift = AMDIL::USHR_v2i8;
+ }
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ shiftSize = 24;
+ LShift = AMDIL::SHL_v4i8;
+ if (signedShift) {
+ RShift = AMDIL::SHR_v4i8;
+ } else {
+ RShift = AMDIL::USHR_v4i8;
+ }
+ break;
+ case AMDIL::GPRI16RegClassID:
+ shiftSize = 16;
+ LShift = AMDIL::SHL_i16;
+ if (signedShift) {
+ RShift = AMDIL::SHR_i16;
+ } else {
+ RShift = AMDIL::USHR_i16;
+ }
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ shiftSize = 16;
+ LShift = AMDIL::SHL_v2i16;
+ if (signedShift) {
+ RShift = AMDIL::SHR_v2i16;
+ } else {
+ RShift = AMDIL::USHR_v2i16;
+ }
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ shiftSize = 16;
+ LShift = AMDIL::SHL_v4i16;
+ if (signedShift) {
+ RShift = AMDIL::SHR_v4i16;
+ } else {
+ RShift = AMDIL::USHR_v4i16;
+ }
+ break;
+ };
+ uint32_t LoadReg = genVReg(simpleVT);
+ uint32_t tmp1 = genVReg(simpleVT);
+ uint32_t tmp2 = genVReg(simpleVT);
+ generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
+ generateMachineInst(LShift, tmp1, reg, LoadReg);
+ generateMachineInst(RShift, tmp2, tmp1, LoadReg);
+ return tmp2;
+}
+
+MachineOperand
+AMDILTargetLowering::convertToReg(MachineOperand op) const
+{
+ if (op.isReg()) {
+ return op;
+ } else if (op.isImm()) {
+ uint32_t loadReg
+ = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+ generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
+ .addImm(op.getImm());
+ op.ChangeToRegister(loadReg, false);
+ } else if (op.isFPImm()) {
+ uint32_t loadReg
+ = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+ generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
+ .addFPImm(op.getFPImm());
+ op.ChangeToRegister(loadReg, false);
+ } else if (op.isMBB()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isFI()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isCPI()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isJTI()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isGlobal()) {
+ op.ChangeToRegister(0, false);
+ } else if (op.isSymbol()) {
+ op.ChangeToRegister(0, false);
+ }/* else if (op.isMetadata()) {
+ op.ChangeToRegister(0, false);
+ }*/
+ return op;
+}
+
+void
+AMDILTargetLowering::generateCMPInstr(
+ MachineInstr *MI,
+ MachineBasicBlock *BB,
+ const TargetInstrInfo& TII)
+const
+{
+ MachineOperand DST = MI->getOperand(0);
+ MachineOperand CC = MI->getOperand(1);
+ MachineOperand LHS = MI->getOperand(2);
+ MachineOperand RHS = MI->getOperand(3);
+ int64_t ccCode = CC.getImm();
+ unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+ unsigned int opCode = translateToOpcode(ccCode, simpleVT);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock::iterator BBI = MI;
+ setPrivateData(BB, BBI, &DL, &TII);
+ if (!LHS.isReg()) {
+ LHS = convertToReg(LHS);
+ }
+ if (!RHS.isReg()) {
+ RHS = convertToReg(RHS);
+ }
+ switch (ccCode) {
+ case AMDILCC::IL_CC_I_EQ:
+ case AMDILCC::IL_CC_I_NE:
+ case AMDILCC::IL_CC_I_GE:
+ case AMDILCC::IL_CC_I_LT:
+ {
+ uint32_t lhsreg = addExtensionInstructions(
+ LHS.getReg(), true, simpleVT);
+ uint32_t rhsreg = addExtensionInstructions(
+ RHS.getReg(), true, simpleVT);
+ generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
+ }
+ break;
+ case AMDILCC::IL_CC_U_EQ:
+ case AMDILCC::IL_CC_U_NE:
+ case AMDILCC::IL_CC_U_GE:
+ case AMDILCC::IL_CC_U_LT:
+ case AMDILCC::IL_CC_D_EQ:
+ case AMDILCC::IL_CC_F_EQ:
+ case AMDILCC::IL_CC_F_OEQ:
+ case AMDILCC::IL_CC_D_OEQ:
+ case AMDILCC::IL_CC_D_NE:
+ case AMDILCC::IL_CC_F_NE:
+ case AMDILCC::IL_CC_F_UNE:
+ case AMDILCC::IL_CC_D_UNE:
+ case AMDILCC::IL_CC_D_GE:
+ case AMDILCC::IL_CC_F_GE:
+ case AMDILCC::IL_CC_D_OGE:
+ case AMDILCC::IL_CC_F_OGE:
+ case AMDILCC::IL_CC_D_LT:
+ case AMDILCC::IL_CC_F_LT:
+ case AMDILCC::IL_CC_F_OLT:
+ case AMDILCC::IL_CC_D_OLT:
+ generateMachineInst(opCode, DST.getReg(),
+ LHS.getReg(), RHS.getReg());
+ break;
+ case AMDILCC::IL_CC_I_GT:
+ case AMDILCC::IL_CC_I_LE:
+ {
+ uint32_t lhsreg = addExtensionInstructions(
+ LHS.getReg(), true, simpleVT);
+ uint32_t rhsreg = addExtensionInstructions(
+ RHS.getReg(), true, simpleVT);
+ generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
+ }
+ break;
+ case AMDILCC::IL_CC_U_GT:
+ case AMDILCC::IL_CC_U_LE:
+ case AMDILCC::IL_CC_F_GT:
+ case AMDILCC::IL_CC_D_GT:
+ case AMDILCC::IL_CC_F_OGT:
+ case AMDILCC::IL_CC_D_OGT:
+ case AMDILCC::IL_CC_F_LE:
+ case AMDILCC::IL_CC_D_LE:
+ case AMDILCC::IL_CC_D_OLE:
+ case AMDILCC::IL_CC_F_OLE:
+ generateMachineInst(opCode, DST.getReg(),
+ RHS.getReg(), LHS.getReg());
+ break;
+ case AMDILCC::IL_CC_F_UGT:
+ case AMDILCC::IL_CC_F_ULE:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(opCode, VReg[0],
+ RHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_ULT:
+ case AMDILCC::IL_CC_F_UGE:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(opCode, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UGT:
+ case AMDILCC::IL_CC_D_ULE:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(opCode, VReg[0],
+ RHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UGE:
+ case AMDILCC::IL_CC_D_ULT:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(opCode, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_UEQ:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(AMDIL::FEQ, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_ONE:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(AMDIL::FNE, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::FEQ, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UEQ:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DEQ, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+
+ }
+ break;
+ case AMDILCC::IL_CC_D_ONE:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DNE, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::DEQ, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+
+ }
+ break;
+ case AMDILCC::IL_CC_F_O:
+ {
+ uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+ generateMachineInst(AMDIL::FEQ, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_O:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DEQ, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_UO:
+ {
+ uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+ generateMachineInst(AMDIL::FNE, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UO:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DNE, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_L_LE:
+ case AMDILCC::IL_CC_L_GE:
+ case AMDILCC::IL_CC_L_EQ:
+ case AMDILCC::IL_CC_L_NE:
+ case AMDILCC::IL_CC_L_LT:
+ case AMDILCC::IL_CC_L_GT:
+ case AMDILCC::IL_CC_UL_LE:
+ case AMDILCC::IL_CC_UL_GE:
+ case AMDILCC::IL_CC_UL_EQ:
+ case AMDILCC::IL_CC_UL_NE:
+ case AMDILCC::IL_CC_UL_LT:
+ case AMDILCC::IL_CC_UL_GT:
+ {
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
+ generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
+ } else {
+ generateLongRelational(MI, opCode);
+ }
+ }
+ break;
+ case AMDILCC::COND_ERROR:
+ assert(0 && "Invalid CC code");
+ break;
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+ AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
+: TargetLowering(TM, new TargetLoweringObjectFileELF())
+{
+ int types[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] =
+ {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] =
+ {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t numTypes = sizeof(types) / sizeof(*types);
+ size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ // These are the current register classes that are
+ // supported
+
+ addRegisterClass(MVT::i32, &AMDIL::GPRI32RegClass);
+ addRegisterClass(MVT::f32, &AMDIL::GPRF32RegClass);
+
+ if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+ addRegisterClass(MVT::f64, &AMDIL::GPRF64RegClass);
+ addRegisterClass(MVT::v2f64, &AMDIL::GPRV2F64RegClass);
+ }
+ if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
+ addRegisterClass(MVT::i8, &AMDIL::GPRI8RegClass);
+ addRegisterClass(MVT::v2i8, &AMDIL::GPRV2I8RegClass);
+ addRegisterClass(MVT::v4i8, &AMDIL::GPRV4I8RegClass);
+ setOperationAction(ISD::Constant , MVT::i8 , Legal);
+ }
+ if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
+ addRegisterClass(MVT::i16, &AMDIL::GPRI16RegClass);
+ addRegisterClass(MVT::v2i16, &AMDIL::GPRV2I16RegClass);
+ addRegisterClass(MVT::v4i16, &AMDIL::GPRV4I16RegClass);
+ setOperationAction(ISD::Constant , MVT::i16 , Legal);
+ }
+ addRegisterClass(MVT::v2f32, &AMDIL::GPRV2F32RegClass);
+ addRegisterClass(MVT::v4f32, &AMDIL::GPRV4F32RegClass);
+ addRegisterClass(MVT::v2i32, &AMDIL::GPRV2I32RegClass);
+ addRegisterClass(MVT::v4i32, &AMDIL::GPRV4I32RegClass);
+ if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+ addRegisterClass(MVT::i64, &AMDIL::GPRI64RegClass);
+ addRegisterClass(MVT::v2i64, &AMDIL::GPRV2I64RegClass);
+ }
+
+ for (unsigned int x = 0; x < numTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+ setOperationAction(ISD::OR, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_CC, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISDBITCAST, VT, Custom);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ }
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ }
+ for (unsigned int x = 0; x < numFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < numIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+ // GPU also does not have divrem function for signed or unsigned
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
+ {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ // setOperationAction(ISD::VSETCC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+
+ }
+ setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
+ if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+ if (stm->calVersion() < CAL_VERSION_SC_139
+ || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
+ }
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+ setOperationAction(ISD::Constant , MVT::i64 , Legal);
+ setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+ // we support loading/storing v2f64 but not operations on the type
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+ // TODO: Fix the UDIV24 algorithm so it works for these
+ // types correctly. This needs vector comparisons
+ // for this to work correctly.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::Other, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+ setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
+ setOperationAction(ISD::Constant , MVT::i32 , Legal);
+ setOperationAction(ISD::TRAP , MVT::Other , Legal);
+
+ setStackPointerRegisterToSaveRestore(AMDIL::SP);
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setPrefLoopAlignment(16);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+ computeRegisterProperties();
+
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+
+#undef numTypes
+#undef numIntTypes
+#undef numVectorTypes
+#undef numFloatTypes
+}
+
+const char *
+AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default: return 0;
+ case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
+ case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
+ case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
+ case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
+ case AMDILISD::CMOV: return "AMDILISD::CMOV";
+ case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
+ case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
+ case AMDILISD::MAD: return "AMDILISD::MAD";
+ case AMDILISD::UMAD: return "AMDILISD::UMAD";
+ case AMDILISD::CALL: return "AMDILISD::CALL";
+ case AMDILISD::RET: return "AMDILISD::RET";
+ case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
+ case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
+ case AMDILISD::ADD: return "AMDILISD::ADD";
+ case AMDILISD::UMUL: return "AMDILISD::UMUL";
+ case AMDILISD::AND: return "AMDILISD::AND";
+ case AMDILISD::OR: return "AMDILISD::OR";
+ case AMDILISD::NOT: return "AMDILISD::NOT";
+ case AMDILISD::XOR: return "AMDILISD::XOR";
+ case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
+ case AMDILISD::SMAX: return "AMDILISD::SMAX";
+ case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
+ case AMDILISD::MOVE: return "AMDILISD::MOVE";
+ case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
+ case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
+ case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
+ case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
+ case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
+ case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
+ case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
+ case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
+ case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
+ case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
+ case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
+ case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
+ case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
+ case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
+ case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
+ case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
+ case AMDILISD::CMP: return "AMDILISD::CMP";
+ case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
+ case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
+ case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
+ case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
+ case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
+ case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
+ case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
+ case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
+ case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
+ case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
+ case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
+ case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
+ case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
+ case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
+ case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
+ case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
+ case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
+ case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
+ case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
+ case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
+ case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
+ case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
+ case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
+ case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
+ case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
+ case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
+ case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
+ case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
+ case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
+ case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
+ case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
+ case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
+ case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
+ case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
+ case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
+ case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
+ case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
+ case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
+ case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
+ case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
+ case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
+ case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
+ case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
+ case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
+ case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
+ case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
+ case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
+ case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
+ case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
+ case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
+ case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
+ case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
+ case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
+ case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
+ case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
+ case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
+ case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
+ case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
+ case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
+ case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
+ case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
+ case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
+ case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
+ case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
+ case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
+ case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
+ case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
+ case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
+ case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
+ case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
+ case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
+ case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
+ case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
+ case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
+ case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
+ case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
+ case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
+ case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
+ case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
+ case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
+ case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
+ case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
+ case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
+ case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
+ case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
+ case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
+ case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
+ case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
+ case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
+ case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
+ case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
+ case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
+ case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
+ case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
+ case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
+ case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
+ case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
+ case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
+ case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
+ case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
+ case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
+ case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
+ case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
+ case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
+ case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
+ case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
+ case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
+ case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
+ case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
+
+ };
+}
+bool
+AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const
+{
+ if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
+ || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
+ return false;
+ }
+ bool bitCastToInt = false;
+ unsigned IntNo;
+ bool isRet = true;
+ const AMDILSubtarget *STM = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ switch (Intrinsic) {
+ default: return false; // Don't custom lower most intrinsics.
+ case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
+ IntNo = AMDILISD::ATOM_G_ADD; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
+ IntNo = AMDILISD::ATOM_L_ADD; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
+ IntNo = AMDILISD::ATOM_R_ADD; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
+ IntNo = AMDILISD::ATOM_G_AND; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_AND_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
+ IntNo = AMDILISD::ATOM_L_AND; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_AND_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
+ IntNo = AMDILISD::ATOM_R_AND; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_AND_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
+ IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
+ IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
+ IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_G_SUB;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_G_SUB_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_L_SUB;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_L_SUB_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_R_SUB;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_R_SUB_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_G_ADD;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_G_ADD_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_L_ADD;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_L_ADD_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_R_ADD;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_R_ADD_NORET;
+ }
+ break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
+ IntNo = AMDILISD::ATOM_G_MAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
+ IntNo = AMDILISD::ATOM_G_UMAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
+ IntNo = AMDILISD::ATOM_L_MAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
+ IntNo = AMDILISD::ATOM_L_UMAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
+ IntNo = AMDILISD::ATOM_R_MAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
+ IntNo = AMDILISD::ATOM_R_UMAX; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
+ IntNo = AMDILISD::ATOM_G_MIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
+ IntNo = AMDILISD::ATOM_G_UMIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
+ IntNo = AMDILISD::ATOM_L_MIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
+ IntNo = AMDILISD::ATOM_L_UMIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
+ IntNo = AMDILISD::ATOM_R_MIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
+ IntNo = AMDILISD::ATOM_R_UMIN; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
+ IntNo = AMDILISD::ATOM_G_OR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_OR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
+ IntNo = AMDILISD::ATOM_L_OR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_OR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
+ IntNo = AMDILISD::ATOM_R_OR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_OR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
+ IntNo = AMDILISD::ATOM_G_SUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
+ IntNo = AMDILISD::ATOM_L_SUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
+ IntNo = AMDILISD::ATOM_R_SUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
+ IntNo = AMDILISD::ATOM_G_RSUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
+ IntNo = AMDILISD::ATOM_L_RSUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
+ IntNo = AMDILISD::ATOM_R_RSUB; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
+ IntNo = AMDILISD::ATOM_G_XCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
+ IntNo = AMDILISD::ATOM_L_XCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
+ IntNo = AMDILISD::ATOM_R_XCHG; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
+ bitCastToInt = true;
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
+ IntNo = AMDILISD::ATOM_G_XOR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
+ IntNo = AMDILISD::ATOM_L_XOR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
+ IntNo = AMDILISD::ATOM_R_XOR; break;
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
+ case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
+ IntNo = AMDILISD::APPEND_ALLOC; break;
+ case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
+ isRet = false;
+ IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
+ case AMDGPUIntrinsic::AMDIL_append_consume_i32:
+ IntNo = AMDILISD::APPEND_CONSUME; break;
+ case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
+ isRet = false;
+ IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
+ };
+
+ Info.opc = IntNo;
+ Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
+ Info.ptrVal = I.getOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.vol = true;
+ Info.readMem = isRet;
+ Info.writeMem = true;
+ return true;
+}
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
+{
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool
+AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
+{
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+// be zero. Op is expected to be a target specific node. Used by DAG
+// combiner.
+
+void
+AMDILTargetLowering::computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const
+{
+ APInt KnownZero2;
+ APInt KnownOne2;
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+ switch (Op.getOpcode()) {
+ default: break;
+ case AMDILISD::SELECT_CC:
+ DAG.ComputeMaskedBits(
+ Op.getOperand(1),
+ KnownZero,
+ KnownOne,
+ Depth + 1
+ );
+ DAG.ComputeMaskedBits(
+ Op.getOperand(0),
+ KnownZero2,
+ KnownOne2
+ );
+ assert((KnownZero & KnownOne) == 0
+ && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0
+ && "Bits known to be one AND zero?");
+ // Only known if known in both the LHS and RHS
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ };
+}
+
+// This is the function that determines which calling convention should
+// be used. Currently there is only one calling convention
+CCAssignFn*
+AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
+{
+ //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ return CC_AMDIL32;
+}
+
+// LowerCallResult - Lower the result values of an ISD::CALL into the
+// appropriate copies out of appropriate physical registers. This assumes that
+// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+// being lowered. The returns a SDNode with the same number of values as the
+// ISD::CALL.
+SDValue
+AMDILTargetLowering::LowerCallResult(
+ SDValue Chain,
+ SDValue InFlag,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const
+{
+ // Assign locations to each value returned by this call
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ if (RVLocs[i].isRegLoc()) {
+ Chain = DAG.getCopyFromReg(
+ Chain,
+ dl,
+ RVLocs[i].getLocReg(),
+ CopyVT,
+ InFlag
+ ).getValue(1);
+ SDValue Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Val);
+ }
+ }
+
+ return Chain;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AMDILTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr *MI, MachineBasicBlock *BB) const
+{
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ switch (MI->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::CMP);
+ generateCMPInstr(MI, BB, TII);
+ MI->eraseFromParent();
+ break;
+ default:
+ break;
+ }
+ return BB;
+}
+
+// Recursively assign SDNodeOrdering to any unordered nodes
+// This is necessary to maintain source ordering of instructions
+// under -O0 to avoid odd-looking "skipping around" issues.
+ static const SDValue
+Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
+{
+ if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
+ DAG.AssignOrdering( New.getNode(), order );
+ for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
+ Ordered( DAG, order, New.getOperand(i) );
+ }
+ return New;
+}
+
+#define LOWER(A) \
+ case ISD:: A: \
+return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
+
+SDValue
+AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ assert(0 && "Custom lowering code for this"
+ "instruction is not implemented yet!");
+ break;
+ LOWER(GlobalAddress);
+ LOWER(JumpTable);
+ LOWER(ConstantPool);
+ LOWER(ExternalSymbol);
+ LOWER(FP_TO_SINT);
+ LOWER(FP_TO_UINT);
+ LOWER(SINT_TO_FP);
+ LOWER(UINT_TO_FP);
+ LOWER(ADD);
+ LOWER(MUL);
+ LOWER(SUB);
+ LOWER(FDIV);
+ LOWER(SDIV);
+ LOWER(SREM);
+ LOWER(UDIV);
+ LOWER(UREM);
+ LOWER(BUILD_VECTOR);
+ LOWER(INSERT_VECTOR_ELT);
+ LOWER(EXTRACT_VECTOR_ELT);
+ LOWER(EXTRACT_SUBVECTOR);
+ LOWER(SCALAR_TO_VECTOR);
+ LOWER(CONCAT_VECTORS);
+ LOWER(AND);
+ LOWER(OR);
+ LOWER(SELECT);
+ LOWER(SELECT_CC);
+ LOWER(SETCC);
+ LOWER(SIGN_EXTEND_INREG);
+ LOWER(BITCAST);
+ LOWER(DYNAMIC_STACKALLOC);
+ LOWER(BRCOND);
+ LOWER(BR_CC);
+ LOWER(FP_ROUND);
+ }
+ return Op;
+}
+
+int
+AMDILTargetLowering::getVarArgsFrameOffset() const
+{
+ return VarArgsFrameOffset;
+}
+#undef LOWER
+
+SDValue
+AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST = Op;
+ const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *G = GADN->getGlobal();
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ if (!GV) {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ } else {
+ if (GV->hasInitializer()) {
+ const Constant *C = dyn_cast<Constant>(GV->getInitializer());
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+ DST = DAG.getConstantFP(CF->getValueAPF(),
+ Op.getValueType());
+ } else if (dyn_cast<ConstantAggregateZero>(C)) {
+ EVT VT = Op.getValueType();
+ if (VT.isInteger()) {
+ DST = DAG.getConstant(0, VT);
+ } else {
+ DST = DAG.getConstantFP(0, VT);
+ }
+ } else {
+ assert(!"lowering this type of Global Address "
+ "not implemented yet!");
+ C->dump();
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ }
+ } else {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ }
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+ return Result;
+}
+SDValue
+AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ EVT PtrVT = Op.getValueType();
+ SDValue Result;
+ if (CP->isMachineConstantPoolEntry()) {
+ Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ } else {
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ }
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
+{
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
+ return Result;
+}
+/// LowerFORMAL_ARGUMENTS - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments places on the stack.
+/// TODO: isVarArg, hasStructRet, isMemReg
+ SDValue
+AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const
+{
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ //const Function *Fn = MF.getFunction();
+ //MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CallingConv::ID CC = MF.getFunction()->getCallingConv();
+ //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
+
+ CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // When more calling conventions are added, they need to be chosen here
+ CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
+ SDValue StackPtr;
+
+ //unsigned int FirstStackArgLoc = 0;
+
+ for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC = getRegClassFromType(
+ RegVT.getSimpleVT().SimpleTy);
+
+ unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(
+ Chain,
+ dl,
+ Reg,
+ RegVT);
+ // If this is an 8 or 16-bit value, it is really passed
+ // promoted to 32 bits. Insert an assert[sz]ext to capture
+ // this, then truncate to the right size.
+
+ if (VA.getLocInfo() == CCValAssign::SExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertSext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ } else if (VA.getLocInfo() == CCValAssign::ZExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertZext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ }
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ ArgValue = DAG.getNode(
+ ISD::TRUNCATE,
+ dl,
+ VA.getValVT(),
+ ArgValue);
+ }
+ // Add the value to the list of arguments
+ // to be passed in registers
+ InVals.push_back(ArgValue);
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }
+ } else if(VA.isMemLoc()) {
+ InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
+ dl, DAG, VA, MFI, i));
+ } else {
+ assert(0 && "found a Value Assign that is "
+ "neither a register or a memory location");
+ }
+ }
+ /*if (hasStructRet) {
+ assert(0 && "Has struct return is not yet implemented");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }*/
+
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
+ }
+ // This needs to be changed to non-zero if the return function needs
+ // to pop bytes
+ return Chain;
+}
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
+ assert(0 && "MemCopy does not exist yet");
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+ return DAG.getMemcpy(Chain,
+ Src.getDebugLoc(),
+ Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*IsVol=*/false, /*AlwaysInline=*/true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+SDValue
+AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const
+{
+ unsigned int LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD,
+ dl,
+ getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal()) {
+ PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
+ } else {
+ PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+ }
+ return PtrOff;
+}
+/// LowerCAL - functions arguments are copied from virtual
+/// regs to (physical regs)/(stack frame), CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall, hasStructRet
+SDValue
+AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
+ bool& isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const
+{
+ isTailCall = false;
+ MachineFunction& MF = DAG.getMachineFunction();
+ // FIXME: DO we need to handle fast calling conventions and tail call
+ // optimizations?? X86/PPC ISelLowering
+ /*bool hasStructRet = (TheCall->getNumArgs())
+ ? TheCall->getArgFlags(0).device()->isSRet()
+ : false;*/
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Analyze operands of the call, assigning locations to each operand
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // Analyize the calling operands, but need to change
+ // if we have more than one calling convetion
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+ unsigned int NumBytes = CCInfo.getNextStackOffset();
+ if (isTailCall) {
+ assert(isTailCall && "Tail Call not handled yet!");
+ // See X86/PPC ISelLowering
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+ //unsigned int FirstStacArgLoc = 0;
+ //int LastArgStackLoc = 0;
+
+ // Walk the register/memloc assignments, insert copies/loads
+ for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
+ // Arguments start after the 5 first operands of ISD::CALL
+ SDValue Arg = OutVals[i];
+ //Promote the value if needed
+ switch(VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND,
+ dl,
+ VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND,
+ dl,
+ VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND,
+ dl,
+ VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (VA.isMemLoc()) {
+ // Create the frame index object for this incoming parameter
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+ SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+ // emit ISD::STORE whichs stores the
+ // parameter value to a stack Location
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ } else {
+ assert(0 && "Not a Reg/Mem Loc, major error!");
+ }
+ }
+ if (!MemOpChains.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor,
+ dl,
+ MVT::Other,
+ &MemOpChains[0],
+ MemOpChains.size());
+ }
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain,
+ dl,
+ RegsToPass[i].first,
+ RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress/
+ // TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+ else if (isTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1708
+ }
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
+ SmallVector<SDValue, 8> Ops;
+
+ if (isTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1721
+ }
+ // If this is a direct call, pass the chain and the callee
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+
+ if (isTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1739
+ }
+
+ // Add argument registers to the end of the list so that they are known
+ // live into the call
+ for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(
+ RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ // Emit Tail Call
+ if (isTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1762
+ }
+
+ Chain = DAG.getNode(AMDILISD::CALL,
+ dl,
+ NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node
+ Chain = DAG.getCALLSEQ_END(
+ Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ // Handle result values, copying them out of physregs into vregs that
+ // we return
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
+}
+static void checkMADType(
+ SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
+{
+ bool globalLoadStore = false;
+ is24bitMAD = false;
+ is32bitMAD = false;
+ return;
+ assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
+ "this to work correctly!");
+ if (Op.getNode()->use_empty()) {
+ return;
+ }
+ for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
+ nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
+ SDNode *ptr = *nBegin;
+ const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
+ // If we are not a LSBaseSDNode then we don't do this
+ // optimization.
+ // If we are a LSBaseSDNode, but the op is not the offset
+ // or base pointer, then we don't do this optimization
+ // (i.e. we are the value being stored)
+ if (!lsNode ||
+ (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
+ return;
+ }
+ const PointerType *PT =
+ dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
+ unsigned as = PT->getAddressSpace();
+ switch(as) {
+ default:
+ globalLoadStore = true;
+ case AMDILAS::PRIVATE_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::CONSTANT_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::LOCAL_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::REGION_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ }
+ }
+ if (globalLoadStore) {
+ is32bitMAD = true;
+ } else {
+ is24bitMAD = true;
+ }
+}
+
+SDValue
+AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ const AMDILSubtarget *stm = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ bool isVec = OVT.isVector();
+ if (OVT.getScalarType() == MVT::i64) {
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i64) {
+ INTTY = MVT::v2i32;
+ }
+ if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
+ && INTTY == MVT::i32) {
+ DST = DAG.getNode(AMDILISD::ADD,
+ DL,
+ OVT,
+ LHS, RHS);
+ } else {
+ SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
+ // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+ LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
+ RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
+ LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
+ RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+ INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
+ INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
+ SDValue cmp;
+ cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+ INTLO, RHSLO);
+ cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
+ INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
+ DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
+ INTLO, INTHI);
+ }
+ } else {
+ if (LHS.getOpcode() == ISD::FrameIndex ||
+ RHS.getOpcode() == ISD::FrameIndex) {
+ DST = DAG.getNode(AMDILISD::ADDADDR,
+ DL,
+ OVT,
+ LHS, RHS);
+ } else {
+ if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+ && LHS.getNumOperands()
+ && RHS.getNumOperands()) {
+ bool is24bitMAD = false;
+ bool is32bitMAD = false;
+ const ConstantSDNode *LHSConstOpCode =
+ dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
+ const ConstantSDNode *RHSConstOpCode =
+ dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
+ if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
+ || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
+ || LHS.getOpcode() == ISD::MUL
+ || RHS.getOpcode() == ISD::MUL) {
+ SDValue Op1, Op2, Op3;
+ // FIXME: Fix this so that it works for unsigned 24bit ops.
+ if (LHS.getOpcode() == ISD::MUL) {
+ Op1 = LHS.getOperand(0);
+ Op2 = LHS.getOperand(1);
+ Op3 = RHS;
+ } else if (RHS.getOpcode() == ISD::MUL) {
+ Op1 = RHS.getOperand(0);
+ Op2 = RHS.getOperand(1);
+ Op3 = LHS;
+ } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
+ Op1 = LHS.getOperand(0);
+ Op2 = DAG.getConstant(
+ 1 << LHSConstOpCode->getZExtValue(), MVT::i32);
+ Op3 = RHS;
+ } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
+ Op1 = RHS.getOperand(0);
+ Op2 = DAG.getConstant(
+ 1 << RHSConstOpCode->getZExtValue(), MVT::i32);
+ Op3 = LHS;
+ }
+ checkMADType(Op, stm, is24bitMAD, is32bitMAD);
+ // We can possibly do a MAD transform!
+ if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+ uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
+ SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+ DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+ DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
+ Op1, Op2, Op3);
+ } else if(is32bitMAD) {
+ SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+ DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+ DL, Tys, DAG.getEntryNode(),
+ DAG.getConstant(
+ AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
+ Op1, Op2, Op3);
+ }
+ }
+ }
+ DST = DAG.getNode(AMDILISD::ADD,
+ DL,
+ OVT,
+ LHS, RHS);
+ }
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
+ uint32_t bits) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT INTTY = Op.getValueType();
+ EVT FPTY;
+ if (INTTY.isVector()) {
+ FPTY = EVT(MVT::getVectorVT(MVT::f32,
+ INTTY.getVectorNumElements()));
+ } else {
+ FPTY = EVT(MVT::f32);
+ }
+ /* static inline uint
+ __clz_Nbit(uint x)
+ {
+ int xor = 0x3f800000U | x;
+ float tp = as_float(xor);
+ float t = tp + -1.0f;
+ uint tint = as_uint(t);
+ int cmp = (x != 0);
+ uint tsrc = tint >> 23;
+ uint tmask = tsrc & 0xffU;
+ uint cst = (103 + N)U - tmask;
+ return cmp ? cst : N;
+ }
+ */
+ assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
+ && "genCLZu16 only works on 32bit types");
+ // uint x = Op
+ SDValue x = Op;
+ // xornode = 0x3f800000 | x
+ SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
+ DAG.getConstant(0x3f800000, INTTY), x);
+ // float tp = as_float(xornode)
+ SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
+ // float t = tp + -1.0f
+ SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
+ DAG.getConstantFP(-1.0f, FPTY));
+ // uint tint = as_uint(t)
+ SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
+ // int cmp = (x != 0)
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
+ DAG.getConstant(0, INTTY));
+ // uint tsrc = tint >> 23
+ SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
+ DAG.getConstant(23, INTTY));
+ // uint tmask = tsrc & 0xFF
+ SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
+ DAG.getConstant(0xFFU, INTTY));
+ // uint cst = (103 + bits) - tmask
+ SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
+ DAG.getConstant((103U + bits), INTTY), tmask);
+ // return cmp ? cst : N
+ cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
+ DAG.getConstant(bits, INTTY));
+ return cst;
+}
+
+SDValue
+AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST = SDValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT INTTY = Op.getValueType();
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+ //__clz_32bit(uint u)
+ //{
+ // int z = __amdil_ffb_hi(u) ;
+ // return z < 0 ? 32 : z;
+ // }
+ // uint u = op
+ SDValue u = Op;
+ // int z = __amdil_ffb_hi(u)
+ SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
+ // int cmp = z < 0
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ z, DAG.getConstant(0, INTTY));
+ // return cmp ? 32 : z
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
+ DAG.getConstant(32, INTTY), z);
+ } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // static inline uint
+ //__clz_32bit(uint x)
+ //{
+ // uint zh = __clz_16bit(x >> 16);
+ // uint zl = __clz_16bit(x & 0xffffU);
+ // return zh == 16U ? 16U + zl : zh;
+ //}
+ // uint x = Op
+ SDValue x = Op;
+ // uint xs16 = x >> 16
+ SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
+ DAG.getConstant(16, INTTY));
+ // uint zh = __clz_16bit(xs16)
+ SDValue zh = genCLZuN(xs16, DAG, 16);
+ // uint xa16 = x & 0xFFFF
+ SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
+ DAG.getConstant(0xFFFFU, INTTY));
+ // uint zl = __clz_16bit(xa16)
+ SDValue zl = genCLZuN(xa16, DAG, 16);
+ // uint cmp = zh == 16U
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zh, DAG.getConstant(16U, INTTY));
+ // uint zl16 = zl + 16
+ SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
+ DAG.getConstant(16, INTTY), zl);
+ // return cmp ? zl16 : zh
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
+ cmp, zl16, zh);
+ } else {
+ assert(0 && "Attempting to generate a CLZ function with an"
+ " unknown graphics card");
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST = SDValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT INTTY;
+ EVT LONGTY = Op.getValueType();
+ bool isVec = LONGTY.isVector();
+ if (isVec) {
+ INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
+ .getVectorNumElements()));
+ } else {
+ INTTY = EVT(MVT::i32);
+ }
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+ // Evergreen:
+ // static inline uint
+ // __clz_u64(ulong x)
+ // {
+ //uint zhi = __clz_32bit((uint)(x >> 32));
+ //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
+ //return zhi == 32U ? 32U + zlo : zhi;
+ //}
+ //ulong x = op
+ SDValue x = Op;
+ // uint xhi = x >> 32
+ SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ // uint xlo = x & 0xFFFFFFFF
+ SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
+ // uint zhi = __clz_32bit(xhi)
+ SDValue zhi = genCLZu32(xhi, DAG);
+ // uint zlo = __clz_32bit(xlo)
+ SDValue zlo = genCLZu32(xlo, DAG);
+ // uint cmp = zhi == 32
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhi, DAG.getConstant(32U, INTTY));
+ // uint zlop32 = 32 + zlo
+ SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
+ DAG.getConstant(32U, INTTY), zlo);
+ // return cmp ? zlop32: zhi
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
+ } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // HD4XXX:
+ // static inline uint
+ //__clz_64bit(ulong x)
+ //{
+ //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
+ //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
+ //uint zl = __clz_23bit((uint)x & 0x7fffffU);
+ //uint r = zh == 18U ? 18U + zm : zh;
+ //return zh + zm == 41U ? 41U + zl : r;
+ //}
+ //ulong x = Op
+ SDValue x = Op;
+ // ulong xs46 = x >> 46
+ SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+ DAG.getConstant(46, LONGTY));
+ // uint ixs46 = (uint)xs46
+ SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
+ // ulong xs23 = x >> 23
+ SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+ DAG.getConstant(23, LONGTY));
+ // uint ixs23 = (uint)xs23
+ SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
+ // uint xs23m23 = ixs23 & 0x7FFFFF
+ SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
+ DAG.getConstant(0x7fffffU, INTTY));
+ // uint ix = (uint)x
+ SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ // uint xm23 = ix & 0x7FFFFF
+ SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
+ DAG.getConstant(0x7fffffU, INTTY));
+ // uint zh = __clz_23bit(ixs46)
+ SDValue zh = genCLZuN(ixs46, DAG, 23);
+ // uint zm = __clz_23bit(xs23m23)
+ SDValue zm = genCLZuN(xs23m23, DAG, 23);
+ // uint zl = __clz_23bit(xm23)
+ SDValue zl = genCLZuN(xm23, DAG, 23);
+ // uint zhm5 = zh - 5
+ SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
+ DAG.getConstant(-5U, INTTY));
+ SDValue const18 = DAG.getConstant(18, INTTY);
+ SDValue const41 = DAG.getConstant(41, INTTY);
+ // uint cmp1 = zh = 18
+ SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhm5, const18);
+ // uint zhm5zm = zhm5 + zh
+ SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
+ // uint cmp2 = zhm5zm == 41
+ SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhm5zm, const41);
+ // uint zmp18 = zhm5 + 18
+ SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
+ // uint zlp41 = zl + 41
+ SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
+ // uint r = cmp1 ? zmp18 : zh
+ SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
+ cmp1, zmp18, zhm5);
+ // return cmp2 ? zlp41 : r
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
+ } else {
+ assert(0 && "Attempting to generate a CLZ function with an"
+ " unknown graphics card");
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
+ bool includeSign) const
+{
+ EVT INTVT;
+ EVT LONGVT;
+ SDValue DST;
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT RHSVT = RHS.getValueType();
+ bool isVec = RHSVT.isVector();
+ if (isVec) {
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
+ .getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
+ .getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // unsigned version:
+ // uint uhi = (uint)(d * 0x1.0p-32);
+ // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
+ // return as_ulong2((uint2)(ulo, uhi));
+ //
+ // signed version:
+ // double ad = fabs(d);
+ // long l = unsigned_version(ad);
+ // long nl = -l;
+ // return d == ad ? l : nl;
+ SDValue d = RHS;
+ if (includeSign) {
+ d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
+ }
+ SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
+ DAG.getConstantFP(0x2f800000, RHSVT));
+ SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
+ SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
+ ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
+ DAG.getConstantFP(0xcf800000, RHSVT), d);
+ SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
+ SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
+ if (includeSign) {
+ SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
+ SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
+ RHS, d);
+ l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
+ }
+ DST = l;
+ } else {
+ /*
+ __attribute__((always_inline)) long
+ cast_f64_to_i64(double d)
+ {
+ // Convert d in to 32-bit components
+ long x = as_long(d);
+ xhi = LCOMPHI(x);
+ xlo = LCOMPLO(x);
+
+ // Generate 'normalized' mantissa
+ mhi = xhi | 0x00100000; // hidden bit
+ mhi <<= 11;
+ temp = xlo >> (32 - 11);
+ mhi |= temp
+ mlo = xlo << 11;
+
+ // Compute shift right count from exponent
+ e = (xhi >> (52-32)) & 0x7ff;
+ sr = 1023 + 63 - e;
+ srge64 = sr >= 64;
+ srge32 = sr >= 32;
+
+ // Compute result for 0 <= sr < 32
+ rhi0 = mhi >> (sr &31);
+ rlo0 = mlo >> (sr &31);
+ temp = mhi << (32 - sr);
+ temp |= rlo0;
+ rlo0 = sr ? temp : rlo0;
+
+ // Compute result for 32 <= sr
+ rhi1 = 0;
+ rlo1 = srge64 ? 0 : rhi0;
+
+ // Pick between the 2 results
+ rhi = srge32 ? rhi1 : rhi0;
+ rlo = srge32 ? rlo1 : rlo0;
+
+ // Optional saturate on overflow
+ srlt0 = sr < 0;
+ rhi = srlt0 ? MAXVALUE : rhi;
+ rlo = srlt0 ? MAXVALUE : rlo;
+
+ // Create long
+ res = LCREATE( rlo, rhi );
+
+ // Deal with sign bit (ignoring whether result is signed or unsigned value)
+ if (includeSign) {
+ sign = ((signed int) xhi) >> 31; fill with sign bit
+ sign = LCREATE( sign, sign );
+ res += sign;
+ res ^= sign;
+ }
+
+ return res;
+ }
+ */
+ SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
+ SDValue c32 = DAG.getConstant( 32, INTVT );
+
+ // Convert d in to 32-bit components
+ SDValue d = RHS;
+ SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+ // Generate 'normalized' mantissa
+ SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
+ xhi, DAG.getConstant( 0x00100000, INTVT ) );
+ mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
+ SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
+ xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
+ mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
+ SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );
+
+ // Compute shift right count from exponent
+ SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
+ xhi, DAG.getConstant( 52-32, INTVT ) );
+ e = DAG.getNode( ISD::AND, DL, INTVT,
+ e, DAG.getConstant( 0x7ff, INTVT ) );
+ SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( 1023 + 63, INTVT ), e );
+ SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ sr, DAG.getConstant(64, INTVT));
+ SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ sr, DAG.getConstant(32, INTVT));
+
+ // Compute result for 0 <= sr < 32
+ SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
+ SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
+ temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
+ temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
+ temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
+ rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );
+
+ // Compute result for 32 <= sr
+ SDValue rhi1 = DAG.getConstant( 0, INTVT );
+ SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge64, rhi1, rhi0 );
+
+ // Pick between the 2 results
+ SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge32, rhi1, rhi0 );
+ SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge32, rlo1, rlo0 );
+
+ // Create long
+ SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+
+ // Deal with sign bit
+ if (includeSign) {
+ SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
+ xhi, DAG.getConstant( 31, INTVT ) );
+ sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
+ res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
+ res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
+ }
+ DST = res;
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
+ bool includeSign) const
+{
+ EVT INTVT;
+ EVT LONGVT;
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT RHSVT = RHS.getValueType();
+ bool isVec = RHSVT.isVector();
+ if (isVec) {
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+ RHSVT.getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ /*
+ __attribute__((always_inline)) int
+ cast_f64_to_[u|i]32(double d)
+ {
+ // Convert d in to 32-bit components
+ long x = as_long(d);
+ xhi = LCOMPHI(x);
+ xlo = LCOMPLO(x);
+
+ // Generate 'normalized' mantissa
+ mhi = xhi | 0x00100000; // hidden bit
+ mhi <<= 11;
+ temp = xlo >> (32 - 11);
+ mhi |= temp
+
+ // Compute shift right count from exponent
+ e = (xhi >> (52-32)) & 0x7ff;
+ sr = 1023 + 31 - e;
+ srge32 = sr >= 32;
+
+ // Compute result for 0 <= sr < 32
+ res = mhi >> (sr &31);
+ res = srge32 ? 0 : res;
+
+ // Optional saturate on overflow
+ srlt0 = sr < 0;
+ res = srlt0 ? MAXVALUE : res;
+
+ // Deal with sign bit (ignoring whether result is signed or unsigned value)
+ if (includeSign) {
+ sign = ((signed int) xhi) >> 31; fill with sign bit
+ res += sign;
+ res ^= sign;
+ }
+
+ return res;
+ }
+ */
+ SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
+
+ // Convert d in to 32-bit components
+ SDValue d = RHS;
+ SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+ // Generate 'normalized' mantissa
+ SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
+ xhi, DAG.getConstant( 0x00100000, INTVT ) );
+ mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
+ SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
+ xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
+ mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
+
+ // Compute shift right count from exponent
+ SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
+ xhi, DAG.getConstant( 52-32, INTVT ) );
+ e = DAG.getNode( ISD::AND, DL, INTVT,
+ e, DAG.getConstant( 0x7ff, INTVT ) );
+ SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( 1023 + 31, INTVT ), e );
+ SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ sr, DAG.getConstant(32, INTVT));
+
+ // Compute result for 0 <= sr < 32
+ SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
+ res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge32, DAG.getConstant(0,INTVT), res );
+
+ // Deal with sign bit
+ if (includeSign) {
+ SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
+ xhi, DAG.getConstant( 31, INTVT ) );
+ res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
+ res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
+ }
+ return res;
+}
+SDValue
+AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue RHS = Op.getOperand(0);
+ EVT RHSVT = RHS.getValueType();
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ EVT LHSVT = Op.getValueType();
+ MVT LST = LHSVT.getScalarType().getSimpleVT();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue DST;
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (RST == MVT::f64 && RHSVT.isVector()
+ && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // We dont support vector 64bit floating point convertions.
+ for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
+ SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+ op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
+ if (!x) {
+ DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+ } else {
+ DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
+ DST, op, DAG.getTargetConstant(x, MVT::i32));
+ }
+ }
+ } else {
+ if (RST == MVT::f64
+ && LST == MVT::i32) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = SDValue(Op.getNode(), 0);
+ } else {
+ DST = genf64toi32(RHS, DAG, true);
+ }
+ } else if (RST == MVT::f64
+ && LST == MVT::i64) {
+ DST = genf64toi64(RHS, DAG, true);
+ } else if (RST == MVT::f64
+ && (LST == MVT::i8 || LST == MVT::i16)) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
+ } else {
+ SDValue ToInt = genf64toi32(RHS, DAG, true);
+ DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
+ }
+
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST;
+ SDValue RHS = Op.getOperand(0);
+ EVT RHSVT = RHS.getValueType();
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ EVT LHSVT = Op.getValueType();
+ MVT LST = LHSVT.getScalarType().getSimpleVT();
+ DebugLoc DL = Op.getDebugLoc();
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (RST == MVT::f64 && RHSVT.isVector()
+ && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // We dont support vector 64bit floating point convertions.
+ for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
+ SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+ op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
+ if (!x) {
+ DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+ } else {
+ DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
+ DST, op, DAG.getTargetConstant(x, MVT::i32));
+ }
+
+ }
+ } else {
+ if (RST == MVT::f64
+ && LST == MVT::i32) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = SDValue(Op.getNode(), 0);
+ } else {
+ DST = genf64toi32(RHS, DAG, false);
+ }
+ } else if (RST == MVT::f64
+ && LST == MVT::i64) {
+ DST = genf64toi64(RHS, DAG, false);
+ } else if (RST == MVT::f64
+ && (LST == MVT::i8 || LST == MVT::i16)) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
+ } else {
+ SDValue ToInt = genf64toi32(RHS, DAG, false);
+ DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
+ }
+
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
+ SelectionDAG &DAG) const
+{
+ EVT RHSVT = RHS.getValueType();
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT INTVT;
+ EVT LONGVT;
+ bool isVec = RHSVT.isVector();
+ if (isVec) {
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+ RHSVT.getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ SDValue x = RHS;
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (stm->calVersion() >= CAL_VERSION_SC_135) {
+ // unsigned x = RHS;
+ // ulong xd = (ulong)(0x4330_0000 << 32) | x;
+ // double d = as_double( xd );
+ // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
+ SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
+ DAG.getConstant( 0x43300000, INTVT ) );
+ SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
+ SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
+ DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
+ return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
+ } else {
+ SDValue clz = genCLZu32(x, DAG);
+
+ // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
+ // Except for an input 0... which requires a 0 exponent
+ SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( (1023+31), INTVT), clz );
+ exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );
+
+ // Normalize frac
+ SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );
+
+ // Eliminate hidden bit
+ rhi = DAG.getNode( ISD::AND, DL, INTVT,
+ rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
+
+ // Pack exponent and frac
+ SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
+ rhi, DAG.getConstant( (32 - 11), INTVT ) );
+ rhi = DAG.getNode( ISD::SRL, DL, INTVT,
+ rhi, DAG.getConstant( 11, INTVT ) );
+ exp = DAG.getNode( ISD::SHL, DL, INTVT,
+ exp, DAG.getConstant( 20, INTVT ) );
+ rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
+
+ // Convert 2 x 32 in to 1 x 64, then to double precision float type
+ SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+ return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
+ }
+}
+SDValue
+AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
+ SelectionDAG &DAG) const
+{
+ EVT RHSVT = RHS.getValueType();
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT INTVT;
+ EVT LONGVT;
+ bool isVec = RHSVT.isVector();
+ if (isVec) {
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ INTVT = EVT(MVT::i32);
+ }
+ LONGVT = RHSVT;
+ SDValue x = RHS;
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // double dhi = (double)(as_uint2(x).y);
+ // double dlo = (double)(as_uint2(x).x);
+ // return mad(dhi, 0x1.0p+32, dlo)
+ SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
+ dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
+ SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
+ dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
+ return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
+ DAG.getConstantFP(0x4f800000, LHSVT), dlo);
+ } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
+ // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
+ // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
+ // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
+ SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
+ SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
+ SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
+ SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
+ SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
+ SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
+ DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
+ hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
+ return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
+
+ } else {
+ SDValue clz = genCLZu64(x, DAG);
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+ // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
+ SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( (1023+63), INTVT), clz );
+ SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
+ exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ mash, exp, mash ); // exp = exp, or 0 if input was 0
+
+ // Normalize frac
+ SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
+ clz, DAG.getConstant( 31, INTVT ) );
+ SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( 32, INTVT ), clz31 );
+ SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
+ SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
+ t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
+ SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
+ SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
+ SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
+ SDValue rlo2 = DAG.getConstant( 0, INTVT );
+ SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
+ clz, DAG.getConstant( 32, INTVT ) );
+ SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ clz32, rhi2, rhi1 );
+ SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ clz32, rlo2, rlo1 );
+
+ // Eliminate hidden bit
+ rhi = DAG.getNode( ISD::AND, DL, INTVT,
+ rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
+
+ // Save bits needed to round properly
+ SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
+ rlo, DAG.getConstant( 0x7ff, INTVT ) );
+
+ // Pack exponent and frac
+ rlo = DAG.getNode( ISD::SRL, DL, INTVT,
+ rlo, DAG.getConstant( 11, INTVT ) );
+ SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
+ rhi, DAG.getConstant( (32 - 11), INTVT ) );
+ rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
+ rhi = DAG.getNode( ISD::SRL, DL, INTVT,
+ rhi, DAG.getConstant( 11, INTVT ) );
+ exp = DAG.getNode( ISD::SHL, DL, INTVT,
+ exp, DAG.getConstant( 20, INTVT ) );
+ rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
+
+ // Compute rounding bit
+ SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
+ rlo, DAG.getConstant( 1, INTVT ) );
+ SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
+ round, DAG.getConstant( 0x3ff, INTVT ) );
+ grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
+ grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
+ grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
+ round = DAG.getNode( ISD::SRL, DL, INTVT,
+ round, DAG.getConstant( 10, INTVT ) );
+ round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
+
+ // Add rounding bit
+ SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
+ round, DAG.getConstant( 0, INTVT ) );
+ SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+ res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
+ return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
+ }
+}
+SDValue
+AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue RHS = Op.getOperand(0);
+ EVT RHSVT = RHS.getValueType();
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ EVT LHSVT = Op.getValueType();
+ MVT LST = LHSVT.getScalarType().getSimpleVT();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue DST;
+ EVT INTVT;
+ EVT LONGVT;
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (LST == MVT::f64 && LHSVT.isVector()
+ && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // We dont support vector 64bit floating point convertions.
+ DST = Op;
+ for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
+ SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+ op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
+ if (!x) {
+ DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+ } else {
+ DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
+ op, DAG.getTargetConstant(x, MVT::i32));
+ }
+
+ }
+ } else {
+
+ if (RST == MVT::i32
+ && LST == MVT::f64) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = SDValue(Op.getNode(), 0);
+ } else {
+ DST = genu32tof64(RHS, LHSVT, DAG);
+ }
+ } else if (RST == MVT::i64
+ && LST == MVT::f64) {
+ DST = genu64tof64(RHS, LHSVT, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue RHS = Op.getOperand(0);
+ EVT RHSVT = RHS.getValueType();
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ EVT INTVT;
+ EVT LONGVT;
+ SDValue DST;
+ bool isVec = RHSVT.isVector();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT LHSVT = Op.getValueType();
+ MVT LST = LHSVT.getScalarType().getSimpleVT();
+ const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());
+ if (LST == MVT::f64 && LHSVT.isVector()
+ && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ // We dont support vector 64bit floating point convertions.
+ for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
+ SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+ op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
+ if (!x) {
+ DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+ } else {
+ DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
+ op, DAG.getTargetConstant(x, MVT::i32));
+ }
+
+ }
+ } else {
+
+ if (isVec) {
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+ RHSVT.getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ MVT RST = RHSVT.getScalarType().getSimpleVT();
+ if ((RST == MVT::i32 || RST == MVT::i64)
+ && LST == MVT::f64) {
+ if (RST == MVT::i32) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ DST = SDValue(Op.getNode(), 0);
+ return DST;
+ }
+ }
+ SDValue c31 = DAG.getConstant( 31, INTVT );
+ SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
+
+ SDValue S; // Sign, as 0 or -1
+ SDValue Sbit; // Sign bit, as one bit, MSB only.
+ if (RST == MVT::i32) {
+ Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
+ S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
+ } else { // 64-bit case... SRA of 64-bit values is slow
+ SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
+ Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
+ SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
+ S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
+ }
+
+ // get abs() of input value, given sign as S (0 or -1)
+ // SpI = RHS + S
+ SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
+ // SpIxS = SpI ^ S
+ SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
+
+ // Convert unsigned value to double precision
+ SDValue R;
+ if (RST == MVT::i32) {
+ // r = cast_u32_to_f64(SpIxS)
+ R = genu32tof64(SpIxS, LHSVT, DAG);
+ } else {
+ // r = cast_u64_to_f64(SpIxS)
+ R = genu64tof64(SpIxS, LHSVT, DAG);
+ }
+
+ // drop in the sign bit
+ SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
+ SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
+ SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
+ thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
+ t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
+ DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ bool isVec = RHS.getValueType().isVector();
+ if (OVT.getScalarType() == MVT::i64) {
+ /*const AMDILTargetMachine*
+ amdtm = reinterpret_cast<const AMDILTargetMachine*>
+ (&this->getTargetMachine());
+ const AMDILSubtarget*
+ stm = dynamic_cast<const AMDILSubtarget*>(
+ amdtm->getSubtargetImpl());*/
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i64) {
+ INTTY = MVT::v2i32;
+ }
+ SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
+ // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+ LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
+ RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
+ LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
+ RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+ INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
+ INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
+ //TODO: need to use IBORROW on HD5XXX and later hardware
+ SDValue cmp;
+ if (OVT == MVT::i64) {
+ cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+ LHSLO, RHSLO);
+ } else {
+ SDValue cmplo;
+ SDValue cmphi;
+ SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
+ SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
+ SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
+ SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
+ cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
+ DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+ LHSRLO, RHSRLO);
+ cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
+ DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+ LHSRHI, RHSRHI);
+ cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
+ cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
+ cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
+ }
+ INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
+ DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
+ INTLO, INTHI);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::f64) {
+ DST = LowerFDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::f32) {
+ DST = LowerFDIV32(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerUDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerUDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerUDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerUREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerUREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerUREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerUREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ bool isVec = OVT.isVector();
+ if (OVT.getScalarType() != MVT::i64)
+ {
+ DST = SDValue(Op.getNode(), 0);
+ } else {
+ assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
+ // TODO: This needs to be turned into a tablegen pattern
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i64) {
+ INTTY = MVT::v2i32;
+ }
+ // mul64(h1, l1, h0, l0)
+ SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY, LHS);
+ SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY, LHS);
+ SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY, RHS);
+ SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY, RHS);
+ // MULLO_UINT_1 r1, h0, l1
+ SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
+ DL,
+ INTTY, RHSHI, LHSLO);
+ // MULLO_UINT_1 r2, h1, l0
+ SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
+ DL,
+ INTTY, RHSLO, LHSHI);
+ // ADD_INT hr, r1, r2
+ SDValue ADDHI = DAG.getNode(ISD::ADD,
+ DL,
+ INTTY, RHILLO, RLOHHI);
+ // MULHI_UINT_1 r3, l1, l0
+ SDValue RLOLLO = DAG.getNode(ISD::MULHU,
+ DL,
+ INTTY, RHSLO, LHSLO);
+ // ADD_INT hr, hr, r3
+ SDValue HIGH = DAG.getNode(ISD::ADD,
+ DL,
+ INTTY, ADDHI, RLOLLO);
+ // MULLO_UINT_1 l3, l1, l0
+ SDValue LOW = DAG.getNode(AMDILISD::UMUL,
+ DL,
+ INTTY, LHSLO, RHSLO);
+ DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ OVT, LOW, HIGH);
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
+{
+ EVT VT = Op.getValueType();
+ //printSDValue(Op, 1);
+ SDValue Nodes1;
+ SDValue second;
+ SDValue third;
+ SDValue fourth;
+ DebugLoc DL = Op.getDebugLoc();
+ Nodes1 = DAG.getNode(AMDILISD::VBUILD,
+ DL,
+ VT, Op.getOperand(0));
+ bool allEqual = true;
+ for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
+ if (Op.getOperand(0) != Op.getOperand(x)) {
+ allEqual = false;
+ break;
+ }
+ }
+ if (allEqual) {
+ return Nodes1;
+ }
+ switch(Op.getNumOperands()) {
+ default:
+ case 1:
+ break;
+ case 4:
+ fourth = Op.getOperand(3);
+ if (fourth.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ fourth,
+ DAG.getConstant(7, MVT::i32));
+ }
+ case 3:
+ third = Op.getOperand(2);
+ if (third.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ third,
+ DAG.getConstant(6, MVT::i32));
+ }
+ case 2:
+ second = Op.getOperand(1);
+ if (second.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ second,
+ DAG.getConstant(5, MVT::i32));
+ }
+ break;
+ };
+ return Nodes1;
+}
+
+SDValue
+AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ const SDValue *ptr = NULL;
+ const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ uint32_t swizzleNum = 0;
+ SDValue DST;
+ if (!VT.isVector()) {
+ SDValue Res = Op.getOperand(0);
+ return Res;
+ }
+
+ if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
+ ptr = &Op.getOperand(1);
+ } else {
+ ptr = &Op.getOperand(0);
+ }
+ if (CSDN) {
+ swizzleNum = (uint32_t)CSDN->getZExtValue();
+ uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
+ uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
+ DST = DAG.getNode(AMDILISD::VINSERT,
+ DL,
+ VT,
+ Op.getOperand(0),
+ *ptr,
+ DAG.getTargetConstant(mask2, MVT::i32),
+ DAG.getTargetConstant(mask3, MVT::i32));
+ } else {
+ uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
+ uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
+ SDValue res = DAG.getNode(AMDILISD::VINSERT,
+ DL, VT, Op.getOperand(0), *ptr,
+ DAG.getTargetConstant(mask2, MVT::i32),
+ DAG.getTargetConstant(mask3, MVT::i32));
+ for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
+ mask2 = 0x04030201 & ~(0xFF << (x * 8));
+ mask3 = 0x01010101 & (0xFF << (x * 8));
+ SDValue t = DAG.getNode(AMDILISD::VINSERT,
+ DL, VT, Op.getOperand(0), *ptr,
+ DAG.getTargetConstant(mask2, MVT::i32),
+ DAG.getTargetConstant(mask3, MVT::i32));
+ SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
+ DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+ Op.getOperand(2), DAG.getConstant(x, MVT::i32));
+ c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
+ res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
+ }
+ DST = res;
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ //printSDValue(Op, 1);
+ const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ uint64_t swizzleNum = 0;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Res;
+ if (!Op.getOperand(0).getValueType().isVector()) {
+ Res = Op.getOperand(0);
+ return Res;
+ }
+ if (CSDN) {
+ // Static vector extraction
+ swizzleNum = CSDN->getZExtValue() + 1;
+ Res = DAG.getNode(AMDILISD::VEXTRACT,
+ DL, VT,
+ Op.getOperand(0),
+ DAG.getTargetConstant(swizzleNum, MVT::i32));
+ } else {
+ SDValue Op1 = Op.getOperand(1);
+ uint32_t vecSize = 4;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
+ DL, VT, Op0,
+ DAG.getTargetConstant(1, MVT::i32));
+ if (Op0.getValueType().isVector()) {
+ vecSize = Op0.getValueType().getVectorNumElements();
+ }
+ for (uint32_t x = 2; x <= vecSize; ++x) {
+ SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
+ DL, VT, Op0,
+ DAG.getTargetConstant(x, MVT::i32));
+ SDValue c = DAG.getNode(AMDILISD::CMP,
+ DL, Op1.getValueType(),
+ DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+ Op1, DAG.getConstant(x, MVT::i32));
+ res = DAG.getNode(AMDILISD::CMOVLOG, DL,
+ VT, c, t, res);
+
+ }
+ Res = res;
+ }
+ return Res;
+}
+
+SDValue
+AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ uint32_t vecSize = Op.getValueType().getVectorNumElements();
+ SDValue src = Op.getOperand(0);
+ const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ uint64_t offset = 0;
+ EVT vecType = Op.getValueType().getVectorElementType();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Result;
+ if (CSDN) {
+ offset = CSDN->getZExtValue();
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL,vecType, src, DAG.getConstant(offset, MVT::i32));
+ Result = DAG.getNode(AMDILISD::VBUILD, DL,
+ Op.getValueType(), Result);
+ for (uint32_t x = 1; x < vecSize; ++x) {
+ SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
+ src, DAG.getConstant(offset + x, MVT::i32));
+ if (elt.getOpcode() != ISD::UNDEF) {
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ Op.getValueType(), Result, elt,
+ DAG.getConstant(x, MVT::i32));
+ }
+ }
+ } else {
+ SDValue idx = Op.getOperand(1);
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, vecType, src, idx);
+ Result = DAG.getNode(AMDILISD::VBUILD, DL,
+ Op.getValueType(), Result);
+ for (uint32_t x = 1; x < vecSize; ++x) {
+ idx = DAG.getNode(ISD::ADD, DL, vecType,
+ idx, DAG.getConstant(1, MVT::i32));
+ SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
+ src, idx);
+ if (elt.getOpcode() != ISD::UNDEF) {
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ Op.getValueType(), Result, elt, idx);
+ }
+ }
+ }
+ return Result;
+}
+SDValue
+AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ SDValue Res = DAG.getNode(AMDILISD::VBUILD,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0));
+ return Res;
+}
+SDValue
+AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue andOp;
+ andOp = DAG.getNode(
+ AMDILISD::AND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
+ return andOp;
+}
+SDValue
+AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue orOp;
+ orOp = DAG.getNode(AMDILISD::OR,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
+ return orOp;
+}
+SDValue
+AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ Cond = DAG.getNode(AMDILISD::CMOVLOG,
+ DL,
+ Op.getValueType(), Cond, LHS, RHS);
+ return Cond;
+}
+SDValue
+AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TRUE = Op.getOperand(2);
+ SDValue FALSE = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ DebugLoc DL = Op.getDebugLoc();
+ bool skipCMov = false;
+ bool genINot = false;
+ EVT OVT = Op.getValueType();
+
+ // Check for possible elimination of cmov
+ if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
+ const ConstantSDNode *trueConst
+ = dyn_cast<ConstantSDNode>( TRUE.getNode() );
+ const ConstantSDNode *falseConst
+ = dyn_cast<ConstantSDNode>( FALSE.getNode() );
+ if (trueConst && falseConst) {
+ // both possible result values are constants
+ if (trueConst->isAllOnesValue()
+ && falseConst->isNullValue()) { // and convenient constants
+ skipCMov = true;
+ }
+ else if (trueConst->isNullValue()
+ && falseConst->isAllOnesValue()) { // less convenient
+ skipCMov = true;
+ genINot = true;
+ }
+ }
+ }
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ unsigned int AMDILCC = CondCCodeToCC(
+ SetCCOpcode,
+ LHS.getValueType().getSimpleVT().SimpleTy);
+ assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+ Cond = DAG.getNode(
+ AMDILISD::CMP,
+ DL,
+ LHS.getValueType(),
+ DAG.getConstant(AMDILCC, MVT::i32),
+ LHS,
+ RHS);
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ if (genINot) {
+ Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
+ }
+ if (!skipCMov) {
+ Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
+ }
+ return Cond;
+}
+SDValue
+AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ unsigned int AMDILCC = CondCCodeToCC(
+ SetCCOpcode,
+ LHS.getValueType().getSimpleVT().SimpleTy);
+ assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+ Cond = DAG.getNode(
+ AMDILISD::CMP,
+ DL,
+ LHS.getValueType(),
+ DAG.getConstant(AMDILCC, MVT::i32),
+ LHS,
+ RHS);
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ Cond = DAG.getNode(
+ ISD::AND,
+ DL,
+ Cond.getValueType(),
+ DAG.getConstant(1, Cond.getValueType()),
+ Cond);
+ return Cond;
+}
+
+SDValue
+AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Data = Op.getOperand(0);
+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+ DebugLoc DL = Op.getDebugLoc();
+ EVT DVT = Data.getValueType();
+ EVT BVT = BaseType->getVT();
+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+ unsigned shiftBits = srcBits - baseBits;
+ if (srcBits < 32) {
+ // If the op is less than 32 bits, then it needs to extend to 32bits
+ // so it can properly keep the upper bits valid.
+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+ shiftBits = 32 - baseBits;
+ DVT = IVT;
+ }
+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
+ // Shift left by 'Shift' bits.
+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+ // Signed shift Right by 'Shift' bits.
+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+ if (srcBits < 32) {
+ // Once the sign extension is done, the op needs to be converted to
+ // its original type.
+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+ }
+ return Data;
+}
+EVT
+AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
+{
+ int iSize = (size * numEle);
+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
+ if (!vEle) {
+ vEle = 1;
+ }
+ if (size == 64) {
+ if (vEle == 1) {
+ return EVT(MVT::i64);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
+ }
+ } else {
+ if (vEle == 1) {
+ return EVT(MVT::i32);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
+ }
+ }
+}
+
+SDValue
+AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Src = Op.getOperand(0);
+ SDValue Dst = Op;
+ SDValue Res;
+ DebugLoc DL = Op.getDebugLoc();
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Dst.getValueType();
+ // Lets bitcast the floating point types to an
+ // equivalent integer type before converting to vectors.
+ if (SrcVT.getScalarType().isFloatingPoint()) {
+ Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
+ SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
+ SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
+ Src);
+ SrcVT = Src.getValueType();
+ }
+ uint32_t ScalarSrcSize = SrcVT.getScalarType()
+ .getSimpleVT().getSizeInBits();
+ uint32_t ScalarDstSize = DstVT.getScalarType()
+ .getSimpleVT().getSizeInBits();
+ uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
+ bool isVec = SrcVT.isVector();
+ if (DstVT.getScalarType().isInteger() &&
+ (SrcVT.getScalarType().isInteger()
+ || SrcVT.getScalarType().isFloatingPoint())) {
+ if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
+ || (ScalarSrcSize == 64
+ && DstNumEle == 4
+ && ScalarDstSize == 16)) {
+ // This is the problematic case when bitcasting i64 <-> <4 x i16>
+ // This approach is a little different as we cannot generate a
+ // <4 x i64> vector
+ // as that is illegal in our backend and we are already past
+ // the DAG legalizer.
+ // So, in this case, we will do the following conversion.
+ // Case 1:
+ // %dst = <4 x i16> %src bitconvert i64 ==>
+ // %tmp = <4 x i16> %src convert <4 x i32>
+ // %tmp = <4 x i32> %tmp and 0xFFFF
+ // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
+ // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
+ // %dst = <2 x i32> %tmp bitcast i64
+ // case 2:
+ // %dst = i64 %src bitconvert <4 x i16> ==>
+ // %tmp = i64 %src bitcast <2 x i32>
+ // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
+ // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
+ // %tmp = <4 x i32> %tmp and 0xFFFF
+ // %dst = <4 x i16> %tmp bitcast <4 x i32>
+ SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
+ DAG.getConstant(0xFFFF, MVT::i32));
+ SDValue const16 = DAG.getConstant(16, MVT::i32);
+ if (ScalarDstSize == 64) {
+ // case 1
+ Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
+ Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
+ SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(0, MVT::i32));
+ SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(1, MVT::i32));
+ y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
+ SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(2, MVT::i32));
+ SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(3, MVT::i32));
+ w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
+ x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
+ y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
+ Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
+ return Res;
+ } else {
+ // case 2
+ SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
+ SDValue lor16
+ = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
+ SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
+ SDValue hir16
+ = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
+ SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
+ MVT::v4i32, lo);
+ SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(1, MVT::i32));
+ resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ resVec, lor16, idxVal);
+ idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(2, MVT::i32));
+ resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ resVec, hi, idxVal);
+ idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(3, MVT::i32));
+ resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ resVec, hir16, idxVal);
+ resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
+ Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
+ return Res;
+ }
+ } else {
+ // There are four cases we need to worry about for bitcasts
+ // where the size of all
+ // source, intermediates and result is <= 128 bits, unlike
+ // the above case
+ // 1) Sub32bit bitcast 32bitAlign
+ // %dst = <4 x i8> bitcast i32
+ // (also <[2|4] x i16> to <[2|4] x i32>)
+ // 2) 32bitAlign bitcast Sub32bit
+ // %dst = i32 bitcast <4 x i8>
+ // 3) Sub32bit bitcast LargerSub32bit
+ // %dst = <2 x i8> bitcast i16
+ // (also <4 x i8> to <2 x i16>)
+ // 4) Sub32bit bitcast SmallerSub32bit
+ // %dst = i16 bitcast <2 x i8>
+ // (also <2 x i16> to <4 x i8>)
+ // This also only handles types that are powers of two
+ if ((ScalarDstSize & (ScalarDstSize - 1))
+ || (ScalarSrcSize & (ScalarSrcSize - 1))) {
+ } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
+ // case 1:
+ EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
+#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
+ SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
+#else
+ SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+ DAG.getUNDEF(IntTy.getScalarType()));
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(x, MVT::i32));
+ SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ SrcVT.getScalarType(), Src,
+ DAG.getConstant(x, MVT::i32));
+ temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
+ res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
+ res, temp, idx);
+ }
+#endif
+ SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+ DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
+ SDValue *newEle = new SDValue[SrcNumEle];
+ res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntTy.getScalarType(), res,
+ DAG.getConstant(x, MVT::i32));
+ }
+ uint32_t Ratio = SrcNumEle / DstNumEle;
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ if (x % Ratio) {
+ newEle[x] = DAG.getNode(ISD::SHL, DL,
+ IntTy.getScalarType(), newEle[x],
+ DAG.getConstant(ScalarSrcSize * (x % Ratio),
+ MVT::i32));
+ }
+ }
+ for (uint32_t x = 0; x < SrcNumEle; x += 2) {
+ newEle[x] = DAG.getNode(ISD::OR, DL,
+ IntTy.getScalarType(), newEle[x], newEle[x + 1]);
+ }
+ if (ScalarSrcSize == 8) {
+ for (uint32_t x = 0; x < SrcNumEle; x += 4) {
+ newEle[x] = DAG.getNode(ISD::OR, DL,
+ IntTy.getScalarType(), newEle[x], newEle[x + 2]);
+ }
+ if (DstNumEle == 1) {
+ Dst = newEle[0];
+ } else {
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+ newEle[0]);
+ for (uint32_t x = 1; x < DstNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(x, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ DstVT, Dst, newEle[x * 4], idx);
+ }
+ }
+ } else {
+ if (DstNumEle == 1) {
+ Dst = newEle[0];
+ } else {
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+ newEle[0]);
+ for (uint32_t x = 1; x < DstNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(x, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ DstVT, Dst, newEle[x * 2], idx);
+ }
+ }
+ }
+ delete [] newEle;
+ return Dst;
+ } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
+ // case 2:
+ EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
+ SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+ DAG.getUNDEF(IntTy.getScalarType()));
+ uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ for (uint32_t y = 0; y < mult; ++y) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(),
+ DAG.getConstant(x * mult + y, MVT::i32));
+ SDValue t;
+ if (SrcNumEle > 1) {
+ t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, SrcVT.getScalarType(), Src,
+ DAG.getConstant(x, MVT::i32));
+ } else {
+ t = Src;
+ }
+ if (y != 0) {
+ t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
+ t, DAG.getConstant(y * ScalarDstSize,
+ MVT::i32));
+ }
+ vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
+ DL, IntTy, vec, t, idx);
+ }
+ }
+ Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
+ return Dst;
+ } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
+ // case 3:
+ SDValue *numEle = new SDValue[SrcNumEle];
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ MVT::i8, Src, DAG.getConstant(x, MVT::i32));
+ numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
+ numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
+ DAG.getConstant(0xFF, MVT::i16));
+ }
+ for (uint32_t x = 1; x < SrcNumEle; x += 2) {
+ numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
+ DAG.getConstant(8, MVT::i16));
+ numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
+ numEle[x-1], numEle[x]);
+ }
+ if (DstNumEle > 1) {
+ // If we are not a scalar i16, the only other case is a
+ // v2i16 since we can't have v8i8 at this point, v4i16
+ // cannot be generated
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
+ numEle[0]);
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(1, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
+ Dst, numEle[2], idx);
+ } else {
+ Dst = numEle[0];
+ }
+ delete [] numEle;
+ return Dst;
+ } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
+ // case 4:
+ SDValue *numEle = new SDValue[DstNumEle];
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ MVT::i16, Src, DAG.getConstant(x, MVT::i32));
+ numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
+ numEle[x * 2], DAG.getConstant(8, MVT::i16));
+ }
+ MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
+ for (uint32_t x = 1; x < DstNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ getPointerTy(), DAG.getConstant(x, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
+ Dst, numEle[x], idx);
+ }
+ delete [] numEle;
+ ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
+ Res = DAG.getSExtOrTrunc(Dst, DL, ty);
+ return Res;
+ }
+ }
+ }
+ Res = DAG.getNode(AMDILISD::BITCONV,
+ Dst.getDebugLoc(),
+ Dst.getValueType(), Src);
+ return Res;
+}
+
+SDValue
+AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ unsigned int SPReg = AMDIL::SP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue SP = DAG.getCopyFromReg(Chain,
+ DL,
+ SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::ADD,
+ DL,
+ MVT::i32, SP, Size);
+ Chain = DAG.getCopyToReg(SP.getValue(1),
+ DL,
+ SPReg, NewSP);
+ SDValue Ops[2] = {NewSP, Chain};
+ Chain = DAG.getMergeValues(Ops, 2 ,DL);
+ return Chain;
+}
+SDValue
+AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+ SDValue Result;
+ Result = DAG.getNode(
+ AMDILISD::BRANCH_COND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Chain, Jump, Cond);
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue JumpT = Op.getOperand(4);
+ SDValue CmpValue;
+ ISD::CondCode CC = CCNode->get();
+ SDValue Result;
+ unsigned int cmpOpcode = CondCCodeToCC(
+ CC,
+ LHS.getValueType().getSimpleVT().SimpleTy);
+ CmpValue = DAG.getNode(
+ AMDILISD::CMP,
+ Op.getDebugLoc(),
+ LHS.getValueType(),
+ DAG.getConstant(cmpOpcode, MVT::i32),
+ LHS, RHS);
+ Result = DAG.getNode(
+ AMDILISD::BRANCH_COND,
+ CmpValue.getDebugLoc(),
+ MVT::Other, Chain,
+ JumpT, CmpValue);
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Result = DAG.getNode(
+ AMDILISD::DP_TO_FP,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Result = DAG.getNode(
+ AMDILISD::VCONCAT,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
+ return Result;
+}
+// LowerRET - Lower an ISD::RET node.
+SDValue
+AMDILTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG)
+const
+{
+ //MachineFunction& MF = DAG.getMachineFunction();
+ // CCValAssign - represent the assignment of the return value
+ // to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+ if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
+ MRI.addLiveOut(RVLocs[i].getLocReg());
+ }
+ }
+ // FIXME: implement this when tail call is implemented
+ // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
+ // both x86 and ppc implement this in ISelLowering
+
+ // Regular return here
+ SDValue Flag;
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain);
+ RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
+ for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ SDValue ValToCopy = OutVals[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ // ISD::Ret => ret chain, (regnum1, val1), ...
+ // So i * 2 + 1 index only the regnums
+ Chain = DAG.getCopyToReg(Chain,
+ dl,
+ VA.getLocReg(),
+ ValToCopy,
+ Flag);
+ // guarantee that all emitted copies are stuck together
+ // avoiding something bad
+ Flag = Chain.getValue(1);
+ }
+ /*if (MF.getFunction()->hasStructRetAttr()) {
+ assert(0 && "Struct returns are not yet implemented!");
+ // Both MIPS and X86 have this
+ }*/
+ RetOps[0] = Chain;
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ Flag = DAG.getNode(AMDILISD::RET_FLAG,
+ dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+ return Flag;
+}
+void
+AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
+ unsigned int opCode) const
+{
+ MachineOperand DST = MI->getOperand(0);
+ MachineOperand LHS = MI->getOperand(2);
+ MachineOperand RHS = MI->getOperand(3);
+ unsigned int opi32Code = 0, si32Code = 0;
+ unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+ uint32_t REGS[12];
+ // All the relationals can be generated with with 6 temp registers
+ for (int x = 0; x < 12; ++x) {
+ REGS[x] = genVReg(simpleVT);
+ }
+ // Pull out the high and low components of each 64 bit register
+ generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
+ generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
+ generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
+ generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
+ // Determine the correct opcode that we should use
+ switch(opCode) {
+ default:
+ assert(!"comparison case not handled!");
+ break;
+ case AMDIL::LEQ:
+ si32Code = opi32Code = AMDIL::IEQ;
+ break;
+ case AMDIL::LNE:
+ si32Code = opi32Code = AMDIL::INE;
+ break;
+ case AMDIL::LLE:
+ case AMDIL::ULLE:
+ case AMDIL::LGE:
+ case AMDIL::ULGE:
+ if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
+ std::swap(REGS[0], REGS[2]);
+ } else {
+ std::swap(REGS[1], REGS[3]);
+ }
+ if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
+ opi32Code = AMDIL::ILT;
+ } else {
+ opi32Code = AMDIL::ULT;
+ }
+ si32Code = AMDIL::UGE;
+ break;
+ case AMDIL::LGT:
+ case AMDIL::ULGT:
+ std::swap(REGS[0], REGS[2]);
+ std::swap(REGS[1], REGS[3]);
+ case AMDIL::LLT:
+ case AMDIL::ULLT:
+ if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
+ opi32Code = AMDIL::ILT;
+ } else {
+ opi32Code = AMDIL::ULT;
+ }
+ si32Code = AMDIL::ULT;
+ break;
+ };
+ // Do the initial opcode on the high and low components.
+ // This leaves the following:
+ // REGS[4] = L_HI OP R_HI
+ // REGS[5] = L_LO OP R_LO
+ generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
+ generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
+ switch(opi32Code) {
+ case AMDIL::IEQ:
+ case AMDIL::INE:
+ {
+ // combine the results with an and or or depending on if
+ // we are eq or ne
+ uint32_t combineOp = (opi32Code == AMDIL::IEQ)
+ ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
+ generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
+ }
+ break;
+ default:
+ // this finishes codegen for the following pattern
+ // REGS[4] || (REGS[5] && (L_HI == R_HI))
+ generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
+ generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
+ REGS[9]);
+ generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
+ REGS[10]);
+ break;
+ }
+ generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
+}
+
+unsigned int
+AMDILTargetLowering::getFunctionAlignment(const Function *) const
+{
+ return 0;
+}
+
+void
+AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator &BBI,
+ DebugLoc *DL, const TargetInstrInfo *TII) const
+{
+ mBB = BB;
+ mBBI = BBI;
+ mDL = DL;
+ mTII = TII;
+}
+uint32_t
+AMDILTargetLowering::genVReg(uint32_t regType) const
+{
+ return mBB->getParent()->getRegInfo().createVirtualRegister(
+ getRegClassFromID(regType));
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
+{
+ return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+ uint32_t src1) const
+{
+ return generateMachineInst(opcode, dst).addReg(src1);
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+ uint32_t src1, uint32_t src2) const
+{
+ return generateMachineInst(opcode, dst, src1).addReg(src2);
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+ uint32_t src1, uint32_t src2, uint32_t src3) const
+{
+ return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
+}
+
+
+SDValue
+AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // int ib, (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+ SDValue cv;
+ if (INTTY == MVT::i32) {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ } else {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ }
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r0, DAG.getConstant(0, OVT));
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r1, DAG.getConstant(0, OVT));
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+
+ // The LowerUDIV24 function implements the following CL.
+ // int ia = (int)LHS
+ // float fa = (float)ia
+ // int ib = (int)RHS
+ // float fb = (float)ib
+ // float fq = native_divide(fa, fb)
+ // fq = trunc(fq)
+ // float t = mad(fq, fb, fb)
+ // int iq = (int)fq - (t <= fa)
+ // return (type)iq
+
+ // int ia = (int)LHS
+ SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
+
+ // float fa = (float)ia
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // int ib = (int)RHS
+ SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
+
+ // float fb = (float)ib
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb)
+ SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq)
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float t = mad(fq, fb, fb)
+ SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
+
+ // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
+ SDValue iq;
+ fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+ if (INTTY == MVT::i32) {
+ iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
+ } else {
+ iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
+ }
+ iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
+
+
+ // return (type)iq
+ iq = DAG.getZExtOrTrunc(iq, DL, OVT);
+ return iq;
+
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+SDValue
+AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSREM32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r0, DAG.getConstant(0, OVT));
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r1, DAG.getConstant(0, OVT));
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+ SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerUREM8 function generates equivalent to the following IL.
+ // mov r0, as_u32(LHS)
+ // mov r1, as_u32(RHS)
+ // and r10, r0, 0xFF
+ // and r11, r1, 0xFF
+ // cmov_logical r3, r11, r11, 0x1
+ // udiv r3, r10, r3
+ // cmov_logical r3, r11, r3, 0
+ // umul r3, r3, r11
+ // sub r3, r10, r3
+ // and as_u8(DST), r3, 0xFF
+
+ // mov r0, as_u32(LHS)
+ SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // mov r1, as_u32(RHS)
+ SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // and r10, r0, 0xFF
+ SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
+ DAG.getConstant(0xFF, INTTY));
+
+ // and r11, r1, 0xFF
+ SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
+ DAG.getConstant(0xFF, INTTY));
+
+ // cmov_logical r3, r11, r11, 0x1
+ SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
+ DAG.getConstant(0x01, INTTY));
+
+ // udiv r3, r10, r3
+ r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
+
+ // cmov_logical r3, r11, r3, 0
+ r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
+ DAG.getConstant(0, INTTY));
+
+ // umul r3, r3, r11
+ r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
+
+ // sub r3, r10, r3
+ r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
+
+ // and as_u8(DST), r3, 0xFF
+ SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
+ DAG.getConstant(0xFF, INTTY));
+ DST = DAG.getZExtOrTrunc(DST, DL, OVT);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerUREM16 function generatest equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // DIV = LowerUDIV16(LHS, RHS)
+ // and r10, r0, 0xFFFF
+ // and r11, r1, 0xFFFF
+ // cmov_logical r3, r11, r11, 0x1
+ // udiv as_u16(r3), as_u32(r10), as_u32(r3)
+ // and r3, r3, 0xFFFF
+ // cmov_logical r3, r11, r3, 0
+ // umul r3, r3, r11
+ // sub r3, r10, r3
+ // and DST, r3, 0xFFFF
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // and r10, r0, 0xFFFF
+ SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
+ DAG.getConstant(0xFFFF, OVT));
+
+ // and r11, r1, 0xFFFF
+ SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
+ DAG.getConstant(0xFFFF, OVT));
+
+ // cmov_logical r3, r11, r11, 0x1
+ SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
+ DAG.getConstant(0x01, OVT));
+
+ // udiv as_u16(r3), as_u32(r10), as_u32(r3)
+ r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
+ r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
+ r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
+ r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
+ r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
+
+ // and r3, r3, 0xFFFF
+ r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
+ DAG.getConstant(0xFFFF, OVT));
+
+ // cmov_logical r3, r11, r3, 0
+ r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
+ DAG.getConstant(0, OVT));
+ // umul r3, r3, r11
+ r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
+
+ // sub r3, r10, r3
+ r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
+
+ // and DST, r3, 0xFFFF
+ SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
+ DAG.getConstant(0xFFFF, OVT));
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerUREM32 function generates equivalent to the following IL.
+ // udiv r20, LHS, RHS
+ // umul r20, r20, RHS
+ // sub DST, LHS, r20
+
+ // udiv r20, LHS, RHS
+ SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
+
+ // umul r20, r20, RHS
+ r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
+
+ // sub DST, LHS, r20
+ SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+
+SDValue
+AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2f32) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4f32) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue DST;
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // TODO: This doesn't work for vector types yet
+ // The LowerFDIV32 function generates equivalent to the following
+ // IL:
+ // mov r20, as_int(LHS)
+ // mov r21, as_int(RHS)
+ // and r30, r20, 0x7f800000
+ // and r31, r20, 0x807FFFFF
+ // and r32, r21, 0x7f800000
+ // and r33, r21, 0x807FFFFF
+ // ieq r40, r30, 0x7F800000
+ // ieq r41, r31, 0x7F800000
+ // ieq r42, r32, 0
+ // ieq r43, r33, 0
+ // and r50, r20, 0x80000000
+ // and r51, r21, 0x80000000
+ // ior r32, r32, 0x3f800000
+ // ior r33, r33, 0x3f800000
+ // cmov_logical r32, r42, r50, r32
+ // cmov_logical r33, r43, r51, r33
+ // cmov_logical r32, r40, r20, r32
+ // cmov_logical r33, r41, r21, r33
+ // ior r50, r40, r41
+ // ior r51, r42, r43
+ // ior r50, r50, r51
+ // inegate r52, r31
+ // iadd r30, r30, r52
+ // cmov_logical r30, r50, 0, r30
+ // div_zeroop(infinity) r21, 1.0, r33
+ // mul_ieee r20, r32, r21
+ // and r22, r20, 0x7FFFFFFF
+ // and r23, r20, 0x80000000
+ // ishr r60, r22, 0x00000017
+ // ishr r61, r30, 0x00000017
+ // iadd r20, r20, r30
+ // iadd r21, r22, r30
+ // iadd r60, r60, r61
+ // ige r42, 0, R60
+ // ior r41, r23, 0x7F800000
+ // ige r40, r60, 0x000000FF
+ // cmov_logical r40, r50, 0, r40
+ // cmov_logical r20, r42, r23, r20
+ // cmov_logical DST, r40, r41, r20
+ // as_float(DST)
+
+ // mov r20, as_int(LHS)
+ SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
+
+ // mov r21, as_int(RHS)
+ SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
+
+ // and r30, r20, 0x7f800000
+ SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x7F800000, INTTY));
+
+ // and r31, r21, 0x7f800000
+ SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+ DAG.getConstant(0x7f800000, INTTY));
+
+ // and r32, r20, 0x807FFFFF
+ SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x807FFFFF, INTTY));
+
+ // and r33, r21, 0x807FFFFF
+ SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+ DAG.getConstant(0x807FFFFF, INTTY));
+
+ // ieq r40, r30, 0x7F800000
+ SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R30, DAG.getConstant(0x7F800000, INTTY));
+
+ // ieq r41, r31, 0x7F800000
+ SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R31, DAG.getConstant(0x7F800000, INTTY));
+
+ // ieq r42, r30, 0
+ SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R30, DAG.getConstant(0, INTTY));
+
+ // ieq r43, r31, 0
+ SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R31, DAG.getConstant(0, INTTY));
+
+ // and r50, r20, 0x80000000
+ SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x80000000, INTTY));
+
+ // and r51, r21, 0x80000000
+ SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+ DAG.getConstant(0x80000000, INTTY));
+
+ // ior r32, r32, 0x3f800000
+ R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
+ DAG.getConstant(0x3F800000, INTTY));
+
+ // ior r33, r33, 0x3f800000
+ R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
+ DAG.getConstant(0x3F800000, INTTY));
+
+ // cmov_logical r32, r42, r50, r32
+ R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
+
+ // cmov_logical r33, r43, r51, r33
+ R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
+
+ // cmov_logical r32, r40, r20, r32
+ R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
+
+ // cmov_logical r33, r41, r21, r33
+ R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
+
+ // ior r50, r40, r41
+ R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
+
+ // ior r51, r42, r43
+ R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
+
+ // ior r50, r50, r51
+ R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
+
+ // inegate r52, r31
+ SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
+
+ // iadd r30, r30, r52
+ R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
+
+ // cmov_logical r30, r50, 0, r30
+ R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+ DAG.getConstant(0, INTTY), R30);
+
+ // div_zeroop(infinity) r21, 1.0, as_float(r33)
+ R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
+ R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+ DAG.getConstantFP(1.0f, OVT), R33);
+
+ // mul_ieee as_int(r20), as_float(r32), r21
+ R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
+ R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
+ R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
+
+ // div_zeroop(infinity) r21, 1.0, as_float(r33)
+ R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
+ R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+ DAG.getConstantFP(1.0f, OVT), R33);
+
+ // mul_ieee as_int(r20), as_float(r32), r21
+ R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
+ R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
+ R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
+
+ // and r22, r20, 0x7FFFFFFF
+ SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x7FFFFFFF, INTTY));
+
+ // and r23, r20, 0x80000000
+ SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x80000000, INTTY));
+
+ // ishr r60, r22, 0x00000017
+ SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
+ DAG.getConstant(0x00000017, INTTY));
+
+ // ishr r61, r30, 0x00000017
+ SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
+ DAG.getConstant(0x00000017, INTTY));
+
+ // iadd r20, r20, r30
+ R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
+
+ // iadd r21, r22, r30
+ R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
+
+ // iadd r60, r60, r61
+ R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
+
+ // ige r42, 0, R60
+ R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ DAG.getConstant(0, INTTY),
+ R60);
+
+ // ior r41, r23, 0x7F800000
+ R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
+ DAG.getConstant(0x7F800000, INTTY));
+
+ // ige r40, r60, 0x000000FF
+ R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ R60,
+ DAG.getConstant(0x0000000FF, INTTY));
+
+ // cmov_logical r40, r50, 0, r40
+ R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+ DAG.getConstant(0, INTTY),
+ R40);
+
+ // cmov_logical r20, r42, r23, r20
+ R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
+
+ // cmov_logical DST, r40, r41, r20
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
+
+ // as_float(DST)
+ DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
+ } else {
+ // The following sequence of DAG nodes produce the following IL:
+ // fabs r1, RHS
+ // lt r2, 0x1.0p+96f, r1
+ // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
+ // mul_ieee r1, RHS, r3
+ // div_zeroop(infinity) r0, LHS, r1
+ // mul_ieee DST, r0, r3
+
+ // fabs r1, RHS
+ SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
+ // lt r2, 0x1.0p+96f, r1
+ SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
+ DAG.getConstant(0x6f800000, INTTY), r1);
+ // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
+ SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
+ DAG.getConstant(0x2f800000, INTTY),
+ DAG.getConstant(0x3f800000, INTTY));
+ // mul_ieee r1, RHS, r3
+ r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
+ // div_zeroop(infinity) r0, LHS, r1
+ SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
+ // mul_ieee DST, r0, r3
+ DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
diff --git a/lib/Target/AMDIL/AMDILISelLowering.h b/lib/Target/AMDIL/AMDILISelLowering.h
new file mode 100644
index 0000000..302f0cb
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILISelLowering.h
@@ -0,0 +1,527 @@
+//===-- AMDILISelLowering.h - AMDIL DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_ISELLOWERING_H_
+#define AMDIL_ISELLOWERING_H_
+#include "AMDIL.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm
+{
+ namespace AMDILISD
+ {
+ enum
+ {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ INTTOANY, // Dummy instruction that takes an int and goes to
+ // any type converts the SDNode to an int
+ DP_TO_FP, // Conversion from 64bit FP to 32bit FP
+ FP_TO_DP, // Conversion from 32bit FP to 64bit FP
+ BITCONV, // instruction that converts from any type to any type
+ CMOV, // 32bit FP Conditional move instruction
+ CMOVLOG, // 32bit FP Conditional move logical instruction
+ SELECT, // 32bit FP Conditional move logical instruction
+ SETCC, // 32bit FP Conditional move logical instruction
+ ISGN, // 32bit Int Sign instruction
+ INEGATE, // 32bit Int Negation instruction
+ MAD, // 32bit Fused Multiply Add instruction
+ ADD, // 32/64 bit pseudo instruction
+ AND, // 128 bit and instruction
+ OR, // 128 bit or instruction
+ NOT, // 128 bit not instruction
+ XOR, // 128 bit xor instruction
+ MOVE, // generic mov instruction
+ PHIMOVE, // generic phi-node mov instruction
+ VBUILD, // scalar to vector mov instruction
+ VEXTRACT, // extract vector components
+ VINSERT, // insert vector components
+ VCONCAT, // concat a single vector to another vector
+ UMAD, // 32bit UInt Fused Multiply Add instruction
+ CALL, // Function call based on a single integer
+ RET, // Return from a function call
+ SELECT_CC, // Select the correct conditional instruction
+ BRCC, // Select the correct branch instruction
+ CMPCC, // Compare to GPR operands
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC
+ //flags.
+ SELECT_FCC, // Select between two values using the current FCC
+ //flags.
+ LCREATE, // Create a 64bit integer from two 32 bit integers
+ LCOMPHI, // Get the hi 32 bits from a 64 bit integer
+ LCOMPLO, // Get the lo 32 bits from a 64 bit integer
+ DCREATE, // Create a 64bit float from two 32 bit integers
+ DCOMPHI, // Get the hi 32 bits from a 64 bit float
+ DCOMPLO, // Get the lo 32 bits from a 64 bit float
+ LCREATE2, // Create a 64bit integer from two 32 bit integers
+ LCOMPHI2, // Get the hi 32 bits from a 64 bit integer
+ LCOMPLO2, // Get the lo 32 bits from a 64 bit integer
+ DCREATE2, // Create a 64bit float from two 32 bit integers
+ DCOMPHI2, // Get the hi 32 bits from a 64 bit float
+ DCOMPLO2, // Get the lo 32 bits from a 64 bit float
+ UMUL, // 32bit unsigned multiplication
+ IFFB_HI, // 32bit find first hi bit instruction
+ IFFB_LO, // 32bit find first low bit instruction
+ DIV_INF, // Divide with infinity returned on zero divisor
+ SMAX, // Signed integer max
+ CMP,
+ IL_CC_I_GT,
+ IL_CC_I_LT,
+ IL_CC_I_GE,
+ IL_CC_I_LE,
+ IL_CC_I_EQ,
+ IL_CC_I_NE,
+ RET_FLAG,
+ BRANCH_COND,
+ LOOP_NZERO,
+ LOOP_ZERO,
+ LOOP_CMP,
+ ADDADDR,
+ // ATOMIC Operations
+ // Global Memory
+ ATOM_G_ADD = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOM_G_AND,
+ ATOM_G_CMPXCHG,
+ ATOM_G_DEC,
+ ATOM_G_INC,
+ ATOM_G_MAX,
+ ATOM_G_UMAX,
+ ATOM_G_MIN,
+ ATOM_G_UMIN,
+ ATOM_G_OR,
+ ATOM_G_SUB,
+ ATOM_G_RSUB,
+ ATOM_G_XCHG,
+ ATOM_G_XOR,
+ ATOM_G_ADD_NORET,
+ ATOM_G_AND_NORET,
+ ATOM_G_CMPXCHG_NORET,
+ ATOM_G_DEC_NORET,
+ ATOM_G_INC_NORET,
+ ATOM_G_MAX_NORET,
+ ATOM_G_UMAX_NORET,
+ ATOM_G_MIN_NORET,
+ ATOM_G_UMIN_NORET,
+ ATOM_G_OR_NORET,
+ ATOM_G_SUB_NORET,
+ ATOM_G_RSUB_NORET,
+ ATOM_G_XCHG_NORET,
+ ATOM_G_XOR_NORET,
+ // Local Memory
+ ATOM_L_ADD,
+ ATOM_L_AND,
+ ATOM_L_CMPXCHG,
+ ATOM_L_DEC,
+ ATOM_L_INC,
+ ATOM_L_MAX,
+ ATOM_L_UMAX,
+ ATOM_L_MIN,
+ ATOM_L_UMIN,
+ ATOM_L_OR,
+ ATOM_L_MSKOR,
+ ATOM_L_SUB,
+ ATOM_L_RSUB,
+ ATOM_L_XCHG,
+ ATOM_L_XOR,
+ ATOM_L_ADD_NORET,
+ ATOM_L_AND_NORET,
+ ATOM_L_CMPXCHG_NORET,
+ ATOM_L_DEC_NORET,
+ ATOM_L_INC_NORET,
+ ATOM_L_MAX_NORET,
+ ATOM_L_UMAX_NORET,
+ ATOM_L_MIN_NORET,
+ ATOM_L_UMIN_NORET,
+ ATOM_L_OR_NORET,
+ ATOM_L_MSKOR_NORET,
+ ATOM_L_SUB_NORET,
+ ATOM_L_RSUB_NORET,
+ ATOM_L_XCHG_NORET,
+ ATOM_L_XOR_NORET,
+ // Region Memory
+ ATOM_R_ADD,
+ ATOM_R_AND,
+ ATOM_R_CMPXCHG,
+ ATOM_R_DEC,
+ ATOM_R_INC,
+ ATOM_R_MAX,
+ ATOM_R_UMAX,
+ ATOM_R_MIN,
+ ATOM_R_UMIN,
+ ATOM_R_OR,
+ ATOM_R_MSKOR,
+ ATOM_R_SUB,
+ ATOM_R_RSUB,
+ ATOM_R_XCHG,
+ ATOM_R_XOR,
+ ATOM_R_ADD_NORET,
+ ATOM_R_AND_NORET,
+ ATOM_R_CMPXCHG_NORET,
+ ATOM_R_DEC_NORET,
+ ATOM_R_INC_NORET,
+ ATOM_R_MAX_NORET,
+ ATOM_R_UMAX_NORET,
+ ATOM_R_MIN_NORET,
+ ATOM_R_UMIN_NORET,
+ ATOM_R_OR_NORET,
+ ATOM_R_MSKOR_NORET,
+ ATOM_R_SUB_NORET,
+ ATOM_R_RSUB_NORET,
+ ATOM_R_XCHG_NORET,
+ ATOM_R_XOR_NORET,
+ // Append buffer
+ APPEND_ALLOC,
+ APPEND_ALLOC_NORET,
+ APPEND_CONSUME,
+ APPEND_CONSUME_NORET,
+ // 2D Images
+ IMAGE2D_READ,
+ IMAGE2D_WRITE,
+ IMAGE2D_INFO0,
+ IMAGE2D_INFO1,
+ // 3D Images
+ IMAGE3D_READ,
+ IMAGE3D_WRITE,
+ IMAGE3D_INFO0,
+ IMAGE3D_INFO1,
+
+ LAST_ISD_NUMBER
+ };
+ } // AMDILISD
+
+ class MachineBasicBlock;
+ class MachineInstr;
+ class DebugLoc;
+ class TargetInstrInfo;
+
+ class AMDILTargetLowering : public TargetLowering
+ {
+ private:
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+ public:
+ AMDILTargetLowering(TargetMachine &TM);
+
+ virtual SDValue
+ LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ int
+ getVarArgsFrameOffset() const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of
+ /// the bits specified
+ /// in Mask are known to be either zero or one and return them in
+ /// the
+ /// KnownZero/KnownOne bitsets.
+ virtual void
+ computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0
+ ) const;
+
+ virtual MachineBasicBlock*
+ EmitInstrWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual bool
+ getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const;
+ virtual const char*
+ getTargetNodeName(
+ unsigned Opcode
+ ) const;
+ // We want to mark f32/f64 floating point values as
+ // legal
+ bool
+ isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ // We don't want to shrink f64/f32 constants because
+ // they both take up the same amount of space and
+ // we don't want to use a f2d instruction.
+ bool ShouldShrinkFPConstant(EVT VT) const;
+
+ /// getFunctionAlignment - Return the Log2 alignment of this
+ /// function.
+ virtual unsigned int
+ getFunctionAlignment(const Function *F) const;
+
+ private:
+ CCAssignFn*
+ CCAssignFnForNode(unsigned int CC) const;
+
+ SDValue LowerCallResult(SDValue Chain,
+ SDValue InFlag,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i) const;
+
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
+ SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ //+++--- Function dealing with conversions between floating point and
+ //integer types ---+++//
+ SDValue
+ genCLZu64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ genCLZuN(SDValue Op, SelectionDAG &DAG, uint32_t bits) const;
+ SDValue
+ genCLZu32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ genf64toi32(SDValue Op, SelectionDAG &DAG,
+ bool includeSign) const;
+
+ SDValue
+ genf64toi64(SDValue Op, SelectionDAG &DAG,
+ bool includeSign) const;
+
+ SDValue
+ genu32tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const;
+
+ SDValue
+ genu64tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG& DAG) const;
+
+ SDValue
+ LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG& DAG) const;
+
+ SDValue
+ LowerINTRINSIC_VOID(SDValue Op, SelectionDAG& DAG) const;
+
+ SDValue
+ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerADD(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSUB(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerUREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerUDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerAND(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+ EVT
+ genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
+
+ SDValue
+ LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ void
+ generateCMPInstr(MachineInstr*, MachineBasicBlock*,
+ const TargetInstrInfo&) const;
+ MachineOperand
+ convertToReg(MachineOperand) const;
+
+ // private members used by the set of instruction generation
+ // functions, these are marked mutable as they are cached so
+ // that they don't have to constantly be looked up when using the
+ // generateMachineInst/genVReg instructions. This is to simplify
+ // the code
+ // and to make it cleaner. The object itself doesn't change as
+ // only these functions use these three data types.
+ mutable MachineBasicBlock *mBB;
+ mutable DebugLoc *mDL;
+ mutable const TargetInstrInfo *mTII;
+ mutable MachineBasicBlock::iterator mBBI;
+ void
+ setPrivateData(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator &BBI,
+ DebugLoc *DL,
+ const TargetInstrInfo *TII) const;
+ uint32_t genVReg(uint32_t regType) const;
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst) const;
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst, uint32_t src1) const;
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst, uint32_t src1, uint32_t src2) const;
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst, uint32_t src1, uint32_t src2,
+ uint32_t src3) const;
+ uint32_t
+ addExtensionInstructions(
+ uint32_t reg, bool signedShift,
+ unsigned int simpleVT) const;
+ void
+ generateLongRelational(MachineInstr *MI,
+ unsigned int opCode) const;
+
+ }; // AMDILTargetLowering
+} // end namespace llvm
+
+#endif // AMDIL_ISELLOWERING_H_
diff --git a/lib/Target/AMDIL/AMDILInstrInfo.cpp b/lib/Target/AMDIL/AMDILInstrInfo.cpp
new file mode 100644
index 0000000..fbc3e45
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILInstrInfo.cpp
@@ -0,0 +1,709 @@
+//===- AMDILInstrInfo.cpp - AMDIL Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILInstrInfo.h"
+#include "AMDILUtilityFunctions.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AMDILGenInstrInfo.inc"
+
+#include "AMDILInstrInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Instructions.h"
+
+using namespace llvm;
+
+AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm)
+ : AMDILGenInstrInfo(AMDIL::ADJCALLSTACKDOWN, AMDIL::ADJCALLSTACKUP),
+ RI(tm, *this),
+ TM(tm) {
+}
+
+const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// Return true if the instruction is a register to register move and leave the
+/// source and dest operands in the passed parameters.
+bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
+ unsigned int &DstReg, unsigned int &SrcSubIdx,
+ unsigned int &DstSubIdx) const {
+ // FIXME: we should look for:
+ // add with 0
+ //assert(0 && "is Move Instruction has not been implemented yet!");
+ //return true;
+ if (!isMove(MI.getOpcode())) {
+ return false;
+ }
+ if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg()) {
+ return false;
+ }
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ DstSubIdx = 0;
+ SrcSubIdx = 0;
+ return true;
+}
+
+bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+// TODO: Implement this function
+ return false;
+}
+
+unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+bool AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+unsigned AMDILInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+unsigned AMDILInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+#if 0
+void
+AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
+// TODO: Implement this function
+}
+
+MachineInst AMDILInstrInfo::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
+// TODO: Implement this function
+ return NULL;
+}
+#endif
+MachineInstr *
+AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+// TODO: Implement this function
+ return NULL;
+}
+#if 0
+MachineInst AMDILInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI = false) const {
+// TODO: Implement this function
+ return NULL;
+}
+bool
+AMDILInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const
+{
+// TODO: Implement this function
+}
+bool
+AMDILInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const
+{
+// TODO: Implement this function
+}
+#endif
+bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const {
+ while (iter != MBB.end()) {
+ switch (iter->getOpcode()) {
+ default:
+ break;
+ ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ case AMDIL::BRANCH:
+ return true;
+ };
+ ++iter;
+ }
+ return false;
+}
+
+bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ bool retVal = true;
+ return retVal;
+ MachineBasicBlock::iterator iter = MBB.begin();
+ if (!getNextBranchInstr(iter, MBB)) {
+ retVal = false;
+ } else {
+ MachineInstr *firstBranch = iter;
+ if (!getNextBranchInstr(++iter, MBB)) {
+ if (firstBranch->getOpcode() == AMDIL::BRANCH) {
+ TBB = firstBranch->getOperand(0).getMBB();
+ firstBranch->eraseFromParent();
+ retVal = false;
+ } else {
+ TBB = firstBranch->getOperand(0).getMBB();
+ FBB = *(++MBB.succ_begin());
+ if (FBB == TBB) {
+ FBB = *(MBB.succ_begin());
+ }
+ Cond.push_back(firstBranch->getOperand(1));
+ retVal = false;
+ }
+ } else {
+ MachineInstr *secondBranch = iter;
+ if (!getNextBranchInstr(++iter, MBB)) {
+ if (secondBranch->getOpcode() == AMDIL::BRANCH) {
+ TBB = firstBranch->getOperand(0).getMBB();
+ Cond.push_back(firstBranch->getOperand(1));
+ FBB = secondBranch->getOperand(0).getMBB();
+ secondBranch->eraseFromParent();
+ retVal = false;
+ } else {
+ assert(0 && "Should not have two consecutive conditional branches");
+ }
+ } else {
+ MBB.getParent()->viewCFG();
+ assert(0 && "Should not have three branch instructions in"
+ " a single basic block");
+ retVal = false;
+ }
+ }
+ }
+ return retVal;
+}
+
+unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const {
+ const MachineInstr *MI = op.getParent();
+
+ switch (MI->getDesc().OpInfo->RegClass) {
+ default: // FIXME: fallthrough??
+ case AMDIL::GPRI8RegClassID: return AMDIL::BRANCH_COND_i8;
+ case AMDIL::GPRI16RegClassID: return AMDIL::BRANCH_COND_i16;
+ case AMDIL::GPRI32RegClassID: return AMDIL::BRANCH_COND_i32;
+ case AMDIL::GPRI64RegClassID: return AMDIL::BRANCH_COND_i64;
+ case AMDIL::GPRF32RegClassID: return AMDIL::BRANCH_COND_f32;
+ case AMDIL::GPRF64RegClassID: return AMDIL::BRANCH_COND_f64;
+ };
+}
+
+unsigned int
+AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const
+{
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ for (unsigned int x = 0; x < Cond.size(); ++x) {
+ Cond[x].getParent()->dump();
+ }
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
+ .addMBB(TBB).addReg(Cond[0].getReg());
+ }
+ return 1;
+ } else {
+ BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
+ .addMBB(TBB).addReg(Cond[0].getReg());
+ BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(FBB);
+ }
+ assert(0 && "Inserting two branches not supported");
+ return 0;
+}
+
+unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) {
+ return 0;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ case AMDIL::BRANCH:
+ I->eraseFromParent();
+ break;
+ }
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ // FIXME: only one case??
+ default:
+ return 1;
+ ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator tmp = MBB->end();
+ if (!MBB->size()) {
+ return MBB->end();
+ }
+ while (--tmp) {
+ if (tmp->getOpcode() == AMDIL::ENDLOOP
+ || tmp->getOpcode() == AMDIL::ENDIF
+ || tmp->getOpcode() == AMDIL::ELSE) {
+ if (tmp == MBB->begin()) {
+ return tmp;
+ } else {
+ continue;
+ }
+ } else {
+ return ++tmp;
+ }
+ }
+ return MBB->end();
+}
+
+bool
+AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
+ // If we are adding to the end of a basic block we can safely assume that the
+ // move is caused by a PHI node since all move instructions that are non-PHI
+ // have already been inserted into the basic blocks Therefor we call the skip
+ // flow control instruction to move the iterator before the flow control
+ // instructions and put the move instruction there.
+ bool phi = (DestReg < 1025) || (SrcReg < 1025);
+ int movInst = phi ? getMoveInstFromID(DestRC->getID())
+ : getPHIMoveInstFromID(DestRC->getID());
+
+ MachineBasicBlock::iterator iTemp = (I == MBB.end()) ? skipFlowControl(&MBB)
+ : I;
+ if (DestRC != SrcRC) {
+ //int convInst;
+ size_t dSize = DestRC->getSize();
+ size_t sSize = SrcRC->getSize();
+ if (dSize > sSize) {
+ // Elements are going to get duplicated.
+ BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+ } else if (dSize == sSize) {
+ // Direct copy, conversions are not handled.
+ BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+ } else if (dSize < sSize) {
+ // Elements are going to get dropped.
+ BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+ }
+ } else {
+ BuildMI( MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+ }
+ return true;
+}
+void
+AMDILInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{
+ BuildMI(MBB, MI, DL, get(AMDIL::MOVE_v4i32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+#if 0
+ DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+ << " to " << RI.getName(DestReg) << '\n');
+ abort();
+#endif
+}
+void
+AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ unsigned int Opc = 0;
+ // MachineInstr *curMI = MI;
+ MachineFunction &MF = *(MBB.getParent());
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ DebugLoc DL;
+ switch (RC->getID()) {
+ default:
+ Opc = AMDIL::PRIVATESTORE_v4i32;
+ break;
+ case AMDIL::GPRF32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_f32;
+ break;
+ case AMDIL::GPRF64RegClassID:
+ Opc = AMDIL::PRIVATESTORE_f64;
+ break;
+ case AMDIL::GPRI16RegClassID:
+ Opc = AMDIL::PRIVATESTORE_i16;
+ break;
+ case AMDIL::GPRI32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_i32;
+ break;
+ case AMDIL::GPRI8RegClassID:
+ Opc = AMDIL::PRIVATESTORE_i8;
+ break;
+ case AMDIL::GPRI64RegClassID:
+ Opc = AMDIL::PRIVATESTORE_i64;
+ break;
+ case AMDIL::GPRV2F32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2f32;
+ break;
+ case AMDIL::GPRV2F64RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2f64;
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2i16;
+ break;
+ case AMDIL::GPRV2I32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2i32;
+ break;
+ case AMDIL::GPRV2I8RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2i8;
+ break;
+ case AMDIL::GPRV2I64RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2i64;
+ break;
+ case AMDIL::GPRV4F32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v4f32;
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v4i16;
+ break;
+ case AMDIL::GPRV4I32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v4i32;
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v4i8;
+ break;
+ }
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineMemOperand *MMO =
+ new MachineMemOperand(
+ MachinePointerInfo::getFixedStack(FrameIndex),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
+ if (MI != MBB.end()) {
+ DL = MI->getDebugLoc();
+ }
+ MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addMemOperand(MMO)
+ .addImm(0);
+ AMDILAS::InstrResEnc curRes;
+ curRes.bits.ResourceID
+ = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ setAsmPrinterFlags(nMI, curRes);
+}
+
+void
+AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ unsigned int Opc = 0;
+ MachineFunction &MF = *(MBB.getParent());
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ DebugLoc DL;
+ switch (RC->getID()) {
+ default:
+ Opc = AMDIL::PRIVATELOAD_v4i32;
+ break;
+ case AMDIL::GPRF32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_f32;
+ break;
+ case AMDIL::GPRF64RegClassID:
+ Opc = AMDIL::PRIVATELOAD_f64;
+ break;
+ case AMDIL::GPRI16RegClassID:
+ Opc = AMDIL::PRIVATELOAD_i16;
+ break;
+ case AMDIL::GPRI32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_i32;
+ break;
+ case AMDIL::GPRI8RegClassID:
+ Opc = AMDIL::PRIVATELOAD_i8;
+ break;
+ case AMDIL::GPRI64RegClassID:
+ Opc = AMDIL::PRIVATELOAD_i64;
+ break;
+ case AMDIL::GPRV2F32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2f32;
+ break;
+ case AMDIL::GPRV2F64RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2f64;
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2i16;
+ break;
+ case AMDIL::GPRV2I32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2i32;
+ break;
+ case AMDIL::GPRV2I8RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2i8;
+ break;
+ case AMDIL::GPRV2I64RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2i64;
+ break;
+ case AMDIL::GPRV4F32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v4f32;
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v4i16;
+ break;
+ case AMDIL::GPRV4I32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v4i32;
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v4i8;
+ break;
+ }
+
+ MachineMemOperand *MMO =
+ new MachineMemOperand(
+ MachinePointerInfo::getFixedStack(FrameIndex),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
+ if (MI != MBB.end()) {
+ DL = MI->getDebugLoc();
+ }
+ MachineInstr* nMI = BuildMI(MBB, MI, DL, get(Opc))
+ .addReg(DestReg, RegState::Define)
+ .addFrameIndex(FrameIndex)
+ .addMemOperand(MMO)
+ .addImm(0);
+ AMDILAS::InstrResEnc curRes;
+ curRes.bits.ResourceID
+ = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ setAsmPrinterFlags(nMI, curRes);
+
+}
+MachineInstr *
+AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+MachineInstr*
+AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // TODO: Implement this function
+ return 0;
+}
+bool
+AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const
+{
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad,
+ bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool
+AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ // TODO: Implement this function
+ return false;
+}
+
+unsigned
+AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+bool
+AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ return false;
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) {
+ return false;
+ }
+ const MachineSDNode *mload1 = dyn_cast<MachineSDNode>(Load1);
+ const MachineSDNode *mload2 = dyn_cast<MachineSDNode>(Load2);
+ if (!mload1 || !mload2) {
+ return false;
+ }
+ if (mload1->memoperands_empty() ||
+ mload2->memoperands_empty()) {
+ return false;
+ }
+ MachineMemOperand *memOp1 = (*mload1->memoperands_begin());
+ MachineMemOperand *memOp2 = (*mload2->memoperands_begin());
+ const Value *mv1 = memOp1->getValue();
+ const Value *mv2 = memOp2->getValue();
+ if (!memOp1->isLoad() || !memOp2->isLoad()) {
+ return false;
+ }
+ if (getBasePointerValue(mv1) == getBasePointerValue(mv2)) {
+ if (isa<GetElementPtrInst>(mv1) && isa<GetElementPtrInst>(mv2)) {
+ const GetElementPtrInst *gep1 = dyn_cast<GetElementPtrInst>(mv1);
+ const GetElementPtrInst *gep2 = dyn_cast<GetElementPtrInst>(mv2);
+ if (!gep1 || !gep2) {
+ return false;
+ }
+ if (gep1->getNumOperands() != gep2->getNumOperands()) {
+ return false;
+ }
+ for (unsigned i = 0, e = gep1->getNumOperands() - 1; i < e; ++i) {
+ const Value *op1 = gep1->getOperand(i);
+ const Value *op2 = gep2->getOperand(i);
+ if (op1 != op2) {
+ // If any value except the last one is different, return false.
+ return false;
+ }
+ }
+ unsigned size = gep1->getNumOperands()-1;
+ if (!isa<ConstantInt>(gep1->getOperand(size))
+ || !isa<ConstantInt>(gep2->getOperand(size))) {
+ return false;
+ }
+ Offset1 = dyn_cast<ConstantInt>(gep1->getOperand(size))->getSExtValue();
+ Offset2 = dyn_cast<ConstantInt>(gep2->getOperand(size))->getSExtValue();
+ return true;
+ } else if (isa<Argument>(mv1) && isa<Argument>(mv2)) {
+ return false;
+ } else if (isa<GlobalValue>(mv1) && isa<GlobalValue>(mv2)) {
+ return false;
+ }
+ }
+ return false;
+}
+
+bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1
+ && "Second offset should be larger than first offset!");
+ // If we have less than 16 loads in a row, and the offsets are within 16,
+ // then schedule together.
+ // TODO: Make the loads schedule near if it fits in a cacheline
+ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+}
+
+bool
+AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // TODO: Implement this function
+ return true;
+}
+void AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // TODO: Implement this function
+}
+
+bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // TODO: Implement this function
+ return false;
+}
+#if 0
+bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ // TODO: Implement this function
+}
+
+bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ // TODO: Implement this function
+}
+#endif
+bool
+AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2)
+ const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDILInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const {
+ // TODO: Implement this function
+ return MI->getDesc().isPredicable();
+}
+
+bool
+AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // TODO: Implement this function
+ return true;
+}
+
+unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+#if 0
+unsigned
+AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+ // TODO: Implement this function
+ return 0;
+}
+#endif
diff --git a/lib/Target/AMDIL/AMDILInstrInfo.h b/lib/Target/AMDIL/AMDILInstrInfo.h
new file mode 100644
index 0000000..88dd4e9
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILInstrInfo.h
@@ -0,0 +1,175 @@
+//===- AMDILInstrInfo.h - AMDIL Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILINSTRUCTIONINFO_H_
+#define AMDILINSTRUCTIONINFO_H_
+
+#include "AMDILRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AMDILGenInstrInfo.inc"
+
+namespace llvm {
+ // AMDIL - This namespace holds all of the target specific flags that
+ // instruction info tracks.
+ //
+ //class AMDILTargetMachine;
+class AMDILInstrInfo : public AMDILGenInstrInfo {
+private:
+ const AMDILRegisterInfo RI;
+ AMDILTargetMachine &TM;
+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const;
+ unsigned int getBranchInstr(const MachineOperand &op) const;
+public:
+ explicit AMDILInstrInfo(AMDILTargetMachine &tm);
+
+ // getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ // such, whenever a client has an instance of instruction info, it should
+ // always be able to get register info as well (through this method).
+ const AMDILRegisterInfo &getRegisterInfo() const;
+
+ // Return true if the instruction is a register to register move and leave the
+ // source and dest operands in the passed parameters.
+ bool isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
+ unsigned int &DstReg, unsigned int &SrcSubIdx,
+ unsigned int &DstSubIdx) const;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+
+#if 0
+ void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
+ MachineInstr *duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const;
+#endif
+ MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+#if 0
+ MachineInstr *commuteInstruction(MachineInstr *MI,
+ bool NewMI = false) const;
+ bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const;
+ bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const;
+
+#endif
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ unsigned
+ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+protected:
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const;
+public:
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode *> &NewNodes) const;
+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+ bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const;
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool isPredicated(const MachineInstr *MI) const;
+#if 0
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+#endif
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ bool isPredicable(MachineInstr *MI) const;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+#if 0
+ unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
+ unsigned getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const;
+#endif
+ };
+
+}
+
+#endif // AMDILINSTRINFO_H_
diff --git a/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp b/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp
new file mode 100644
index 0000000..75729ac
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp
@@ -0,0 +1,190 @@
+//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL Implementation of the IntrinsicInfo class.
+//
+//===-----------------------------------------------------------------------===//
+
+#include "AMDILIntrinsicInfo.h"
+#include "AMDIL.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDILGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+AMDILIntrinsicInfo::AMDILIntrinsicInfo(AMDILTargetMachine *tm)
+ : TargetIntrinsicInfo(), mTM(tm)
+{
+}
+
+std::string
+AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
+ unsigned int numTys) const
+{
+ static const char* const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ //assert(!isOverloaded(IntrID)
+ //&& "AMDIL Intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics) {
+ return 0;
+ }
+ assert(IntrID < AMDGPUIntrinsic::num_AMDIL_intrinsics
+ && "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+ static bool
+checkTruncation(const char *Name, unsigned int& Len)
+{
+ const char *ptr = Name + (Len - 1);
+ while(ptr != Name && *ptr != '_') {
+ --ptr;
+ }
+ // We don't want to truncate on atomic instructions
+ // but we do want to enter the check Truncation
+ // section so that we can translate the atomic
+ // instructions if we need to.
+ if (!strncmp(Name, "__atom", 6)) {
+ return true;
+ }
+ if (strstr(ptr, "i32")
+ || strstr(ptr, "u32")
+ || strstr(ptr, "i64")
+ || strstr(ptr, "u64")
+ || strstr(ptr, "f32")
+ || strstr(ptr, "f64")
+ || strstr(ptr, "i16")
+ || strstr(ptr, "u16")
+ || strstr(ptr, "i8")
+ || strstr(ptr, "u8")) {
+ Len = (unsigned int)(ptr - Name);
+ return true;
+ }
+ return false;
+}
+
+// We don't want to support both the OpenCL 1.0 atomics
+// and the 1.1 atomics with different names, so we translate
+// the 1.0 atomics to the 1.1 naming here if needed.
+static char*
+atomTranslateIfNeeded(const char *Name, unsigned int Len)
+{
+ char *buffer = NULL;
+ if (strncmp(Name, "__atom_", 7)) {
+ // If we are not starting with __atom_, then
+ // go ahead and continue on with the allocation.
+ buffer = new char[Len + 1];
+ memcpy(buffer, Name, Len);
+ } else {
+ buffer = new char[Len + 3];
+ memcpy(buffer, "__atomic_", 9);
+ memcpy(buffer + 9, Name + 7, Len - 7);
+ Len += 2;
+ }
+ buffer[Len] = '\0';
+ return buffer;
+}
+
+unsigned int
+AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
+{
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDILGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ AMDGPUIntrinsic::ID IntrinsicID
+ = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
+ if (checkTruncation(Name, Len)) {
+ char *buffer = atomTranslateIfNeeded(Name, Len);
+ IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", buffer);
+ delete [] buffer;
+ } else {
+ IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);
+ }
+ if (!isValidIntrinsic(IntrinsicID)) {
+ return 0;
+ }
+ if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
+ return IntrinsicID;
+ }
+ return 0;
+}
+
+bool
+AMDILIntrinsicInfo::isOverloaded(unsigned id) const
+{
+ // Overload Table
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+Function*
+AMDILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const
+{
+ assert(!isOverloaded(IntrID) && "AMDIL intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((AMDGPUIntrinsic::ID) IntrID);
+ LLVMContext& Context = M->getContext();
+ unsigned int id = IntrID;
+ Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+ // We need to add the resource ID argument for atomics.
+ if (id >= AMDGPUIntrinsic::AMDIL_atomic_add_gi32
+ && id <= AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret) {
+ ArgTys.push_back(IntegerType::get(Context, 32));
+ }
+
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ FunctionType::get(ResultTy, ArgTys, IsVarArg),
+ AList));
+}
+
+/// Because the code generator has to support different SC versions,
+/// this function is added to check that the intrinsic being used
+/// is actually valid. In the case where it isn't valid, the
+/// function call is not translated into an intrinsic and the
+/// fall back software emulated path should pick up the result.
+bool
+AMDILIntrinsicInfo::isValidIntrinsic(unsigned int IntrID) const
+{
+ const AMDILSubtarget *stm = mTM->getSubtargetImpl();
+ switch (IntrID) {
+ default:
+ return true;
+ case AMDGPUIntrinsic::AMDIL_convert_f32_i32_rpi:
+ case AMDGPUIntrinsic::AMDIL_convert_f32_i32_flr:
+ case AMDGPUIntrinsic::AMDIL_convert_f32_f16_near:
+ case AMDGPUIntrinsic::AMDIL_convert_f32_f16_neg_inf:
+ case AMDGPUIntrinsic::AMDIL_convert_f32_f16_plus_inf:
+ return stm->calVersion() >= CAL_VERSION_SC_139;
+ };
+}
diff --git a/lib/Target/AMDIL/AMDILIntrinsicInfo.h b/lib/Target/AMDIL/AMDILIntrinsicInfo.h
new file mode 100644
index 0000000..513c6f0
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILIntrinsicInfo.h
@@ -0,0 +1,49 @@
+//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the AMDIL Implementation of the Intrinsic Info class.
+//
+//===-----------------------------------------------------------------------===//
+#ifndef _AMDIL_INTRINSICS_H_
+#define _AMDIL_INTRINSICS_H_
+
+#include "llvm/Intrinsics.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+ class AMDILTargetMachine;
+ namespace AMDGPUIntrinsic {
+ enum ID {
+ last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_AMDIL_intrinsics
+ };
+
+ }
+
+
+ class AMDILIntrinsicInfo : public TargetIntrinsicInfo {
+ AMDILTargetMachine *mTM;
+ public:
+ AMDILIntrinsicInfo(AMDILTargetMachine *tm);
+ std::string getName(unsigned int IntrId, Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ unsigned int lookupName(const char *Name, unsigned int Len) const;
+ bool isOverloaded(unsigned int IID) const;
+ Function *getDeclaration(Module *M, unsigned int ID,
+ Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ bool isValidIntrinsic(unsigned int) const;
+ }; // AMDILIntrinsicInfo
+}
+
+#endif // _AMDIL_INTRINSICS_H_
+
diff --git a/lib/Target/AMDIL/AMDILNIDevice.cpp b/lib/Target/AMDIL/AMDILNIDevice.cpp
new file mode 100644
index 0000000..8fda1c1
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILNIDevice.cpp
@@ -0,0 +1,71 @@
+//===-- AMDILNIDevice.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILNIDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+
+using namespace llvm;
+
+AMDILNIDevice::AMDILNIDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST)
+{
+ std::string name = ST->getDeviceName();
+ if (name == "caicos") {
+ mDeviceFlag = OCL_DEVICE_CAICOS;
+ } else if (name == "turks") {
+ mDeviceFlag = OCL_DEVICE_TURKS;
+ } else if (name == "cayman") {
+ mDeviceFlag = OCL_DEVICE_CAYMAN;
+ } else {
+ mDeviceFlag = OCL_DEVICE_BARTS;
+ }
+}
+AMDILNIDevice::~AMDILNIDevice()
+{
+}
+
+size_t
+AMDILNIDevice::getMaxLDSSize() const
+{
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDILNIDevice::getGeneration() const
+{
+ return AMDILDeviceInfo::HD6XXX;
+}
+
+
+AMDILCaymanDevice::AMDILCaymanDevice(AMDILSubtarget *ST)
+ : AMDILNIDevice(ST)
+{
+ setCaps();
+}
+
+AMDILCaymanDevice::~AMDILCaymanDevice()
+{
+}
+
+void
+AMDILCaymanDevice::setCaps()
+{
+ if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDILDeviceInfo::DoubleOps);
+ mHWBits.set(AMDILDeviceInfo::FMA);
+ }
+ mHWBits.set(AMDILDeviceInfo::Signed24BitOps);
+ mSWBits.reset(AMDILDeviceInfo::Signed24BitOps);
+ mSWBits.set(AMDILDeviceInfo::ArenaSegment);
+}
+
diff --git a/lib/Target/AMDIL/AMDILNIDevice.h b/lib/Target/AMDIL/AMDILNIDevice.h
new file mode 100644
index 0000000..556670a
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILNIDevice.h
@@ -0,0 +1,59 @@
+//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===---------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef _AMDILNIDEVICE_H_
+#define _AMDILNIDEVICE_H_
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+ class AMDILSubtarget;
+//===---------------------------------------------------------------------===//
+// NI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// The AMDILNIDevice is the base class for all Northern Island series of
+// cards. It is very similiar to the AMDILEvergreenDevice, with the major
+// exception being differences in wavefront size and hardware capabilities. The
+// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
+// integer operations
+
+ class AMDILNIDevice : public AMDILEvergreenDevice {
+ public:
+ AMDILNIDevice(AMDILSubtarget*);
+ virtual ~AMDILNIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+ protected:
+ }; // AMDILNIDevice
+
+// Just as the AMDILCypressDevice is the double capable version of the
+// AMDILEvergreenDevice, the AMDILCaymanDevice is the double capable version of
+// the AMDILNIDevice. The other major difference that is not as useful from
+// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the
+// NI family is a 5 wide.
+
+ class AMDILCaymanDevice: public AMDILNIDevice {
+ public:
+ AMDILCaymanDevice(AMDILSubtarget*);
+ virtual ~AMDILCaymanDevice();
+ private:
+ virtual void setCaps();
+ }; // AMDILCaymanDevice
+
+ static const unsigned int MAX_LDS_SIZE_900 = AMDILDevice::MAX_LDS_SIZE_800;
+} // namespace llvm
+#endif // _AMDILNIDEVICE_H_
diff --git a/lib/Target/AMDIL/AMDILRegisterInfo.cpp b/lib/Target/AMDIL/AMDILRegisterInfo.cpp
new file mode 100644
index 0000000..5588233
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILRegisterInfo.cpp
@@ -0,0 +1,200 @@
+//===- AMDILRegisterInfo.cpp - AMDIL Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILRegisterInfo.h"
+#include "AMDIL.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+AMDILRegisterInfo::AMDILRegisterInfo(AMDILTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDILGenRegisterInfo(0), // RA???
+ TM(tm), TII(tii)
+{
+ baseOffset = 0;
+ nextFuncOffset = 0;
+}
+
+const uint16_t*
+AMDILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ static const uint16_t CalleeSavedRegs[] = { 0 };
+ // TODO: Does IL need to actually have any callee saved regs?
+ // I don't think we do since we can just use sequential registers
+ // Maybe this would be easier if every function call was inlined first
+ // and then there would be no callee issues to deal with
+ //TODO(getCalleeSavedRegs);
+ return CalleeSavedRegs;
+}
+
+BitVector
+AMDILRegisterInfo::getReservedRegs(const MachineFunction &MF) const
+{
+ BitVector Reserved(getNumRegs());
+ // We reserve the first getNumRegs() registers as they are the ones passed
+ // in live-in/live-out
+ // and therefor cannot be killed by the scheduler. This works around a bug
+ // discovered
+ // that was causing the linearscan register allocator to kill registers
+ // inside of the
+ // function that were also passed as LiveIn registers.
+ for (unsigned int x = 0, y = 256; x < y; ++x) {
+ Reserved.set(x);
+ }
+ return Reserved;
+}
+
+BitVector
+AMDILRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC = NULL) const
+{
+ BitVector Allocatable(getNumRegs());
+ Allocatable.clear();
+ return Allocatable;
+}
+
+const TargetRegisterClass* const*
+AMDILRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
+{
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ // TODO: Keep in sync with getCalleeSavedRegs
+ //TODO(getCalleeSavedRegClasses);
+ return CalleeSavedRegClasses;
+}
+void
+AMDILRegisterInfo::eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const
+{
+ MBB.erase(I);
+}
+
+// For each frame index we find, we store the offset in the stack which is
+// being pushed back into the global buffer. The offset into the stack where
+// the value is stored is copied into a new register and the frame index is
+// then replaced with that register.
+void
+AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS) const
+{
+ assert(SPAdj == 0 && "Unexpected");
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned int y = MI.getNumOperands();
+ for (unsigned int x = 0; x < y; ++x) {
+ if (!MI.getOperand(x).isFI()) {
+ continue;
+ }
+ bool def = isStoreInst(TM.getInstrInfo(), &MI);
+ int FrameIndex = MI.getOperand(x).getIndex();
+ int64_t Offset = MFI->getObjectOffset(FrameIndex);
+ //int64_t Size = MF.getFrameInfo()->getObjectSize(FrameIndex);
+ // An optimization is to only use the offsets if the size
+ // is larger than 4, which means we are storing an array
+ // instead of just a pointer. If we are size 4 then we can
+ // just do register copies since we don't need to worry about
+ // indexing dynamically
+ MachineInstr *nMI = MF.CreateMachineInstr(
+ TII.get(AMDIL::LOADCONST_i32), MI.getDebugLoc());
+ nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, true));
+ nMI->addOperand(
+ MachineOperand::CreateImm(Offset));
+ MI.getParent()->insert(II, nMI);
+ nMI = MF.CreateMachineInstr(
+ TII.get(AMDIL::ADD_i32), MI.getDebugLoc());
+ nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, true));
+ nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, false));
+ nMI->addOperand(MachineOperand::CreateReg(AMDIL::FP, false));
+
+ MI.getParent()->insert(II, nMI);
+ if (MI.getOperand(x).isReg() == false) {
+ MI.getOperand(x).ChangeToRegister(
+ nMI->getOperand(0).getReg(), def);
+ } else {
+ MI.getOperand(x).setReg(
+ nMI->getOperand(0).getReg());
+ }
+ }
+}
+
+void
+AMDILRegisterInfo::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF) const
+{
+ //TODO(processFunctionBeforeFrameFinalized);
+ // Here we keep track of the amount of stack that the current function
+ // uses so
+ // that we can set the offset to the end of the stack and any other
+ // function call
+ // will not overwrite any stack variables.
+ // baseOffset = nextFuncOffset;
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ for (uint32_t x = 0, y = MFI->getNumObjects(); x < y; ++x) {
+ int64_t size = MFI->getObjectSize(x);
+ if (!(size % 4) && size > 1) {
+ nextFuncOffset += size;
+ } else {
+ nextFuncOffset += 16;
+ }
+ }
+}
+unsigned int
+AMDILRegisterInfo::getRARegister() const
+{
+ return AMDIL::RA;
+}
+
+unsigned int
+AMDILRegisterInfo::getFrameRegister(const MachineFunction &MF) const
+{
+ return AMDIL::FP;
+}
+
+unsigned int
+AMDILRegisterInfo::getEHExceptionRegister() const
+{
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned int
+AMDILRegisterInfo::getEHHandlerRegister() const
+{
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int64_t
+AMDILRegisterInfo::getStackSize() const
+{
+ return nextFuncOffset - baseOffset;
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "AMDILGenRegisterInfo.inc"
+
diff --git a/lib/Target/AMDIL/AMDILRegisterInfo.h b/lib/Target/AMDIL/AMDILRegisterInfo.h
new file mode 100644
index 0000000..5207cd8
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILRegisterInfo.h
@@ -0,0 +1,91 @@
+//===- AMDILRegisterInfo.h - AMDIL Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILREGISTERINFO_H_
+#define AMDILREGISTERINFO_H_
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AMDILGenRegisterInfo.inc"
+// See header file for explanation
+
+namespace llvm
+{
+
+ class AMDILTargetMachine;
+ class TargetInstrInfo;
+ class Type;
+
+ /// DWARFFlavour - Flavour of dwarf regnumbers
+ ///
+ namespace DWARFFlavour {
+ enum {
+ AMDIL_Generic = 0
+ };
+ }
+
+ struct AMDILRegisterInfo : public AMDILGenRegisterInfo
+ {
+ AMDILTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ AMDILRegisterInfo(AMDILTargetMachine &tm, const TargetInstrInfo &tii);
+ /// Code Generation virtual methods...
+ const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector
+ getReservedRegs(const MachineFunction &MF) const;
+ BitVector
+ getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const;
+
+ void
+ eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ void
+ eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void
+ processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned int
+ getRARegister() const;
+
+ unsigned int
+ getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned int
+ getEHExceptionRegister() const;
+ unsigned int
+ getEHHandlerRegister() const;
+
+ int64_t
+ getStackSize() const;
+ private:
+ mutable int64_t baseOffset;
+ mutable int64_t nextFuncOffset;
+ };
+
+} // end namespace llvm
+
+#endif // AMDILREGISTERINFO_H_
diff --git a/lib/Target/AMDIL/AMDILSIDevice.cpp b/lib/Target/AMDIL/AMDILSIDevice.cpp
new file mode 100644
index 0000000..ce56098
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILSIDevice.cpp
@@ -0,0 +1,49 @@
+//===-- AMDILSIDevice.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILSIDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSubtarget.h"
+
+using namespace llvm;
+
+AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST)
+{
+}
+AMDILSIDevice::~AMDILSIDevice()
+{
+}
+
+size_t
+AMDILSIDevice::getMaxLDSSize() const
+{
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDILSIDevice::getGeneration() const
+{
+ return AMDILDeviceInfo::HD7XXX;
+}
+
+std::string
+AMDILSIDevice::getDataLayout() const
+{
+ return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n8:16:32:64");
+}
diff --git a/lib/Target/AMDIL/AMDILSIDevice.h b/lib/Target/AMDIL/AMDILSIDevice.h
new file mode 100644
index 0000000..69f35a0
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILSIDevice.h
@@ -0,0 +1,45 @@
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===---------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef _AMDILSIDEVICE_H_
+#define _AMDILSIDEVICE_H_
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+ class AMDILSubtarget;
+//===---------------------------------------------------------------------===//
+// SI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// The AMDILSIDevice is the base class for all Northern Island series of
+// cards. It is very similiar to the AMDILEvergreenDevice, with the major
+// exception being differences in wavefront size and hardware capabilities. The
+// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
+// integer operations
+
+ class AMDILSIDevice : public AMDILEvergreenDevice {
+ public:
+ AMDILSIDevice(AMDILSubtarget*);
+ virtual ~AMDILSIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual std::string getDataLayout() const;
+ protected:
+ }; // AMDILSIDevice
+
+} // namespace llvm
+#endif // _AMDILSIDEVICE_H_
diff --git a/lib/Target/AMDIL/AMDILSubtarget.cpp b/lib/Target/AMDIL/AMDILSubtarget.cpp
new file mode 100644
index 0000000..11b6bbe
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILSubtarget.cpp
@@ -0,0 +1,177 @@
+//===- AMDILSubtarget.cpp - AMDIL Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file implements the AMD IL specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILSubtarget.h"
+#include "AMDIL.h"
+#include "AMDILDevices.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/SubtargetFeature.h"
+
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "AMDILGenSubtargetInfo.inc"
+
+AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS )
+{
+ memset(CapsOverride, 0, sizeof(*CapsOverride)
+ * AMDILDeviceInfo::MaxNumberCapabilities);
+ // Default card
+ std::string GPU = "rv770";
+ GPU = CPU;
+ mIs64bit = false;
+ mVersion = 0;
+ SmallVector<StringRef, DEFAULT_VEC_SLOTS> Features;
+ SplitString(FS, Features, ",");
+ mDefaultSize[0] = 64;
+ mDefaultSize[1] = 1;
+ mDefaultSize[2] = 1;
+ std::string newFeatures = "";
+#if defined(_DEBUG) || defined(DEBUG)
+ bool useTest = false;
+#endif
+ for (size_t x = 0; x < Features.size(); ++x) {
+ if (Features[x].startswith("+mwgs")) {
+ SmallVector<StringRef, DEFAULT_VEC_SLOTS> sizes;
+ SplitString(Features[x], sizes, "-");
+ size_t mDim = ::atoi(sizes[1].data());
+ if (mDim > 3) {
+ mDim = 3;
+ }
+ for (size_t y = 0; y < mDim; ++y) {
+ mDefaultSize[y] = ::atoi(sizes[y+2].data());
+ }
+#if defined(_DEBUG) || defined(DEBUG)
+ } else if (!Features[x].compare("test")) {
+ useTest = true;
+#endif
+ } else if (Features[x].startswith("+cal")) {
+ SmallVector<StringRef, DEFAULT_VEC_SLOTS> version;
+ SplitString(Features[x], version, "=");
+ mVersion = ::atoi(version[1].data());
+ } else {
+ GPU = CPU;
+ if (x > 0) newFeatures += ',';
+ newFeatures += Features[x];
+ }
+ }
+ // If we don't have a version then set it to
+ // -1 which enables everything. This is for
+ // offline devices.
+ if (!mVersion) {
+ mVersion = (uint32_t)-1;
+ }
+ for (int x = 0; x < 3; ++x) {
+ if (!mDefaultSize[x]) {
+ mDefaultSize[x] = 1;
+ }
+ }
+#if defined(_DEBUG) || defined(DEBUG)
+ if (useTest) {
+ GPU = "kauai";
+ }
+#endif
+ ParseSubtargetFeatures(GPU, newFeatures);
+#if defined(_DEBUG) || defined(DEBUG)
+ if (useTest) {
+ GPU = "test";
+ }
+#endif
+ mDevName = GPU;
+ mDevice = getDeviceFromName(mDevName, this, mIs64bit);
+}
+AMDILSubtarget::~AMDILSubtarget()
+{
+ delete mDevice;
+}
+bool
+AMDILSubtarget::isOverride(AMDILDeviceInfo::Caps caps) const
+{
+ assert(caps < AMDILDeviceInfo::MaxNumberCapabilities &&
+ "Caps index is out of bounds!");
+ return CapsOverride[caps];
+}
+bool
+AMDILSubtarget::is64bit() const
+{
+ return mIs64bit;
+}
+bool
+AMDILSubtarget::isTargetELF() const
+{
+ return false;
+}
+size_t
+AMDILSubtarget::getDefaultSize(uint32_t dim) const
+{
+ if (dim > 3) {
+ return 1;
+ } else {
+ return mDefaultSize[dim];
+ }
+}
+uint32_t
+AMDILSubtarget::calVersion() const
+{
+ return mVersion;
+}
+
+AMDILGlobalManager*
+AMDILSubtarget::getGlobalManager() const
+{
+ return mGM;
+}
+void
+AMDILSubtarget::setGlobalManager(AMDILGlobalManager *gm) const
+{
+ mGM = gm;
+}
+
+AMDILKernelManager*
+AMDILSubtarget::getKernelManager() const
+{
+ return mKM;
+}
+void
+AMDILSubtarget::setKernelManager(AMDILKernelManager *km) const
+{
+ mKM = km;
+}
+std::string
+AMDILSubtarget::getDataLayout() const
+{
+ if (!mDevice) {
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
+ }
+ return mDevice->getDataLayout();
+}
+
+std::string
+AMDILSubtarget::getDeviceName() const
+{
+ return mDevName;
+}
+const AMDILDevice *
+AMDILSubtarget::device() const
+{
+ return mDevice;
+}
diff --git a/lib/Target/AMDIL/AMDILSubtarget.h b/lib/Target/AMDIL/AMDILSubtarget.h
new file mode 100644
index 0000000..a4b0e34
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILSubtarget.h
@@ -0,0 +1,75 @@
+//=====-- AMDILSubtarget.h - Define Subtarget for the AMDIL ----*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file declares the AMDIL specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILSUBTARGET_H_
+#define _AMDILSUBTARGET_H_
+
+#include "AMDILDevice.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#include <cstdlib>
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDILGenSubtargetInfo.inc"
+
+#define MAX_CB_SIZE (1 << 16)
+namespace llvm {
+ class Module;
+ class AMDILKernelManager;
+ class AMDILGlobalManager;
+ class AMDILDevice;
+ class AMDILSubtarget : public AMDILGenSubtargetInfo {
+ private:
+ bool CapsOverride[AMDILDeviceInfo::MaxNumberCapabilities];
+ mutable AMDILGlobalManager *mGM;
+ mutable AMDILKernelManager *mKM;
+ const AMDILDevice *mDevice;
+ size_t mDefaultSize[3];
+ size_t mMinimumSize[3];
+ std::string mDevName;
+ uint32_t mVersion;
+ bool mIs64bit;
+ bool mIs32on64bit;
+ public:
+ AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS);
+ virtual ~AMDILSubtarget();
+ bool isOverride(AMDILDeviceInfo::Caps) const;
+ bool is64bit() const;
+
+ // Helper functions to simplify if statements
+ bool isTargetELF() const;
+ AMDILGlobalManager* getGlobalManager() const;
+ void setGlobalManager(AMDILGlobalManager *gm) const;
+ AMDILKernelManager* getKernelManager() const;
+ void setKernelManager(AMDILKernelManager *gm) const;
+ const AMDILDevice* device() const;
+ std::string getDataLayout() const;
+ std::string getDeviceName() const;
+ virtual size_t getDefaultSize(uint32_t dim) const;
+ // Return the version of CAL that the backend should target.
+ uint32_t calVersion() const;
+ // ParseSubtargetFeatures - Parses features string setting specified
+ // subtarget options. Definition of function is
+ //auto generated by tblgen.
+ void
+ ParseSubtargetFeatures(
+ llvm::StringRef CPU,
+ llvm::StringRef FS);
+
+ };
+
+} // end namespace llvm
+
+#endif // AMDILSUBTARGET_H_
diff --git a/lib/Target/AMDIL/AMDILTargetMachine.cpp b/lib/Target/AMDIL/AMDILTargetMachine.cpp
new file mode 100644
index 0000000..77fac1d
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILTargetMachine.cpp
@@ -0,0 +1,182 @@
+//===-- AMDILTargetMachine.cpp - Define TargetMachine for AMDIL -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILTargetMachine.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDILDevices.h"
+#include "AMDILFrameLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAMDILTarget() {
+ // Register the target
+ RegisterTargetMachine<AMDILTargetMachine> X(TheAMDILTarget);
+ RegisterTargetMachine<AMDGPUTargetMachine> Y(TheAMDGPUTarget);
+}
+
+/// AMDILTargetMachine ctor -
+///
+AMDILTargetMachine::AMDILTargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL
+)
+:
+ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ DataLayout(Subtarget.getDataLayout()),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ Subtarget.device()->getStackAlignment(), 0),
+ InstrInfo(*this), //JITInfo(*this),
+ TLInfo(*this),
+ IntrinsicInfo(this)
+{
+ setAsmVerbosityDefault(true);
+ setMCUseLoc(false);
+}
+
+AMDILTargetLowering*
+AMDILTargetMachine::getTargetLowering() const
+{
+ return const_cast<AMDILTargetLowering*>(&TLInfo);
+}
+
+const AMDILInstrInfo*
+AMDILTargetMachine::getInstrInfo() const
+{
+ return &InstrInfo;
+}
+const AMDILFrameLowering*
+AMDILTargetMachine::getFrameLowering() const
+{
+ return &FrameLowering;
+}
+
+const AMDILSubtarget*
+AMDILTargetMachine::getSubtargetImpl() const
+{
+ return &Subtarget;
+}
+
+const AMDILRegisterInfo*
+AMDILTargetMachine::getRegisterInfo() const
+{
+ return &InstrInfo.getRegisterInfo();
+}
+
+const TargetData*
+AMDILTargetMachine::getTargetData() const
+{
+ return &DataLayout;
+}
+
+const AMDILIntrinsicInfo*
+AMDILTargetMachine::getIntrinsicInfo() const
+{
+ return &IntrinsicInfo;
+}
+
+ void
+AMDILTargetMachine::dump(llvm::raw_ostream &O)
+{
+ if (!mDebugMode) {
+ return;
+ }
+ O << ";AMDIL Target Machine State Dump: \n";
+}
+
+ void
+AMDILTargetMachine::setDebug(bool debugMode)
+{
+ mDebugMode = debugMode;
+}
+
+bool
+AMDILTargetMachine::getDebug() const
+{
+ return mDebugMode;
+}
+
+namespace {
+class AMDILPassConfig : public TargetPassConfig {
+
+public:
+ AMDILPassConfig(AMDILTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AMDILTargetMachine &getAMDILTargetMachine() const {
+ return getTM<AMDILTargetMachine>();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreEmitPass();
+};
+} // End of anonymous namespace
+
+TargetPassConfig *AMDILTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AMDILPassConfig(this, PM);
+}
+
+bool AMDILPassConfig::addPreISel()
+{
+ return false;
+}
+
+bool AMDILPassConfig::addInstSelector()
+{
+ PM.add(createAMDILPeepholeOpt(*TM));
+ PM.add(createAMDILISelDag(getAMDILTargetMachine()));
+ return false;
+}
+
+bool AMDILPassConfig::addPreRegAlloc()
+{
+ // If debugging, reduce code motion. Use less aggressive pre-RA scheduler
+ if (TM->getOptLevel() == CodeGenOpt::None) {
+ llvm::RegisterScheduler::setDefault(&llvm::createSourceListDAGScheduler);
+ }
+
+ PM.add(createAMDILMachinePeephole(*TM));
+ return false;
+}
+
+bool AMDILPassConfig::addPostRegAlloc() {
+ return false; // -print-machineinstr should print after this.
+}
+
+/// addPreEmitPass - This pass may be implemented by targets that want to run
+/// passes immediately before machine code is emitted. This should return
+/// true if -print-machineinstrs should print out the code after the passes.
+bool AMDILPassConfig::addPreEmitPass()
+{
+ PM.add(createAMDILCFGPreparationPass(*TM));
+ PM.add(createAMDILCFGStructurizerPass(*TM));
+ return true;
+}
+
diff --git a/lib/Target/AMDIL/AMDILTargetMachine.h b/lib/Target/AMDIL/AMDILTargetMachine.h
new file mode 100644
index 0000000..0ff3674
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILTargetMachine.h
@@ -0,0 +1,72 @@
+//===-- AMDILTargetMachine.h - Define TargetMachine for AMDIL ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file declares the AMDIL specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILTARGETMACHINE_H_
+#define AMDILTARGETMACHINE_H_
+
+#include "AMDILFrameLowering.h"
+#include "AMDILISelLowering.h"
+#include "AMDILInstrInfo.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm
+{
+ class raw_ostream;
+
+ class AMDILTargetMachine : public LLVMTargetMachine
+ {
+ private:
+ AMDILSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ AMDILFrameLowering FrameLowering;
+ AMDILInstrInfo InstrInfo;
+ AMDILTargetLowering TLInfo;
+ AMDILIntrinsicInfo IntrinsicInfo;
+ bool mDebugMode;
+ CodeGenOpt::Level mOptLevel;
+
+ protected:
+
+ public:
+ AMDILTargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ // Get Target/Subtarget specific information
+ virtual AMDILTargetLowering* getTargetLowering() const;
+ virtual const AMDILInstrInfo* getInstrInfo() const;
+ virtual const AMDILFrameLowering* getFrameLowering() const;
+ virtual const AMDILSubtarget* getSubtargetImpl() const;
+ virtual const AMDILRegisterInfo* getRegisterInfo() const;
+ virtual const TargetData* getTargetData() const;
+ virtual const AMDILIntrinsicInfo *getIntrinsicInfo() const;
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ void dump(llvm::raw_ostream &O);
+ void setDebug(bool debugMode);
+ bool getDebug() const;
+ CodeGenOpt::Level getOptLevel() const { return mOptLevel; }
+
+
+ }; // AMDILTargetMachine
+
+} // end namespace llvm
+
+#endif // AMDILTARGETMACHINE_H_
diff --git a/lib/Target/AMDIL/AMDILUtilityFunctions.cpp b/lib/Target/AMDIL/AMDILUtilityFunctions.cpp
new file mode 100644
index 0000000..f2ef4eb
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILUtilityFunctions.cpp
@@ -0,0 +1,683 @@
+//===-- AMDILUtilityFunctions.cpp - AMDIL Utility Functions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file provides the implementations of functions that are declared in the
+// AMDILUtilityFUnctions.h file.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILUtilityFunctions.h"
+#include "AMDILISelLowering.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Type.h"
+
+#include <cstdio>
+#include <list>
+#include <queue>
+
+#define GET_OPCODE_NAME(TII, MI) \
+ TII->getName(MI->getOpcode())
+
+
+using namespace llvm;
+int64_t GET_SCALAR_SIZE(llvm::Type *A) {
+ return A->getScalarSizeInBits();
+}
+
+const TargetRegisterClass * getRegClassFromID(unsigned int ID) {
+ switch (ID) {
+ default:
+ assert(0 && "Passed in ID does not match any register classes.");
+ return NULL;
+ case AMDIL::GPRI8RegClassID:
+ return &AMDIL::GPRI8RegClass;
+ case AMDIL::GPRI16RegClassID:
+ return &AMDIL::GPRI16RegClass;
+ case AMDIL::GPRI32RegClassID:
+ return &AMDIL::GPRI32RegClass;
+ case AMDIL::GPRF32RegClassID:
+ return &AMDIL::GPRF32RegClass;
+ case AMDIL::GPRI64RegClassID:
+ return &AMDIL::GPRI64RegClass;
+ case AMDIL::GPRF64RegClassID:
+ return &AMDIL::GPRF64RegClass;
+ case AMDIL::GPRV4F32RegClassID:
+ return &AMDIL::GPRV4F32RegClass;
+ case AMDIL::GPRV4I8RegClassID:
+ return &AMDIL::GPRV4I8RegClass;
+ case AMDIL::GPRV4I16RegClassID:
+ return &AMDIL::GPRV4I16RegClass;
+ case AMDIL::GPRV4I32RegClassID:
+ return &AMDIL::GPRV4I32RegClass;
+ case AMDIL::GPRV2F32RegClassID:
+ return &AMDIL::GPRV2F32RegClass;
+ case AMDIL::GPRV2I8RegClassID:
+ return &AMDIL::GPRV2I8RegClass;
+ case AMDIL::GPRV2I16RegClassID:
+ return &AMDIL::GPRV2I16RegClass;
+ case AMDIL::GPRV2I32RegClassID:
+ return &AMDIL::GPRV2I32RegClass;
+ case AMDIL::GPRV2F64RegClassID:
+ return &AMDIL::GPRV2F64RegClass;
+ case AMDIL::GPRV2I64RegClassID:
+ return &AMDIL::GPRV2I64RegClass;
+ };
+}
+
+unsigned int getMoveInstFromID(unsigned int ID) {
+ switch (ID) {
+ default:
+ assert(0 && "Passed in ID does not match any move instructions.");
+ case AMDIL::GPRI8RegClassID:
+ return AMDIL::MOVE_i8;
+ case AMDIL::GPRI16RegClassID:
+ return AMDIL::MOVE_i16;
+ case AMDIL::GPRI32RegClassID:
+ return AMDIL::MOVE_i32;
+ case AMDIL::GPRF32RegClassID:
+ return AMDIL::MOVE_f32;
+ case AMDIL::GPRI64RegClassID:
+ return AMDIL::MOVE_i64;
+ case AMDIL::GPRF64RegClassID:
+ return AMDIL::MOVE_f64;
+ case AMDIL::GPRV4F32RegClassID:
+ return AMDIL::MOVE_v4f32;
+ case AMDIL::GPRV4I8RegClassID:
+ return AMDIL::MOVE_v4i8;
+ case AMDIL::GPRV4I16RegClassID:
+ return AMDIL::MOVE_v4i16;
+ case AMDIL::GPRV4I32RegClassID:
+ return AMDIL::MOVE_v4i32;
+ case AMDIL::GPRV2F32RegClassID:
+ return AMDIL::MOVE_v2f32;
+ case AMDIL::GPRV2I8RegClassID:
+ return AMDIL::MOVE_v2i8;
+ case AMDIL::GPRV2I16RegClassID:
+ return AMDIL::MOVE_v2i16;
+ case AMDIL::GPRV2I32RegClassID:
+ return AMDIL::MOVE_v2i32;
+ case AMDIL::GPRV2F64RegClassID:
+ return AMDIL::MOVE_v2f64;
+ case AMDIL::GPRV2I64RegClassID:
+ return AMDIL::MOVE_v2i64;
+ };
+ return -1;
+}
+
+unsigned int getPHIMoveInstFromID(unsigned int ID) {
+ switch (ID) {
+ default:
+ assert(0 && "Passed in ID does not match any move instructions.");
+ case AMDIL::GPRI8RegClassID:
+ return AMDIL::PHIMOVE_i8;
+ case AMDIL::GPRI16RegClassID:
+ return AMDIL::PHIMOVE_i16;
+ case AMDIL::GPRI32RegClassID:
+ return AMDIL::PHIMOVE_i32;
+ case AMDIL::GPRF32RegClassID:
+ return AMDIL::PHIMOVE_f32;
+ case AMDIL::GPRI64RegClassID:
+ return AMDIL::PHIMOVE_i64;
+ case AMDIL::GPRF64RegClassID:
+ return AMDIL::PHIMOVE_f64;
+ case AMDIL::GPRV4F32RegClassID:
+ return AMDIL::PHIMOVE_v4f32;
+ case AMDIL::GPRV4I8RegClassID:
+ return AMDIL::PHIMOVE_v4i8;
+ case AMDIL::GPRV4I16RegClassID:
+ return AMDIL::PHIMOVE_v4i16;
+ case AMDIL::GPRV4I32RegClassID:
+ return AMDIL::PHIMOVE_v4i32;
+ case AMDIL::GPRV2F32RegClassID:
+ return AMDIL::PHIMOVE_v2f32;
+ case AMDIL::GPRV2I8RegClassID:
+ return AMDIL::PHIMOVE_v2i8;
+ case AMDIL::GPRV2I16RegClassID:
+ return AMDIL::PHIMOVE_v2i16;
+ case AMDIL::GPRV2I32RegClassID:
+ return AMDIL::PHIMOVE_v2i32;
+ case AMDIL::GPRV2F64RegClassID:
+ return AMDIL::PHIMOVE_v2f64;
+ case AMDIL::GPRV2I64RegClassID:
+ return AMDIL::PHIMOVE_v2i64;
+ };
+ return -1;
+}
+
+const TargetRegisterClass* getRegClassFromType(unsigned int type) {
+ switch (type) {
+ default:
+ assert(0 && "Passed in type does not match any register classes.");
+ case MVT::i8:
+ return &AMDIL::GPRI8RegClass;
+ case MVT::i16:
+ return &AMDIL::GPRI16RegClass;
+ case MVT::i32:
+ return &AMDIL::GPRI32RegClass;
+ case MVT::f32:
+ return &AMDIL::GPRF32RegClass;
+ case MVT::i64:
+ return &AMDIL::GPRI64RegClass;
+ case MVT::f64:
+ return &AMDIL::GPRF64RegClass;
+ case MVT::v4f32:
+ return &AMDIL::GPRV4F32RegClass;
+ case MVT::v4i8:
+ return &AMDIL::GPRV4I8RegClass;
+ case MVT::v4i16:
+ return &AMDIL::GPRV4I16RegClass;
+ case MVT::v4i32:
+ return &AMDIL::GPRV4I32RegClass;
+ case MVT::v2f32:
+ return &AMDIL::GPRV2F32RegClass;
+ case MVT::v2i8:
+ return &AMDIL::GPRV2I8RegClass;
+ case MVT::v2i16:
+ return &AMDIL::GPRV2I16RegClass;
+ case MVT::v2i32:
+ return &AMDIL::GPRV2I32RegClass;
+ case MVT::v2f64:
+ return &AMDIL::GPRV2F64RegClass;
+ case MVT::v2i64:
+ return &AMDIL::GPRV2I64RegClass;
+ }
+}
+
+void printSDNode(const SDNode *N) {
+ printf("Opcode: %d isTargetOpcode: %d isMachineOpcode: %d\n",
+ N->getOpcode(), N->isTargetOpcode(), N->isMachineOpcode());
+ printf("Empty: %d OneUse: %d Size: %d NodeID: %d\n",
+ N->use_empty(), N->hasOneUse(), (int)N->use_size(), N->getNodeId());
+ for (unsigned int i = 0; i < N->getNumOperands(); ++i) {
+ printf("OperandNum: %d ValueCount: %d ValueType: %d\n",
+ i, N->getNumValues(), N->getValueType(0) .getSimpleVT().SimpleTy);
+ printSDValue(N->getOperand(i), 0);
+ }
+}
+
+void printSDValue(const SDValue &Op, int level) {
+ printf("\nOp: %p OpCode: %d NumOperands: %d ", (void*)&Op, Op.getOpcode(),
+ Op.getNumOperands());
+ printf("IsTarget: %d IsMachine: %d ", Op.isTargetOpcode(),
+ Op.isMachineOpcode());
+ if (Op.isMachineOpcode()) {
+ printf("MachineOpcode: %d\n", Op.getMachineOpcode());
+ } else {
+ printf("\n");
+ }
+ EVT vt = Op.getValueType();
+ printf("ValueType: %d \n", vt.getSimpleVT().SimpleTy);
+ printf("UseEmpty: %d OneUse: %d\n", Op.use_empty(), Op.hasOneUse());
+ if (level) {
+ printf("Children for %d:\n", level);
+ for (unsigned int i = 0; i < Op.getNumOperands(); ++i) {
+ printf("Child %d->%d:", level, i);
+ printSDValue(Op.getOperand(i), level - 1);
+ }
+ }
+}
+
+bool isPHIMove(unsigned int opcode) {
+ switch (opcode) {
+ default:
+ return false;
+ ExpandCaseToAllTypes(AMDIL::PHIMOVE);
+ return true;
+ }
+ return false;
+}
+
+bool isMove(unsigned int opcode) {
+ switch (opcode) {
+ default:
+ return false;
+ ExpandCaseToAllTypes(AMDIL::MOVE);
+ return true;
+ }
+ return false;
+}
+
+bool isMoveOrEquivalent(unsigned int opcode) {
+ switch (opcode) {
+ default:
+ return isMove(opcode) || isPHIMove(opcode);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASCHAR);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASSHORT);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASINT);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASLONG);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASDOUBLE);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASFLOAT);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2CHAR);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2SHORT);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2INT);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2FLOAT);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2LONG);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2DOUBLE);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4CHAR);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4SHORT);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4INT);
+ ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4FLOAT);
+ case AMDIL::INTTOANY_i8:
+ case AMDIL::INTTOANY_i16:
+ case AMDIL::INTTOANY_i32:
+ case AMDIL::INTTOANY_f32:
+ case AMDIL::DLO:
+ case AMDIL::LLO:
+ case AMDIL::LLO_v2i64:
+ return true;
+ };
+ return false;
+}
+
+bool check_type(const Value *ptr, unsigned int addrspace) {
+ if (!ptr) {
+ return false;
+ }
+ Type *ptrType = ptr->getType();
+ return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
+}
+
+size_t getTypeSize(Type * const T, bool dereferencePtr) {
+ size_t size = 0;
+ if (!T) {
+ return size;
+ }
+ switch (T->getTypeID()) {
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ assert(0 && "These types are not supported by this backend");
+ default:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ size = T->getPrimitiveSizeInBits() >> 3;
+ break;
+ case Type::PointerTyID:
+ size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+ break;
+ case Type::IntegerTyID:
+ size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+ break;
+ case Type::StructTyID:
+ size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+ break;
+ case Type::ArrayTyID:
+ size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+ break;
+ case Type::FunctionTyID:
+ size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+ break;
+ case Type::VectorTyID:
+ size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+ break;
+ };
+ return size;
+}
+
+size_t getTypeSize(StructType * const ST, bool dereferencePtr) {
+ size_t size = 0;
+ if (!ST) {
+ return size;
+ }
+ Type *curType;
+ StructType::element_iterator eib;
+ StructType::element_iterator eie;
+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+ curType = *eib;
+ size += getTypeSize(curType, dereferencePtr);
+ }
+ return size;
+}
+
+size_t getTypeSize(IntegerType * const IT, bool dereferencePtr) {
+ return IT ? (IT->getBitWidth() >> 3) : 0;
+}
+
+size_t getTypeSize(FunctionType * const FT, bool dereferencePtr) {
+ assert(0 && "Should not be able to calculate the size of an function type");
+ return 0;
+}
+
+size_t getTypeSize(ArrayType * const AT, bool dereferencePtr) {
+ return (size_t)(AT ? (getTypeSize(AT->getElementType(),
+ dereferencePtr) * AT->getNumElements())
+ : 0);
+}
+
+size_t getTypeSize(VectorType * const VT, bool dereferencePtr) {
+ return VT ? (VT->getBitWidth() >> 3) : 0;
+}
+
+size_t getTypeSize(PointerType * const PT, bool dereferencePtr) {
+ if (!PT) {
+ return 0;
+ }
+ Type *CT = PT->getElementType();
+ if (CT->getTypeID() == Type::StructTyID &&
+ PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+ return getTypeSize(dyn_cast<StructType>(CT));
+ } else if (dereferencePtr) {
+ size_t size = 0;
+ for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
+ size += getTypeSize(PT->getContainedType(x), dereferencePtr);
+ }
+ return size;
+ } else {
+ return 4;
+ }
+}
+
+size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr) {
+ //assert(0 && "Should not be able to calculate the size of an opaque type");
+ return 4;
+}
+
+size_t getNumElements(Type * const T) {
+ size_t size = 0;
+ if (!T) {
+ return size;
+ }
+ switch (T->getTypeID()) {
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ assert(0 && "These types are not supported by this backend");
+ default:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ size = 1;
+ break;
+ case Type::PointerTyID:
+ size = getNumElements(dyn_cast<PointerType>(T));
+ break;
+ case Type::IntegerTyID:
+ size = getNumElements(dyn_cast<IntegerType>(T));
+ break;
+ case Type::StructTyID:
+ size = getNumElements(dyn_cast<StructType>(T));
+ break;
+ case Type::ArrayTyID:
+ size = getNumElements(dyn_cast<ArrayType>(T));
+ break;
+ case Type::FunctionTyID:
+ size = getNumElements(dyn_cast<FunctionType>(T));
+ break;
+ case Type::VectorTyID:
+ size = getNumElements(dyn_cast<VectorType>(T));
+ break;
+ };
+ return size;
+}
+
+size_t getNumElements(StructType * const ST) {
+ size_t size = 0;
+ if (!ST) {
+ return size;
+ }
+ Type *curType;
+ StructType::element_iterator eib;
+ StructType::element_iterator eie;
+ for (eib = ST->element_begin(), eie = ST->element_end();
+ eib != eie; ++eib) {
+ curType = *eib;
+ size += getNumElements(curType);
+ }
+ return size;
+}
+
+size_t getNumElements(IntegerType * const IT) {
+ return (!IT) ? 0 : 1;
+}
+
+size_t getNumElements(FunctionType * const FT) {
+ assert(0 && "Should not be able to calculate the number of "
+ "elements of a function type");
+ return 0;
+}
+
+size_t getNumElements(ArrayType * const AT) {
+ return (!AT) ? 0
+ : (size_t)(getNumElements(AT->getElementType()) *
+ AT->getNumElements());
+}
+
+size_t getNumElements(VectorType * const VT) {
+ return (!VT) ? 0
+ : VT->getNumElements() * getNumElements(VT->getElementType());
+}
+
+size_t getNumElements(PointerType * const PT) {
+ size_t size = 0;
+ if (!PT) {
+ return size;
+ }
+ for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
+ size += getNumElements(PT->getContainedType(x));
+ }
+ return size;
+}
+
+const llvm::Value *getBasePointerValue(const llvm::Value *V)
+{
+ if (!V) {
+ return NULL;
+ }
+ const Value *ret = NULL;
+ ValueMap<const Value *, bool> ValueBitMap;
+ std::queue<const Value *, std::list<const Value *> > ValueQueue;
+ ValueQueue.push(V);
+ while (!ValueQueue.empty()) {
+ V = ValueQueue.front();
+ if (ValueBitMap.find(V) == ValueBitMap.end()) {
+ ValueBitMap[V] = true;
+ if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
+ ret = V;
+ break;
+ } else if (dyn_cast<GlobalVariable>(V)) {
+ ret = V;
+ break;
+ } else if (dyn_cast<Constant>(V)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+ if (CE) {
+ ValueQueue.push(CE->getOperand(0));
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ ret = AI;
+ break;
+ } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ uint32_t numOps = I->getNumOperands();
+ for (uint32_t x = 0; x < numOps; ++x) {
+ ValueQueue.push(I->getOperand(x));
+ }
+ } else {
+ // assert(0 && "Found a Value that we didn't know how to handle!");
+ }
+ }
+ ValueQueue.pop();
+ }
+ return ret;
+}
+
+const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI) {
+ const Value *moVal = NULL;
+ if (!MI->memoperands_empty()) {
+ const MachineMemOperand *memOp = (*MI->memoperands_begin());
+ moVal = memOp ? memOp->getValue() : NULL;
+ moVal = getBasePointerValue(moVal);
+ }
+ return moVal;
+}
+
+bool commaPrint(int i, llvm::raw_ostream &O) {
+ O << ":" << i;
+ return false;
+}
+
+bool isLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
+ if (strstr(GET_OPCODE_NAME(TII, MI), "LOADCONST")) {
+ return false;
+ }
+ return strstr(GET_OPCODE_NAME(TII, MI), "LOAD");
+}
+
+bool isSWSExtLoadInst(MachineInstr *MI)
+{
+switch (MI->getOpcode()) {
+ default:
+ break;
+ ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD);
+ ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD);
+ ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD);
+ ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD);
+ ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD);
+ ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD);
+ return true;
+ };
+ return false;
+}
+
+bool isExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
+ return strstr(GET_OPCODE_NAME(TII, MI), "EXTLOAD");
+}
+
+bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
+ return strstr(GET_OPCODE_NAME(TII, MI), "SEXTLOAD");
+}
+
+bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
+ return strstr(GET_OPCODE_NAME(TII, MI), "AEXTLOAD");
+}
+
+bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
+ return strstr(GET_OPCODE_NAME(TII, MI), "ZEXTLOAD");
+}
+
+bool isStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
+ return strstr(GET_OPCODE_NAME(TII, MI), "STORE");
+}
+
+bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
+ return strstr(GET_OPCODE_NAME(TII, MI), "TRUNCSTORE");
+}
+
+bool isAtomicInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
+ return strstr(GET_OPCODE_NAME(TII, MI), "ATOM");
+}
+
+bool isVolatileInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
+ if (!MI->memoperands_empty()) {
+ for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(),
+ moe = MI->memoperands_end(); mob != moe; ++mob) {
+ // If there is a volatile mem operand, this is a volatile instruction.
+ if ((*mob)->isVolatile()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "GLOBAL");
+}
+bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "PRIVATE");
+}
+bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "CONSTANT")
+ || strstr(GET_OPCODE_NAME(TII, MI), "CPOOL");
+}
+bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "REGION");
+}
+bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "LOCAL");
+}
+bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "IMAGE");
+}
+bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "APPEND");
+}
+bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_R");
+}
+bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_L");
+}
+bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_G")
+ || isArenaAtomic(TII, MI);
+}
+bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
+{
+ return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_A");
+}
+
+const char* getSrcSwizzle(unsigned idx) {
+ const char *srcSwizzles[] = {
+ "", ".x000", ".0x00", ".00x0", ".000x", ".y000", ".0y00", ".00y0", ".000y",
+ ".z000", ".0z00", ".00z0", ".000z", ".w000", ".0w00", ".00w0", ".000w",
+ ".xy00", ".00xy", ".zw00", ".00zw", ".xyz0", ".0xyz", ".xyzw", ".0000",
+ ".xxxx", ".yyyy", ".zzzz", ".wwww", ".xyxy", ".zwzw", ".xzxz", ".ywyw",
+ ".x0y0", ".0x0y", ".xy_neg(y)", "_neg(yw)", "_neg(x)", ".xy_neg(xy)",
+ "_neg(xyzw)", ".0yzw", ".x0zw", ".xy0w", ".x", ".y", ".z", ".w", ".xy",
+ ".zw"
+ };
+ assert(idx < sizeof(srcSwizzles)/sizeof(srcSwizzles[0])
+ && "Idx passed in is invalid!");
+ return srcSwizzles[idx];
+}
+const char* getDstSwizzle(unsigned idx) {
+ const char *dstSwizzles[] = {
+ "", ".x___", ".xy__", ".xyz_", ".xyzw", "._y__", "._yz_", "._yzw", ".__z_",
+ ".__zw", ".___w", ".x_zw", ".xy_w", ".x_z_", ".x__w", "._y_w",
+ };
+ assert(idx < sizeof(dstSwizzles)/sizeof(dstSwizzles[0])
+ && "Idx passed in is invalid!");
+ return dstSwizzles[idx];
+}
+/// Helper function to get the currently set flags
+void getAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes)
+{
+ // We need 16 bits of information, but LLVMr127097 cut the field in half.
+ // So we have to use two different fields to store all of our information.
+ uint16_t upper = MI->getFlags() << 8;
+ uint16_t lower = MI->getAsmPrinterFlags();
+ curRes.u16all = upper | lower;
+}
+/// Helper function to clear the currently set flags and add the new flags.
+void setAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes)
+{
+ // We need 16 bits of information, but LLVMr127097 cut the field in half.
+ // So we have to use two different fields to store all of our information.
+ MI->clearAsmPrinterFlags();
+ MI->setFlags(0);
+ uint8_t lower = curRes.u16all & 0xFF;
+ uint8_t upper = (curRes.u16all >> 8) & 0xFF;
+ MI->setFlags(upper);
+ MI->setAsmPrinterFlag((llvm::MachineInstr::CommentFlag)lower);
+}
diff --git a/lib/Target/AMDIL/AMDILUtilityFunctions.h b/lib/Target/AMDIL/AMDILUtilityFunctions.h
new file mode 100644
index 0000000..637c868
--- /dev/null
+++ b/lib/Target/AMDIL/AMDILUtilityFunctions.h
@@ -0,0 +1,362 @@
+//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file provides declarations for functions that are used across different
+// classes and provide various conversions or utility to shorten the code
+//
+//===----------------------------------------------------------------------===//
+#ifndef AMDILUTILITYFUNCTIONS_H_
+#define AMDILUTILITYFUNCTIONS_H_
+
+#include "AMDIL.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
+
+// Utility functions from ID
+//
+namespace llvm {
+class TargetRegisterClass;
+class SDValue;
+class SDNode;
+class Value;
+class Type;
+class StructType;
+class IntegerType;
+class FunctionType;
+class VectorType;
+class ArrayType;
+class PointerType;
+class OpaqueType;
+class MachineInstr;
+
+}
+enum SrcSwizzles {
+ AMDIL_SRC_SWIZZLE_DEFAULT = 0,
+ AMDIL_SRC_SWIZZLE_X000,
+ AMDIL_SRC_SWIZZLE_0X00,
+ AMDIL_SRC_SWIZZLE_00X0,
+ AMDIL_SRC_SWIZZLE_000X,
+ AMDIL_SRC_SWIZZLE_Y000,
+ AMDIL_SRC_SWIZZLE_0Y00,
+ AMDIL_SRC_SWIZZLE_00Y0,
+ AMDIL_SRC_SWIZZLE_000Y,
+ AMDIL_SRC_SWIZZLE_Z000,
+ AMDIL_SRC_SWIZZLE_0Z00,
+ AMDIL_SRC_SWIZZLE_00Z0,
+ AMDIL_SRC_SWIZZLE_000Z,
+ AMDIL_SRC_SWIZZLE_W000,
+ AMDIL_SRC_SWIZZLE_0W00,
+ AMDIL_SRC_SWIZZLE_00W0,
+ AMDIL_SRC_SWIZZLE_000W,
+ AMDIL_SRC_SWIZZLE_XY00,
+ AMDIL_SRC_SWIZZLE_00XY,
+ AMDIL_SRC_SWIZZLE_ZW00,
+ AMDIL_SRC_SWIZZLE_00ZW,
+ AMDIL_SRC_SWIZZLE_XYZ0,
+ AMDIL_SRC_SWIZZLE_0XYZ,
+ AMDIL_SRC_SWIZZLE_XYZW,
+ AMDIL_SRC_SWIZZLE_0000,
+ AMDIL_SRC_SWIZZLE_XXXX,
+ AMDIL_SRC_SWIZZLE_YYYY,
+ AMDIL_SRC_SWIZZLE_ZZZZ,
+ AMDIL_SRC_SWIZZLE_WWWW,
+ AMDIL_SRC_SWIZZLE_XYXY,
+ AMDIL_SRC_SWIZZLE_ZWZW,
+ AMDIL_SRC_SWIZZLE_XZXZ,
+ AMDIL_SRC_SWIZZLE_YWYW,
+ AMDIL_SRC_SWIZZLE_X0Y0,
+ AMDIL_SRC_SWIZZLE_0X0Y,
+ AMDIL_SRC_SWIZZLE_XY_NEGY,
+ AMDIL_SRC_SWIZZLE_NEGYW,
+ AMDIL_SRC_SWIZZLE_NEGX,
+ AMDIL_SRC_SWIZZLE_XY_NEGXY,
+ AMDIL_SRC_SWIZZLE_NEG_XYZW,
+ AMDIL_SRC_SWIZZLE_0YZW,
+ AMDIL_SRC_SWIZZLE_X0ZW,
+ AMDIL_SRC_SWIZZLE_XY0W,
+ AMDIL_SRC_SWIZZLE_X,
+ AMDIL_SRC_SWIZZLE_Y,
+ AMDIL_SRC_SWIZZLE_Z,
+ AMDIL_SRC_SWIZZLE_W,
+ AMDIL_SRC_SWIZZLE_XY,
+ AMDIL_SRC_SWIZZLE_ZW,
+ AMDIL_SRC_SWIZZLE_LAST
+};
+enum DstSwizzles {
+ AMDIL_DST_SWIZZLE_DEFAULT = 0,
+ AMDIL_DST_SWIZZLE_X___,
+ AMDIL_DST_SWIZZLE_XY__,
+ AMDIL_DST_SWIZZLE_XYZ_,
+ AMDIL_DST_SWIZZLE_XYZW,
+ AMDIL_DST_SWIZZLE__Y__,
+ AMDIL_DST_SWIZZLE__YZ_,
+ AMDIL_DST_SWIZZLE__YZW,
+ AMDIL_DST_SWIZZLE___Z_,
+ AMDIL_DST_SWIZZLE___ZW,
+ AMDIL_DST_SWIZZLE____W,
+ AMDIL_DST_SWIZZLE_X_ZW,
+ AMDIL_DST_SWIZZLE_XY_W,
+ AMDIL_DST_SWIZZLE_X_Z_,
+ AMDIL_DST_SWIZZLE_X__W,
+ AMDIL_DST_SWIZZLE__Y_W,
+ AMDIL_DST_SWIZZLE_LAST
+};
+// Function to get the correct src swizzle string from ID
+const char *getSrcSwizzle(unsigned);
+
+// Function to get the correct dst swizzle string from ID
+const char *getDstSwizzle(unsigned);
+
+const llvm::TargetRegisterClass *getRegClassFromID(unsigned int ID);
+
+unsigned int getMoveInstFromID(unsigned int ID);
+unsigned int getPHIMoveInstFromID(unsigned int ID);
+
+// Utility functions from Type.
+const llvm::TargetRegisterClass *getRegClassFromType(unsigned int type);
+unsigned int getTargetIndependentMoveFromType(unsigned int type);
+
+// Debug functions for SDNode and SDValue.
+void printSDValue(const llvm::SDValue &Op, int level);
+void printSDNode(const llvm::SDNode *N);
+
+// Functions to check if an opcode is a specific type.
+bool isMove(unsigned int opcode);
+bool isPHIMove(unsigned int opcode);
+bool isMoveOrEquivalent(unsigned int opcode);
+
+// Function to check address space
+bool check_type(const llvm::Value *ptr, unsigned int addrspace);
+
+// Group of functions that recursively calculate the size of a structure based
+// on it's sub-types.
+size_t getTypeSize(llvm::Type * const T, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::StructType * const ST, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::IntegerType * const IT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::FunctionType * const FT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::ArrayType * const AT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::VectorType * const VT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::PointerType * const PT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::OpaqueType * const OT, bool dereferencePtr = false);
+
+// Group of functions that recursively calculate the number of elements of a
+// structure based on it's sub-types.
+size_t getNumElements(llvm::Type * const T);
+size_t getNumElements(llvm::StructType * const ST);
+size_t getNumElements(llvm::IntegerType * const IT);
+size_t getNumElements(llvm::FunctionType * const FT);
+size_t getNumElements(llvm::ArrayType * const AT);
+size_t getNumElements(llvm::VectorType * const VT);
+size_t getNumElements(llvm::PointerType * const PT);
+size_t getNumElements(llvm::OpaqueType * const OT);
+const llvm::Value *getBasePointerValue(const llvm::Value *V);
+const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI);
+
+
+int64_t GET_SCALAR_SIZE(llvm::Type* A);
+
+// Helper functions that check the opcode for status information
+bool isLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isSWSExtLoadInst(llvm::MachineInstr *MI);
+bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isAtomicInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isVolatileInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
+
+
+// Macros that are used to help with switch statements for various data types
+// However, these macro's do not return anything unlike the second set below.
+#define ExpandCaseTo32bitIntTypes(Instr) \
+case Instr##_i8: \
+case Instr##_i16: \
+case Instr##_i32:
+
+#define ExpandCaseTo32bitIntTruncTypes(Instr) \
+case Instr##_i16i8: \
+case Instr##_i32i8: \
+case Instr##_i32i16:
+
+#define ExpandCaseToIntTypes(Instr) \
+ ExpandCaseTo32bitIntTypes(Instr) \
+case Instr##_i64:
+
+#define ExpandCaseToIntTruncTypes(Instr) \
+ ExpandCaseTo32bitIntTruncTypes(Instr) \
+case Instr##_i64i8:\
+case Instr##_i64i16:\
+case Instr##_i64i32:\
+
+#define ExpandCaseToFloatTypes(Instr) \
+ case Instr##_f32: \
+case Instr##_f64:
+
+#define ExpandCaseToFloatTruncTypes(Instr) \
+case Instr##_f64f32:
+
+#define ExpandCaseTo32bitScalarTypes(Instr) \
+ ExpandCaseTo32bitIntTypes(Instr) \
+case Instr##_f32:
+
+#define ExpandCaseToAllScalarTypes(Instr) \
+ ExpandCaseToFloatTypes(Instr) \
+ExpandCaseToIntTypes(Instr)
+
+#define ExpandCaseToAllScalarTruncTypes(Instr) \
+ ExpandCaseToFloatTruncTypes(Instr) \
+ExpandCaseToIntTruncTypes(Instr)
+
+// Vector versions of above macros
+#define ExpandCaseToVectorIntTypes(Instr) \
+ case Instr##_v2i8: \
+case Instr##_v4i8: \
+case Instr##_v2i16: \
+case Instr##_v4i16: \
+case Instr##_v2i32: \
+case Instr##_v4i32: \
+case Instr##_v2i64:
+
+#define ExpandCaseToVectorIntTruncTypes(Instr) \
+case Instr##_v2i16i8: \
+case Instr##_v4i16i8: \
+case Instr##_v2i32i8: \
+case Instr##_v4i32i8: \
+case Instr##_v2i32i16: \
+case Instr##_v4i32i16: \
+case Instr##_v2i64i8: \
+case Instr##_v2i64i16: \
+case Instr##_v2i64i32:
+
+#define ExpandCaseToVectorFloatTypes(Instr) \
+ case Instr##_v2f32: \
+case Instr##_v4f32: \
+case Instr##_v2f64:
+
+#define ExpandCaseToVectorFloatTruncTypes(Instr) \
+case Instr##_v2f64f32:
+
+#define ExpandCaseToVectorByteTypes(Instr) \
+ case Instr##_v4i8:\
+case Instr##_v2i16: \
+case Instr##_v4i16:
+
+#define ExpandCaseToAllVectorTypes(Instr) \
+ ExpandCaseToVectorFloatTypes(Instr) \
+ExpandCaseToVectorIntTypes(Instr)
+
+#define ExpandCaseToAllVectorTruncTypes(Instr) \
+ ExpandCaseToVectorFloatTruncTypes(Instr) \
+ExpandCaseToVectorIntTruncTypes(Instr)
+
+#define ExpandCaseToAllTypes(Instr) \
+ ExpandCaseToAllVectorTypes(Instr) \
+ExpandCaseToAllScalarTypes(Instr)
+
+#define ExpandCaseToAllTruncTypes(Instr) \
+ ExpandCaseToAllVectorTruncTypes(Instr) \
+ExpandCaseToAllScalarTruncTypes(Instr)
+
+#define ExpandCaseToPackedTypes(Instr) \
+ case Instr##_v2i8: \
+ case Instr##_v4i8: \
+ case Instr##_v2i16: \
+ case Instr##_v4i16:
+
+#define ExpandCaseToByteShortTypes(Instr) \
+ case Instr##_i8: \
+ case Instr##_i16: \
+ ExpandCaseToPackedTypes(Instr)
+
+// Macros that expand into case statements with return values
+#define ExpandCaseTo32bitIntReturn(Instr, Return) \
+case Instr##_i8: return Return##_i8;\
+case Instr##_i16: return Return##_i16;\
+case Instr##_i32: return Return##_i32;
+
+#define ExpandCaseToIntReturn(Instr, Return) \
+ ExpandCaseTo32bitIntReturn(Instr, Return) \
+case Instr##_i64: return Return##_i64;
+
+#define ExpandCaseToFloatReturn(Instr, Return) \
+ case Instr##_f32: return Return##_f32;\
+case Instr##_f64: return Return##_f64;
+
+#define ExpandCaseToAllScalarReturn(Instr, Return) \
+ ExpandCaseToFloatReturn(Instr, Return) \
+ExpandCaseToIntReturn(Instr, Return)
+
+// These macros expand to common groupings of RegClass ID's
+#define ExpandCaseTo1CompRegID \
+case AMDIL::GPRI8RegClassID: \
+case AMDIL::GPRI16RegClassID: \
+case AMDIL::GPRI32RegClassID: \
+case AMDIL::GPRF32RegClassID:
+
+#define ExpandCaseTo2CompRegID \
+ case AMDIL::GPRI64RegClassID: \
+case AMDIL::GPRF64RegClassID: \
+case AMDIL::GPRV2I8RegClassID: \
+case AMDIL::GPRV2I16RegClassID: \
+case AMDIL::GPRV2I32RegClassID: \
+case AMDIL::GPRV2F32RegClassID:
+
+// Macros that expand to case statements for specific bitlengths
+#define ExpandCaseTo8BitType(Instr) \
+ case Instr##_i8:
+
+#define ExpandCaseTo16BitType(Instr) \
+ case Instr##_v2i8: \
+case Instr##_i16:
+
+#define ExpandCaseTo32BitType(Instr) \
+ case Instr##_v4i8: \
+case Instr##_v2i16: \
+case Instr##_i32: \
+case Instr##_f32:
+
+#define ExpandCaseTo64BitType(Instr) \
+ case Instr##_v4i16: \
+case Instr##_v2i32: \
+case Instr##_v2f32: \
+case Instr##_i64: \
+case Instr##_f64:
+
+#define ExpandCaseTo128BitType(Instr) \
+ case Instr##_v4i32: \
+case Instr##_v4f32: \
+case Instr##_v2i64: \
+case Instr##_v2f64:
+
+bool commaPrint(int i, llvm::raw_ostream &O);
+/// Helper function to get the currently get/set flags.
+void getAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes);
+void setAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes);
+
+#endif // AMDILUTILITYFUNCTIONS_H_
diff --git a/lib/Target/AMDIL/CMakeLists.txt b/lib/Target/AMDIL/CMakeLists.txt
new file mode 100644
index 0000000..dac9fe0
--- /dev/null
+++ b/lib/Target/AMDIL/CMakeLists.txt
@@ -0,0 +1,61 @@
+set(LLVM_TARGET_DEFINITIONS AMDIL.td)
+
+tablegen(LLVM AMDILGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AMDILGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM AMDILGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM AMDILGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AMDILGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM AMDILGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM AMDILGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM AMDILGenIntrinsics.inc -gen-tgt-intrinsic)
+tablegen(LLVM AMDILGenCodeEmitter.inc -gen-emitter)
+add_public_tablegen_target(AMDILCommonTableGen)
+
+add_llvm_target(AMDILCodeGen
+ AMDIL7XXDevice.cpp
+ AMDILCFGStructurizer.cpp
+ AMDILDevice.cpp
+ AMDILDeviceInfo.cpp
+ AMDILEvergreenDevice.cpp
+ AMDILFrameLowering.cpp
+ AMDILInstrInfo.cpp
+ AMDILIntrinsicInfo.cpp
+ AMDILISelDAGToDAG.cpp
+ AMDILISelLowering.cpp
+ AMDILMachinePeephole.cpp
+ AMDILMCCodeEmitter.cpp
+ AMDILNIDevice.cpp
+ AMDILPeepholeOptimizer.cpp
+ AMDILRegisterInfo.cpp
+ AMDILSIDevice.cpp
+ AMDILSubtarget.cpp
+ AMDILTargetMachine.cpp
+ AMDILUtilityFunctions.cpp
+ AMDGPUTargetMachine.cpp
+ AMDGPUISelLowering.cpp
+ AMDGPUConvertToISA.cpp
+ AMDGPULowerInstructions.cpp
+ AMDGPULowerShaderInstructions.cpp
+ AMDGPUReorderPreloadInstructions.cpp
+ AMDGPUInstrInfo.cpp
+ AMDGPURegisterInfo.cpp
+ AMDGPUUtil.cpp
+ R600CodeEmitter.cpp
+ R600InstrInfo.cpp
+ R600ISelLowering.cpp
+ R600KernelParameters.cpp
+ R600LowerInstructions.cpp
+ R600LowerShaderInstructions.cpp
+ R600RegisterInfo.cpp
+ SIAssignInterpRegs.cpp
+ SICodeEmitter.cpp
+ SIInstrInfo.cpp
+ SIISelLowering.cpp
+ SILowerShaderInstructions.cpp
+ SIMachineFunctionInfo.cpp
+ SIPropagateImmReads.cpp
+ SIRegisterInfo.cpp
+ )
+
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/AMDIL/LLVMBuild.txt b/lib/Target/AMDIL/LLVMBuild.txt
new file mode 100644
index 0000000..ef39aa1
--- /dev/null
+++ b/lib/Target/AMDIL/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/AMDIL/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = AMDIL
+parent = Target
+has_asmprinter = 0
+
+[component_1]
+type = Library
+name = AMDILCodeGen
+parent = AMDIL
+required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC AMDILInfo AMDILDesc
+add_to_library_groups = AMDIL
diff --git a/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.cpp b/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.cpp
new file mode 100644
index 0000000..5b62311
--- /dev/null
+++ b/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.cpp
@@ -0,0 +1,107 @@
+//===-- MCTargetDesc/AMDILMCAsmInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILMCAsmInfo.h"
+#ifndef NULL
+#define NULL 0
+#endif
+
+using namespace llvm;
+AMDILMCAsmInfo::AMDILMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
+{
+ //===------------------------------------------------------------------===//
+ HasSubsectionsViaSymbols = true;
+ HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
+ HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = true;
+ MaxInstLength = 16;
+ PCSymbol = "$";
+ SeparatorString = "\n";
+ CommentColumn = 40;
+ CommentString = ";";
+ LabelSuffix = ":";
+ GlobalPrefix = "@";
+ PrivateGlobalPrefix = ";.";
+ LinkerPrivateGlobalPrefix = "!";
+ InlineAsmStart = ";#ASMSTART";
+ InlineAsmEnd = ";#ASMEND";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = false;
+
+ //===--- Data Emission Directives -------------------------------------===//
+ ZeroDirective = ".zero";
+ AsciiDirective = ".ascii\t";
+ AscizDirective = ".asciz\t";
+ Data8bitsDirective = ".byte\t";
+ Data16bitsDirective = ".short\t";
+ Data32bitsDirective = ".long\t";
+ Data64bitsDirective = ".quad\t";
+ GPRel32Directive = NULL;
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+ HasMicrosoftFastStdCallMangling = false;
+
+ //===--- Alignment Information ----------------------------------------===//
+ AlignDirective = ".align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+
+ //===--- Global Variable Emission Directives --------------------------===//
+ GlobalDirective = ".global";
+ ExternDirective = ".extern";
+ HasSetDirective = false;
+ HasAggressiveSymbolFolding = true;
+ LCOMMDirectiveType = LCOMM::None;
+ COMMDirectiveAlignmentIsInBytes = false;
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = true;
+ HasNoDeadStrip = true;
+ HasSymbolResolver = false;
+ WeakRefDirective = ".weakref\t";
+ WeakDefDirective = ".weakdef\t";
+ LinkOnceDirective = NULL;
+ HiddenVisibilityAttr = MCSA_Hidden;
+ HiddenDeclarationVisibilityAttr = MCSA_Hidden;
+ ProtectedVisibilityAttr = MCSA_Protected;
+
+ //===--- Dwarf Emission Directives -----------------------------------===//
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::None;
+ DwarfUsesInlineInfoSection = false;
+ DwarfSectionOffsetDirective = ".offset";
+ DwarfUsesLabelOffsetForRanges = true;
+
+ //===--- CBE Asm Translation Table -----------------------------------===//
+ AsmTransCBE = NULL;
+}
+const char*
+AMDILMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
+{
+ switch (AS) {
+ default:
+ return NULL;
+ case 0:
+ return NULL;
+ };
+ return NULL;
+}
+
+const MCSection*
+AMDILMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const
+{
+ return NULL;
+}
diff --git a/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.h b/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.h
new file mode 100644
index 0000000..d354b03
--- /dev/null
+++ b/lib/Target/AMDIL/MCTargetDesc/AMDILMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- MCTargetDesc/AMDILMCAsmInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILMCASMINFO_H_
+#define AMDILMCASMINFO_H_
+
+#include "llvm/MC/MCAsmInfo.h"
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ class AMDILMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit AMDILMCAsmInfo(const Target &T, StringRef &TT);
+ const char*
+ getDataASDirective(unsigned int Size, unsigned int AS) const;
+ const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
+ };
+} // namespace llvm
+#endif // AMDILMCASMINFO_H_
diff --git a/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.cpp b/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.cpp
new file mode 100644
index 0000000..5e60b00
--- /dev/null
+++ b/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.cpp
@@ -0,0 +1,66 @@
+#include "AMDILMCTargetDesc.h"
+#include "AMDILMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AMDILGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AMDILGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AMDILGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createAMDILMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAMDILMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAMDILMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAMDILMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createAMDILMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo * X = new MCSubtargetInfo();
+ InitAMDILMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createAMDILMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+extern "C" void LLVMInitializeAMDILTargetMC() {
+
+ RegisterMCAsmInfo<AMDILMCAsmInfo> X(TheAMDILTarget);
+ RegisterMCAsmInfo<AMDILMCAsmInfo> Y(TheAMDGPUTarget);
+
+ TargetRegistry::RegisterMCCodeGenInfo(TheAMDILTarget, createAMDILMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDILMCCodeGenInfo);
+
+ TargetRegistry::RegisterMCInstrInfo(TheAMDILTarget, createAMDILMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDILMCInstrInfo);
+
+ TargetRegistry::RegisterMCRegInfo(TheAMDILTarget, createAMDILMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDILMCRegisterInfo);
+
+ TargetRegistry::RegisterMCSubtargetInfo(TheAMDILTarget, createAMDILMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDILMCSubtargetInfo);
+
+}
diff --git a/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.h b/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.h
new file mode 100644
index 0000000..370769f
--- /dev/null
+++ b/lib/Target/AMDIL/MCTargetDesc/AMDILMCTargetDesc.h
@@ -0,0 +1,36 @@
+//===-- AMDILMCTargetDesc.h - AMDIL Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AMDIL specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef AMDILMCTARGETDESC_H
+#define AMDILMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+
+extern Target TheAMDILTarget;
+extern Target TheAMDGPUTarget;
+
+} // End llvm namespace
+
+#define GET_REGINFO_ENUM
+#include "AMDILGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#include "AMDILGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDILGenSubtargetInfo.inc"
+
+#endif // AMDILMCTARGETDESC_H
diff --git a/lib/Target/AMDIL/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDIL/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..39c9b1e
--- /dev/null
+++ b/lib/Target/AMDIL/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+add_llvm_library(LLVMAMDILDesc
+ AMDILMCTargetDesc.cpp
+ AMDILMCAsmInfo.cpp
+ )
+
+add_dependencies(LLVMAMDILDesc AMDILCommonTableGen)
diff --git a/lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt b/lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000..828009e
--- /dev/null
+++ b/lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AMDILDesc
+parent = AMDIL
+required_libraries = AMDILInfo MC
+add_to_library_groups = AMDIL
diff --git a/lib/Target/AMDIL/MCTargetDesc/Makefile b/lib/Target/AMDIL/MCTargetDesc/Makefile
new file mode 100644
index 0000000..eb61a5d
--- /dev/null
+++ b/lib/Target/AMDIL/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AMDIL/TargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAMDILDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AMDIL/Makefile b/lib/Target/AMDIL/Makefile
new file mode 100644
index 0000000..fc49d09
--- /dev/null
+++ b/lib/Target/AMDIL/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/AMDIL/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMAMDILCodeGen
+TARGET = AMDIL
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AMDILGenRegisterInfo.inc AMDILGenInstrInfo.inc \
+ AMDILGenAsmWriter.inc AMDILGenDAGISel.inc \
+ AMDILGenSubtargetInfo.inc AMDILGenCodeEmitter.inc \
+ AMDILGenCallingConv.inc AMDILGenEDInfo.inc \
+ AMDILGenIntrinsics.inc
+
+DIRS = TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AMDIL/R600ISelLowering.cpp b/lib/Target/AMDIL/R600ISelLowering.cpp
new file mode 100644
index 0000000..94c183e
--- /dev/null
+++ b/lib/Target/AMDIL/R600ISelLowering.cpp
@@ -0,0 +1,116 @@
+//===-- R600ISelLowering.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
+{
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+// setSchedulingPreference(Sched::VLIW);
+ addRegisterClass(MVT::v4f32, &AMDIL::R600_Reg128RegClass);
+ addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+}
+
+MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const
+{
+ MachineFunction * MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ switch (MI->getOpcode()) {
+ default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ /* XXX: Use helper function from AMDGPULowerShaderInstructions here */
+ case AMDIL::TGID_X:
+ addLiveIn(MI, MF, MRI, AMDIL::T1_X);
+ break;
+ case AMDIL::TGID_Y:
+ addLiveIn(MI, MF, MRI, AMDIL::T1_Y);
+ break;
+ case AMDIL::TGID_Z:
+ addLiveIn(MI, MF, MRI, AMDIL::T1_Z);
+ break;
+ case AMDIL::TIDIG_X:
+ addLiveIn(MI, MF, MRI, AMDIL::T0_X);
+ break;
+ case AMDIL::TIDIG_Y:
+ addLiveIn(MI, MF, MRI, AMDIL::T0_Y);
+ break;
+ case AMDIL::TIDIG_Z:
+ addLiveIn(MI, MF, MRI, AMDIL::T0_Z);
+ break;
+ case AMDIL::NGROUPS_X:
+ lowerImplicitParameter(MI, *BB, MRI, 0);
+ break;
+ case AMDIL::NGROUPS_Y:
+ lowerImplicitParameter(MI, *BB, MRI, 1);
+ break;
+ case AMDIL::NGROUPS_Z:
+ lowerImplicitParameter(MI, *BB, MRI, 2);
+ break;
+ case AMDIL::GLOBAL_SIZE_X:
+ lowerImplicitParameter(MI, *BB, MRI, 3);
+ break;
+ case AMDIL::GLOBAL_SIZE_Y:
+ lowerImplicitParameter(MI, *BB, MRI, 4);
+ break;
+ case AMDIL::GLOBAL_SIZE_Z:
+ lowerImplicitParameter(MI, *BB, MRI, 5);
+ break;
+ case AMDIL::LOCAL_SIZE_X:
+ lowerImplicitParameter(MI, *BB, MRI, 6);
+ break;
+ case AMDIL::LOCAL_SIZE_Y:
+ lowerImplicitParameter(MI, *BB, MRI, 7);
+ break;
+ case AMDIL::LOCAL_SIZE_Z:
+ lowerImplicitParameter(MI, *BB, MRI, 8);
+ break;
+ }
+ MI->eraseFromParent();
+ return BB;
+}
+
+void R600TargetLowering::addLiveIn(MachineInstr * MI,
+ MachineFunction * MF, MachineRegisterInfo & MRI, unsigned reg) const
+{
+ MRI.addLiveIn(reg, MI->getOperand(0).getReg());
+ BuildMI(MF->front(), MF->front().begin(), DebugLoc(), TII->get(TargetOpcode::COPY))
+ .addOperand(MI->getOperand(0))
+ .addReg(reg);
+}
+
+void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineRegisterInfo & MRI, unsigned dword_offset) const
+{
+ MachineBasicBlock::iterator I = *MI;
+ unsigned offsetReg = MRI.createVirtualRegister(&AMDIL::R600_TReg32_XRegClass);
+ MRI.setRegClass(MI->getOperand(0).getReg(), &AMDIL::R600_TReg32_XRegClass);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::MOV), offsetReg)
+ .addReg(AMDIL::ALU_LITERAL_X)
+ .addImm(dword_offset * 4);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::VTX_READ_eg))
+ .addOperand(MI->getOperand(0))
+ .addReg(offsetReg)
+ .addImm(0);
+}
diff --git a/lib/Target/AMDIL/R600ISelLowering.h b/lib/Target/AMDIL/R600ISelLowering.h
new file mode 100644
index 0000000..ed59a00
--- /dev/null
+++ b/lib/Target/AMDIL/R600ISelLowering.h
@@ -0,0 +1,43 @@
+//===-- R600ISelLowering.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600ISELLOWERING_H
+#define R600ISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+
+namespace llvm {
+
+class R600InstrInfo;
+
+class R600TargetLowering : public AMDGPUTargetLowering
+{
+public:
+ R600TargetLowering(TargetMachine &TM);
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock * BB) const;
+
+private:
+ const R600InstrInfo * TII;
+
+ void addLiveIn(MachineInstr * MI, MachineFunction * MF,
+ MachineRegisterInfo & MRI, unsigned reg) const;
+
+ void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineRegisterInfo & MRI, unsigned dword_offset) const;
+
+};
+
+} // End namespace llvm;
+
+#endif // R600ISELLOWERING_H
diff --git a/lib/Target/AMDIL/R600InstrInfo.cpp b/lib/Target/AMDIL/R600InstrInfo.cpp
new file mode 100644
index 0000000..0c7ffc4
--- /dev/null
+++ b/lib/Target/AMDIL/R600InstrInfo.cpp
@@ -0,0 +1,123 @@
+//===-- R600InstrInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600InstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600RegisterInfo.h"
+
+using namespace llvm;
+
+R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this),
+ TM(tm)
+ { }
+
+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
+{
+ return RI;
+}
+
+bool R600InstrInfo::isTrig(const MachineInstr &MI) const
+{
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
+}
+
+void
+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{
+
+ unsigned subRegMap[4] = {AMDIL::sel_x, AMDIL::sel_y, AMDIL::sel_z, AMDIL::sel_w};
+
+ if (AMDIL::R600_Reg128RegClass.contains(DestReg)
+ && AMDIL::R600_Reg128RegClass.contains(SrcReg)) {
+ for (unsigned i = 0; i < 4; i++) {
+ BuildMI(MBB, MI, DL, get(AMDIL::MOV))
+ .addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define)
+ .addReg(RI.getSubReg(SrcReg, subRegMap[i]))
+ .addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+ } else {
+
+ /* We can't copy vec4 registers */
+ assert(!AMDIL::R600_Reg128RegClass.contains(DestReg)
+ && !AMDIL::R600_Reg128RegClass.contains(SrcReg));
+
+ BuildMI(MBB, MI, DL, get(AMDIL::MOV), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+}
+
+unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const
+{
+ switch (opcode) {
+ default: return AMDGPUInstrInfo::getISAOpcode(opcode);
+ case AMDIL::CUSTOM_ADD_i32:
+ return AMDIL::ADD_INT;
+ case AMDIL::CUSTOM_XOR_i32:
+ return AMDIL::XOR_INT;
+ case AMDIL::MOVE_f32:
+ case AMDIL::MOVE_i32:
+ return AMDIL::MOV;
+ case AMDIL::SHR_i32:
+ return getLSHRop();
+ }
+}
+
+unsigned R600InstrInfo::getLSHRop() const
+{
+ unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
+ if (gen < AMDILDeviceInfo::HD5XXX) {
+ return AMDIL::LSHR_r600;
+ } else {
+ return AMDIL::LSHR_eg;
+ }
+}
+
+unsigned R600InstrInfo::getMULHI_UINT() const
+{
+ unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
+
+ if (gen < AMDILDeviceInfo::HD5XXX) {
+ return AMDIL::MULHI_UINT_r600;
+ } else {
+ return AMDIL::MULHI_UINT_eg;
+ }
+}
+
+unsigned R600InstrInfo::getMULLO_UINT() const
+{
+ unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
+
+ if (gen < AMDILDeviceInfo::HD5XXX) {
+ return AMDIL::MULLO_UINT_r600;
+ } else {
+ return AMDIL::MULLO_UINT_eg;
+ }
+}
+
+unsigned R600InstrInfo::getRECIP_UINT() const
+{
+ const AMDILDevice * dev = TM.getSubtarget<AMDILSubtarget>().device();
+
+ if (dev->getGeneration() < AMDILDeviceInfo::HD5XXX) {
+ return AMDIL::RECIP_UINT_r600;
+ } else if (dev->getDeviceFlag() != OCL_DEVICE_CAYMAN) {
+ return AMDIL::RECIP_UINT_eg;
+ } else {
+ return AMDIL::RECIP_UINT_cm;
+ }
+}
diff --git a/lib/Target/AMDIL/R600InstrInfo.h b/lib/Target/AMDIL/R600InstrInfo.h
new file mode 100644
index 0000000..aedaa9f
--- /dev/null
+++ b/lib/Target/AMDIL/R600InstrInfo.h
@@ -0,0 +1,74 @@
+//===-- R600InstrInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600INSTRUCTIONINFO_H_
+#define R600INSTRUCTIONINFO_H_
+
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "R600RegisterInfo.h"
+
+#include <map>
+
+namespace llvm {
+
+ struct InstrGroup {
+ unsigned amdil;
+ unsigned r600;
+ unsigned eg;
+ unsigned cayman;
+ };
+
+ class AMDGPUTargetMachine;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class R600InstrInfo : public AMDGPUInstrInfo {
+ private:
+ const R600RegisterInfo RI;
+ AMDGPUTargetMachine &TM;
+
+ public:
+ explicit R600InstrInfo(AMDGPUTargetMachine &tm);
+
+ const R600RegisterInfo &getRegisterInfo() const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual unsigned getISAOpcode(unsigned opcode) const;
+ bool isTrig(const MachineInstr &MI) const;
+
+ unsigned getLSHRop() const;
+ unsigned getMULHI_UINT() const;
+ unsigned getMULLO_UINT() const;
+ unsigned getRECIP_UINT() const;
+
+ };
+
+} // End llvm namespace
+
+namespace R600_InstFlag {
+ enum TIF {
+ TRANS_ONLY = (1 << 0),
+ TEX = (1 << 1),
+ REDUCTION = (1 << 2),
+ FC = (1 << 3),
+ TRIG = (1 << 4),
+ OP3 = (1 << 5)
+ };
+}
+
+#endif // R600INSTRINFO_H_
diff --git a/lib/Target/AMDIL/R600RegisterInfo.cpp b/lib/Target/AMDIL/R600RegisterInfo.cpp
new file mode 100644
index 0000000..96507b1
--- /dev/null
+++ b/lib/Target/AMDIL/R600RegisterInfo.cpp
@@ -0,0 +1,102 @@
+//===-- R600RegisterInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600RegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
+{
+ BitVector Reserved(getNumRegs());
+ Reserved.set(AMDIL::ZERO);
+ Reserved.set(AMDIL::HALF);
+ Reserved.set(AMDIL::ONE);
+ Reserved.set(AMDIL::ONE_INT);
+ Reserved.set(AMDIL::NEG_HALF);
+ Reserved.set(AMDIL::NEG_ONE);
+ Reserved.set(AMDIL::PV_X);
+ Reserved.set(AMDIL::ALU_LITERAL_X);
+
+ for (TargetRegisterClass::iterator I = AMDIL::R600_CReg32RegClass.begin(),
+ E = AMDIL::R600_CReg32RegClass.end(); I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ for (MachineFunction::const_iterator BB = MF.begin(),
+ BB_E = MF.end(); BB != BB_E; ++BB) {
+ const MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (MI.getOpcode() == AMDIL::RESERVE_REG) {
+ if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) {
+ Reserved.set(MI.getOperand(0).getReg());
+ }
+ }
+ }
+ }
+ return Reserved;
+}
+
+const TargetRegisterClass *
+R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
+{
+ switch (rc->getID()) {
+ case AMDIL::GPRV4F32RegClassID:
+ case AMDIL::GPRV4I32RegClassID:
+ return &AMDIL::R600_Reg128RegClass;
+ case AMDIL::GPRF32RegClassID:
+ case AMDIL::GPRI32RegClassID:
+ return &AMDIL::R600_Reg32RegClass;
+ default: return rc;
+ }
+}
+
+unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const
+{
+ switch(reg) {
+ case AMDIL::ZERO: return 248;
+ case AMDIL::ONE:
+ case AMDIL::NEG_ONE: return 249;
+ case AMDIL::ONE_INT: return 250;
+ case AMDIL::HALF:
+ case AMDIL::NEG_HALF: return 252;
+ case AMDIL::ALU_LITERAL_X: return 253;
+ default: return getHWRegIndexGen(reg);
+ }
+}
+
+unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
+{
+ switch(reg) {
+ case AMDIL::ZERO:
+ case AMDIL::ONE:
+ case AMDIL::ONE_INT:
+ case AMDIL::NEG_ONE:
+ case AMDIL::HALF:
+ case AMDIL::NEG_HALF:
+ case AMDIL::ALU_LITERAL_X:
+ return 0;
+ default: return getHWRegChanGen(reg);
+ }
+}
+
+#include "R600HwRegInfo.include"
diff --git a/lib/Target/AMDIL/R600RegisterInfo.h b/lib/Target/AMDIL/R600RegisterInfo.h
new file mode 100644
index 0000000..95a44f9
--- /dev/null
+++ b/lib/Target/AMDIL/R600RegisterInfo.h
@@ -0,0 +1,44 @@
+//===-- R600RegisterInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600REGISTERINFO_H_
+#define R600REGISTERINFO_H_
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDILRegisterInfo.h"
+
+namespace llvm {
+
+ class R600TargetMachine;
+ class TargetInstrInfo;
+
+ struct R600RegisterInfo : public AMDGPURegisterInfo
+ {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass * rc) const;
+ unsigned getHWRegIndex(unsigned reg) const;
+ unsigned getHWRegChan(unsigned reg) const;
+private:
+ unsigned getHWRegChanGen(unsigned reg) const;
+ unsigned getHWRegIndexGen(unsigned reg) const;
+ };
+} // End namespace llvm
+
+#endif // AMDIDSAREGISTERINFO_H_
diff --git a/lib/Target/AMDIL/SIISelLowering.cpp b/lib/Target/AMDIL/SIISelLowering.cpp
new file mode 100644
index 0000000..1a4b47e
--- /dev/null
+++ b/lib/Target/AMDIL/SIISelLowering.cpp
@@ -0,0 +1,151 @@
+//===-- SIISelLowering.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+SITargetLowering::SITargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
+{
+ addRegisterClass(MVT::v4f32, &AMDIL::VReg_128RegClass);
+ addRegisterClass(MVT::f32, &AMDIL::VReg_32RegClass);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+}
+
+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const
+{
+ const struct TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
+ MachineBasicBlock::iterator I = MI;
+
+ if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
+ AppendS_WAITCNT(MI, *BB, llvm::next(I));
+ }
+
+ switch (MI->getOpcode()) {
+ default:
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDIL::SI_INTERP:
+ LowerSI_INTERP(MI, *BB, I, MRI);
+ break;
+ case AMDIL::SI_INTERP_CONST:
+ LowerSI_INTERP_CONST(MI, *BB, I);
+ break;
+ case AMDIL::SI_V_CNDLT:
+ LowerSI_V_CNDLT(MI, *BB, I, MRI);
+ break;
+ case AMDIL::USE_SGPR_32:
+ case AMDIL::USE_SGPR_64:
+ lowerUSE_SGPR(MI, BB->getParent(), MRI);
+ MI->eraseFromParent();
+ break;
+ case AMDIL::VS_LOAD_BUFFER_INDEX:
+ addLiveIn(MI, BB->getParent(), MRI, TII, AMDIL::VGPR0);
+ MI->eraseFromParent();
+ break;
+ }
+ return BB;
+}
+
+void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I) const
+{
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::S_WAITCNT))
+ .addImm(0);
+}
+
+void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
+{
+ unsigned tmp = MRI.createVirtualRegister(&AMDIL::VReg_32RegClass);
+ MachineOperand dst = MI->getOperand(0);
+ MachineOperand iReg = MI->getOperand(1);
+ MachineOperand jReg = MI->getOperand(2);
+ MachineOperand attr_chan = MI->getOperand(3);
+ MachineOperand attr = MI->getOperand(4);
+ MachineOperand params = MI->getOperand(5);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::S_MOV_B32))
+ .addReg(AMDIL::M0)
+ .addOperand(params);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_INTERP_P1_F32), tmp)
+ .addOperand(iReg)
+ .addOperand(attr_chan)
+ .addOperand(attr);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_INTERP_P2_F32))
+ .addOperand(dst)
+ .addReg(tmp)
+ .addOperand(jReg)
+ .addOperand(attr_chan)
+ .addOperand(attr);
+
+ MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
+ MachineBasicBlock &BB, MachineBasicBlock::iterator I) const
+{
+ MachineOperand dst = MI->getOperand(0);
+ MachineOperand attr_chan = MI->getOperand(1);
+ MachineOperand attr = MI->getOperand(2);
+ MachineOperand params = MI->getOperand(3);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::S_MOV_B32))
+ .addReg(AMDIL::M0)
+ .addOperand(params);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_INTERP_MOV_F32))
+ .addOperand(dst)
+ .addOperand(attr_chan)
+ .addOperand(attr);
+
+ MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
+{
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_CMP_LT_F32_e32))
+ .addOperand(MI->getOperand(1))
+ .addReg(AMDIL::SREG_LIT_0);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDIL::V_CNDMASK_B32))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(3));
+
+ MI->eraseFromParent();
+}
+
+void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI,
+ MachineFunction * MF, MachineRegisterInfo & MRI) const
+{
+ const struct TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
+ unsigned dstReg = MI->getOperand(0).getReg();
+ int64_t newIndex = MI->getOperand(1).getImm();
+ const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg);
+
+ unsigned newReg = dstClass->getRegister(newIndex);
+ addLiveIn(MI, MF, MRI, TII, newReg);
+}
+
diff --git a/lib/Target/AMDIL/SIISelLowering.h b/lib/Target/AMDIL/SIISelLowering.h
new file mode 100644
index 0000000..e7a79f8
--- /dev/null
+++ b/lib/Target/AMDIL/SIISelLowering.h
@@ -0,0 +1,44 @@
+//===-- SIISelLowering.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SIISELLOWERING_H
+#define SIISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+#include "SIInstrInfo.h"
+
+namespace llvm {
+
+class SITargetLowering : public AMDGPUTargetLowering
+{
+ const SIInstrInfo * TII;
+
+ void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I) const;
+ void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+ void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I) const;
+ void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+ void lowerUSE_SGPR(MachineInstr *MI, MachineFunction * MF,
+ MachineRegisterInfo & MRI) const;
+public:
+ SITargetLowering(TargetMachine &tm);
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
+ MachineBasicBlock * BB) const;
+};
+
+} // End namespace llvm
+
+#endif //SIISELLOWERING_H
diff --git a/lib/Target/AMDIL/SIInstrInfo.cpp b/lib/Target/AMDIL/SIInstrInfo.cpp
new file mode 100644
index 0000000..f9eb59a
--- /dev/null
+++ b/lib/Target/AMDIL/SIInstrInfo.cpp
@@ -0,0 +1,135 @@
+//===-- SIInstrInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIInstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this),
+ TM(tm)
+ { }
+
+const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
+{
+ return RI;
+}
+
+void
+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{
+ BuildMI(MBB, MI, DL, get(AMDIL::V_MOV_B32_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+unsigned SIInstrInfo::getEncodingType(const MachineInstr &MI) const
+{
+ return get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
+}
+
+unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const
+{
+
+ /* Instructions with literal constants are expanded to 64-bits, and
+ * the constant is stored in bits [63:32] */
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ if (MI.getOperand(i).getType() == MachineOperand::MO_FPImmediate) {
+ return 8;
+ }
+ }
+
+ /* This instruction always has a literal */
+ if (MI.getOpcode() == AMDIL::S_MOV_IMM_I32) {
+ return 8;
+ }
+
+ unsigned encoding_type = getEncodingType(MI);
+ switch (encoding_type) {
+ case SIInstrEncodingType::EXP:
+ case SIInstrEncodingType::LDS:
+ case SIInstrEncodingType::MUBUF:
+ case SIInstrEncodingType::MTBUF:
+ case SIInstrEncodingType::MIMG:
+ case SIInstrEncodingType::VOP3:
+ return 8;
+ default:
+ return 4;
+ }
+}
+
+MachineInstr * SIInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const
+{
+
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDIL::ABS_f32: return convertABS_f32(MI, MF, DL);
+ }
+
+ MachineInstr * newMI = AMDGPUInstrInfo::convertToISA(MI, MF, DL);
+ const MCInstrDesc &newDesc = get(newMI->getOpcode());
+
+ /* If this instruction was converted to a VOP3, we need to add the extra
+ * operands for abs, clamp, omod, and negate. */
+ if (getEncodingType(*newMI) == SIInstrEncodingType::VOP3
+ && newMI->getNumOperands() < newDesc.getNumOperands()) {
+ MachineInstrBuilder builder(newMI);
+ for (unsigned op_idx = newMI->getNumOperands();
+ op_idx < newDesc.getNumOperands(); op_idx++) {
+ builder.addImm(0);
+ }
+ }
+ return newMI;
+}
+
+unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const
+{
+ switch (AMDILopcode) {
+ case AMDIL::MAD_f32: return AMDIL::V_MAD_LEGACY_F32;
+ default: return AMDGPUInstrInfo::getISAOpcode(AMDILopcode);
+ }
+}
+
+MachineInstr * SIInstrInfo::convertABS_f32(MachineInstr & absInstr,
+ MachineFunction &MF, DebugLoc DL) const
+{
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineOperand &dst = absInstr.getOperand(0);
+
+ /* Convert the desination register to the VReg_32 class */
+ if (TargetRegisterInfo::isVirtualRegister(dst.getReg())) {
+ MRI.setRegClass(dst.getReg(), &AMDIL::VReg_32RegClass);
+ }
+
+ return BuildMI(MF, DL, get(AMDIL::V_MOV_B32_e64))
+ .addOperand(absInstr.getOperand(0))
+ .addOperand(absInstr.getOperand(1))
+ /* VSRC1-2 are unused, but we still need to fill all the
+ * operand slots, so we just reuse the VSRC0 operand */
+ .addOperand(absInstr.getOperand(1))
+ .addOperand(absInstr.getOperand(1))
+ .addImm(1) // ABS
+ .addImm(0) // CLAMP
+ .addImm(0) // OMOD
+ .addImm(0); // NEG
+}
diff --git a/lib/Target/AMDIL/SIInstrInfo.h b/lib/Target/AMDIL/SIInstrInfo.h
new file mode 100644
index 0000000..32baaa9
--- /dev/null
+++ b/lib/Target/AMDIL/SIInstrInfo.h
@@ -0,0 +1,92 @@
+//===-- SIInstrInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIINSTRINFO_H
+#define SIINSTRINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+namespace llvm {
+
+ class SIInstrInfo : public AMDGPUInstrInfo {
+ private:
+ const SIRegisterInfo RI;
+ AMDGPUTargetMachine &TM;
+
+ MachineInstr * convertABS_f32(MachineInstr & absInstr, MachineFunction &MF,
+ DebugLoc DL) const;
+
+ public:
+ explicit SIInstrInfo(AMDGPUTargetMachine &tm);
+
+ const SIRegisterInfo &getRegisterInfo() const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ unsigned getEncodingType(const MachineInstr &MI) const;
+
+ unsigned getEncodingBytes(const MachineInstr &MI) const;
+
+ uint64_t getBinaryCode(const MachineInstr &MI, bool encodOpcode = false) const;
+
+ virtual MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+ virtual unsigned getISAOpcode(unsigned AMDILopcode) const;
+
+ };
+
+} // End namespace llvm
+
+/* These must be kept in sync with SIInstructions.td and also the
+ * InstrEncodingInfo array in SIInstrInfo.cpp.
+ *
+ * NOTE: This enum is only used to identify the encoding type within LLVM,
+ * the actual encoding type that is part of the instruction format is different
+ */
+namespace SIInstrEncodingType {
+ enum Encoding {
+ EXP = 0,
+ LDS = 1,
+ MIMG = 2,
+ MTBUF = 3,
+ MUBUF = 4,
+ SMRD = 5,
+ SOP1 = 6,
+ SOP2 = 7,
+ SOPC = 8,
+ SOPK = 9,
+ SOPP = 10,
+ VINTRP = 11,
+ VOP1 = 12,
+ VOP2 = 13,
+ VOP3 = 14,
+ VOPC = 15
+ };
+}
+
+#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
+
+namespace SIInstrFlags {
+ enum Flags {
+ /* First 4 bits are the instruction encoding */
+ NEED_WAIT = 1 << 4
+ };
+}
+
+#endif //SIINSTRINFO_H
diff --git a/lib/Target/AMDIL/SIMachineFunctionInfo.cpp b/lib/Target/AMDIL/SIMachineFunctionInfo.cpp
new file mode 100644
index 0000000..eace40c2
--- /dev/null
+++ b/lib/Target/AMDIL/SIMachineFunctionInfo.cpp
@@ -0,0 +1,22 @@
+//===-- SIMachineFunctionInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
+ : MachineFunctionInfo(),
+ spi_ps_input_addr(0)
+ { }
diff --git a/lib/Target/AMDIL/SIMachineFunctionInfo.h b/lib/Target/AMDIL/SIMachineFunctionInfo.h
new file mode 100644
index 0000000..5647de9
--- /dev/null
+++ b/lib/Target/AMDIL/SIMachineFunctionInfo.h
@@ -0,0 +1,35 @@
+//===-- SIMachineFunctionInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef _SIMACHINEFUNCTIONINFO_H_
+#define _SIMACHINEFUNCTIONINFO_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class SIMachineFunctionInfo : public MachineFunctionInfo {
+
+ private:
+
+ public:
+ SIMachineFunctionInfo(const MachineFunction &MF);
+ unsigned spi_ps_input_addr;
+
+};
+
+} // End namespace llvm
+
+
+#endif //_SIMACHINEFUNCTIONINFO_H_
diff --git a/lib/Target/AMDIL/SIRegisterInfo.cpp b/lib/Target/AMDIL/SIRegisterInfo.cpp
new file mode 100644
index 0000000..203ac9f
--- /dev/null
+++ b/lib/Target/AMDIL/SIRegisterInfo.cpp
@@ -0,0 +1,66 @@
+//===-- SIRegisterInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIRegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPUUtil.h"
+
+using namespace llvm;
+
+SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const
+{
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+unsigned SIRegisterInfo::getBinaryCode(unsigned reg) const
+{
+ switch (reg) {
+ case AMDIL::M0: return 124;
+ case AMDIL::SREG_LIT_0: return 128;
+ default: return getHWRegNum(reg);
+ }
+}
+
+bool SIRegisterInfo::isBaseRegClass(unsigned regClassID) const
+{
+ switch (regClassID) {
+ default: return true;
+ case AMDIL::AllReg_32RegClassID:
+ case AMDIL::AllReg_64RegClassID:
+ return false;
+ }
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
+{
+ switch (rc->getID()) {
+ case AMDIL::GPRF32RegClassID:
+ return &AMDIL::VReg_32RegClass;
+ case AMDIL::GPRV4F32RegClassID:
+ case AMDIL::GPRV4I32RegClassID:
+ return &AMDIL::VReg_128RegClass;
+ default: return rc;
+ }
+}
+
+#include "SIRegisterGetHWRegNum.include"
diff --git a/lib/Target/AMDIL/SIRegisterInfo.h b/lib/Target/AMDIL/SIRegisterInfo.h
new file mode 100644
index 0000000..c797e3c
--- /dev/null
+++ b/lib/Target/AMDIL/SIRegisterInfo.h
@@ -0,0 +1,46 @@
+//===-- SIRegisterInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIREGISTERINFO_H_
+#define SIREGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class TargetInstrInfo;
+
+ struct SIRegisterInfo : public AMDGPURegisterInfo
+ {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+ virtual unsigned getBinaryCode(unsigned reg) const;
+
+ virtual bool isBaseRegClass(unsigned regClassID) const;
+
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass * rc) const;
+
+ unsigned getHWRegNum(unsigned reg) const;
+
+ };
+
+} // End namespace llvm
+
+#endif // SIREGISTERINFO_H_
diff --git a/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp b/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp
new file mode 100644
index 0000000..5dee0cb
--- /dev/null
+++ b/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp
@@ -0,0 +1,32 @@
+//===-- TargetInfo/AMDILTargetInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+/// The target for the AMDIL backend
+Target llvm::TheAMDILTarget;
+
+/// The target for the AMDGPU backend
+Target llvm::TheAMDGPUTarget;
+
+/// Extern function to initialize the targets for the AMDIL backend
+extern "C" void LLVMInitializeAMDILTargetInfo() {
+ RegisterTarget<Triple::amdil, false>
+ IL(TheAMDILTarget, "amdil", "ATI graphics cards");
+
+ RegisterTarget<Triple::r600, false>
+ R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
+}
diff --git a/lib/Target/AMDIL/TargetInfo/CMakeLists.txt b/lib/Target/AMDIL/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..15ff3f9
--- /dev/null
+++ b/lib/Target/AMDIL/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAMDILInfo
+ AMDILTargetInfo.cpp
+ )
+
+add_dependencies(LLVMAMDILInfo AMDILCodeGenTable_gen)
diff --git a/lib/Target/AMDIL/TargetInfo/LLVMBuild.txt b/lib/Target/AMDIL/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000..746d7cd
--- /dev/null
+++ b/lib/Target/AMDIL/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AMDIL/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AMDILInfo
+parent = AMDIL
+required_libraries = MC Support
+add_to_library_groups = AMDIL
diff --git a/lib/Target/AMDIL/TargetInfo/Makefile b/lib/Target/AMDIL/TargetInfo/Makefile
new file mode 100644
index 0000000..2c02e9d
--- /dev/null
+++ b/lib/Target/AMDIL/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AMDIL/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAMDILInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
--
1.7.7.6
More information about the llvm-commits
mailing list