[llvm-branch-commits] [llvm-branch] r161895 [1/5] - in /llvm/branches/AMDILBackend/lib/Target: ./ AMDIL/ AMDIL/TargetInfo/

Victor Oliveira Victor.Oliveira at amd.com
Tue Aug 14 14:38:59 PDT 2012


Author: victorm
Date: Tue Aug 14 16:38:58 2012
New Revision: 161895

URL: http://llvm.org/viewvc/llvm-project?rev=161895&view=rev
Log:
AMDIL Backend - First commit


Added:
    llvm/branches/AMDILBackend/lib/Target/AMDIL/
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMPC.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMVersion.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem32.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem64.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMultiClass.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNodes.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILOperands.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPatterns.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManagerImpl.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPrintfConvert.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILProfiles.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsScalar.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV2.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV4.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalar.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarW.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarX.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarY.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarZ.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2XY.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2ZW.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV4.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTokenDesc.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILUtilityFunctions.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILUtilityFunctions.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILVersion.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/CMakeLists.txt
    llvm/branches/AMDILBackend/lib/Target/AMDIL/LLVMBuild.txt
    llvm/branches/AMDILBackend/lib/Target/AMDIL/Processors.td
    llvm/branches/AMDILBackend/lib/Target/AMDIL/TargetInfo/
    llvm/branches/AMDILBackend/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/TargetInfo/CMakeLists.txt
    llvm/branches/AMDILBackend/lib/Target/AMDIL/TargetInfo/LLVMBuild.txt
    llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodata.cpp
    llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodata.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodb.h
    llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodb_gen.h
Modified:
    llvm/branches/AMDILBackend/lib/Target/LLVMBuild.txt

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,318 @@
+//===-- AMDIL.h -----------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the
+// LLVM AMDIL back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H_
+#define AMDIL_H_
+#include "AMDILLLVMPC.h"
+#include "AMDILLLVMVersion.h"
+#include "AMDILInstPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define AMDIL_MAJOR_VERSION 3
+#define AMDIL_MINOR_VERSION 1
+#define AMDIL_REVISION_NUMBER 104
+#define AMDIL_20_REVISION_NUMBER 88
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+#define OPENCL_MAX_NUM_SEMAPHORES 15
+
+// The following ID values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID     1
+#define DEFAULT_GDS_ID     1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS  8
+
+// SC->CAL version mappings.
+#define CAL_CACHED_ALIGNED_UAVS          1679
+#define CAL_VERSION_SC_156               1650
+#define CAL_VERSION_SC_155               1630
+#define CAL_VERSION_SC_154               1624
+#define CAL_VERSION_SC_153               1616
+#define CAL_VERSION_SC_152               1603
+#define CAL_VERSION_SC_151               1589
+#define CAL_VERSION_SC_150               1561
+#define CAL_VERSION_SC_149               CAL_VERSION_SC_150
+#define CAL_VERSION_SC_148               1525
+#define CAL_VERSION_SC_147               CAL_VERSION_SC_148
+#define CAL_VERSION_SC_146               CAL_VERSION_SC_148
+#define CAL_VERSION_SC_145               1451
+#define CAL_VERSION_SC_144               CAL_VERSION_SC_145
+#define CAL_VERSION_SC_143               1441
+#define CAL_VERSION_SC_142               CAL_VERSION_SC_142
+#define CAL_VERSION_SC_141               1420
+#define CAL_VERSION_SC_140               1400
+#define CAL_VERSION_SC_139               1387
+#define CAL_VERSION_SC_138               CAL_VERSION_SC_139
+#define CAL_APPEND_BUFFER_SUPPORT        1340
+#define CAL_VERSION_SC_137               1331
+#define CAL_VERSION_SC_136                982
+#define CAL_VERSION_SC_135                950
+#define CAL_VERSION_GLOBAL_RETURN_BUFFER  990
+
+#define OCL_DEVICE_RV710        0x00001
+#define OCL_DEVICE_RV730        0x00002
+#define OCL_DEVICE_RV770        0x00004
+#define OCL_DEVICE_CEDAR        0x00008
+#define OCL_DEVICE_REDWOOD      0x00010
+#define OCL_DEVICE_JUNIPER      0x00020
+#define OCL_DEVICE_CYPRESS      0x00040
+#define OCL_DEVICE_CAICOS       0x00080
+#define OCL_DEVICE_TURKS        0x00100
+#define OCL_DEVICE_BARTS        0x00200
+#define OCL_DEVICE_CAYMAN       0x00400
+#define OCL_DEVICE_TAHITI       0x00800
+#define OCL_DEVICE_PITCAIRN     0x01000
+#define OCL_DEVICE_CAPEVERDE    0x02000
+#define OCL_DEVICE_TRINITY      0x04000
+#define OCL_DEVICE_DOGS         0x08000
+#define OCL_DEVICE_CATS         0x10000
+#define OCL_DEVICE_BUNNIES      0x20000
+#define OCL_DEVICE_ALL          0xFFFFF
+
+/// The number of function IDs that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+namespace llvm
+{
+class AMDILInstrPrinter;
+class AMDILTargetMachine;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+/// Instruction selection passes.
+FunctionPass*
+createAMDILISelDag(AMDILTargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILPrintfConvert(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILInlinePass(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILPeepholeOpt(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+/// Pre regalloc passes.
+FunctionPass*
+createAMDILPointerManager(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILMachinePeephole(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+/// Pre emit passes.
+FunctionPass* createMachinePostDominatorTreePass();
+FunctionPass*
+createAMDILCFGPreparationPass();
+FunctionPass*
+createAMDILCFGStructurizerPass();
+FunctionPass*
+createAMDILLiteralManager(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILIOExpansion(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILSwizzleEncoder(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+/// Instruction Emission Passes
+AMDILInstPrinter *createAMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+    const MCRegisterInfo &MRI);
+
+extern Target TheAMDILTarget;
+} // end namespace llvm;
+
+#define GET_REGINFO_ENUM
+#include "AMDILGenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "AMDILGenInstrInfo.inc"
+
+/// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm
+{
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory;
+/// however, on the GPU each address space points to
+/// a separate piece of memory that is distinct from other
+/// memory locations.
+namespace AMDILAS
+{
+enum AddressSpaces {
+  PRIVATE_ADDRESS  = 0, // Address space for private memory.
+  GLOBAL_ADDRESS   = 1, // Address space for global memory.
+  CONSTANT_ADDRESS = 2, // Address space for constant memory.
+  LOCAL_ADDRESS    = 3, // Address space for local memory.
+  REGION_ADDRESS   = 4, // Address space for region memory.
+  GLOBAL_HOST_ADDRESS = 5, // Address space with global host endianness.
+  CONSTANT_HOST_ADDRESS = 6, // Address space with constant host endianness.
+  FLAT_ADDRESS     = 7, // Address space for flat memory.
+  ADDRESS_NONE     = 8  // Address space for unknown memory.
+};
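+
+// As a rough mapping to OpenCL C qualifiers: __private corresponds to
+// PRIVATE_ADDRESS, __global to GLOBAL_ADDRESS, __constant to CONSTANT_ADDRESS
+// and __local to LOCAL_ADDRESS; REGION_ADDRESS covers AMD's region (GDS)
+// memory extension.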
+
+// We are piggybacking on the CommentFlag enum in MachineInstr.h to
+// set bits in the AsmPrinterFlags of the MachineInstr. We will
+// start at bit 16 and allocate down while LLVM will start at bit
+// 1 and allocate up.
+
+// This union/struct combination is an easy way to read out the
+// exact bits that are needed.
+typedef union ResourceRec {
+  struct {
+#ifdef __BIG_ENDIAN__
+    unsigned short CacheableRead : 1;  // Flag to specify if the read is
+    // cacheable. (Permanent)
+    unsigned short HardwareInst  : 1;  // Flag to specify that this instruction
+    // is a hardware instruction. (Permanent)
+    unsigned short ResourceID    : 10; // Flag to specify the resource ID for
+    // the op. (Permanent)
+    unsigned short PointerPath   : 1;  // Flag to specify if the op is on the
+    // pointer path.
+    unsigned short ByteStore     : 1;  // Flag to specify if the op is a byte
+    // store op.
+    unsigned short ConflictPtr   : 1;  // Flag to specify that the pointer has
+    // a conflict.
+    unsigned short isImage       : 1;  // Reserved for future use.
+#else
+    unsigned short isImage       : 1;  // Reserved for future use/llvm.
+    unsigned short ConflictPtr   : 1;  // Flag to specify that the pointer has a
+    // conflict.
+    unsigned short ByteStore     : 1;  // Flag to specify if the op is a byte
+    // store op.
+    unsigned short PointerPath   : 1;  // Flag to specify if the op is on the
+    // pointer path.
+    unsigned short ResourceID    : 10; // Flag to specify the resource ID for
+    // the op. (Permanent)
+    unsigned short HardwareInst  : 1;  // Flag to specify that this instruction
+    // is a hardware instruction. (Permanent)
+    unsigned short CacheableRead : 1;  // Flag to specify if the read is
+    // cacheable. (Permanent)
+#endif
+  } bits;
+  unsigned short u16all;
+} InstrResEnc;
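+
+// For illustration, the packed flags can be decoded back out through the
+// union, e.g.:
+//   InstrResEnc curRes;
+//   curRes.u16all = flags;                    // the 16 bits carried on the MI
+//   unsigned id    = curRes.bits.ResourceID;  // resource the op refers to
+//   bool cacheable = curRes.bits.CacheableRead;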
+
+} // namespace AMDILAS
+
+// The OpSwizzle encodes a subset of all possible
+// swizzle combinations into a number of bits using
+// only the combinations utilized by the backend.
+// The lower 128 are for source swizzles and the
+// upper 128 are for destination swizzles.
+// The valid mappings can be found in the
+// getSrcSwizzle and getDstSwizzle functions of
+// AMDILUtilityFunctions.cpp.
+typedef union SwizzleRec {
+  struct {
+#ifdef __BIG_ENDIAN__
+    unsigned char dst : 1;
+    unsigned char swizzle : 7;
+#else
+    unsigned char swizzle : 7;
+    unsigned char dst : 1;
+#endif
+  } bits;
+  unsigned char u8all;
+} OpSwizzle;
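+
+// For illustration, a packed swizzle byte is decoded the same way:
+//   OpSwizzle swiz;
+//   swiz.u8all = flags;                // one byte of operand target flags
+//   bool isDst = swiz.bits.dst;        // 0 = source swizzle, 1 = destination
+//   unsigned idx = swiz.bits.swizzle;  // index into the swizzle tables
+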
+// Enums corresponding to AMDIL condition codes for IL.  These
+// values must be kept in sync with the ones in the .td file.
+namespace AMDILCC
+{
+enum CondCodes {
+  // AMDIL specific condition codes. These correspond to the IL_CC_*
+  // in AMDILInstrInfo.td and must be kept in the same order.
+  IL_CC_D_EQ  =  0,   // DEQ instruction.
+  IL_CC_D_GE  =  1,   // DGE instruction.
+  IL_CC_D_LT  =  2,   // DLT instruction.
+  IL_CC_D_NE  =  3,   // DNE instruction.
+  IL_CC_F_EQ  =  4,   //  EQ instruction.
+  IL_CC_F_GE  =  5,   //  GE instruction.
+  IL_CC_F_LT  =  6,   //  LT instruction.
+  IL_CC_F_NE  =  7,   //  NE instruction.
+  IL_CC_I_EQ  =  8,   // IEQ instruction.
+  IL_CC_I_GE  =  9,   // IGE instruction.
+  IL_CC_I_LT  = 10,   // ILT instruction.
+  IL_CC_I_NE  = 11,   // INE instruction.
+  IL_CC_U_GE  = 12,   // UGE instruction.
+  IL_CC_U_LT  = 13,   // ULT instruction.
+  // Pseudo IL Comparison instructions here.
+  IL_CC_F_GT  = 14,   //  GT instruction.
+  IL_CC_U_GT  = 15,
+  IL_CC_I_GT  = 16,
+  IL_CC_D_GT  = 17,
+  IL_CC_F_LE  = 18,   //  LE instruction
+  IL_CC_U_LE  = 19,
+  IL_CC_I_LE  = 20,
+  IL_CC_D_LE  = 21,
+  IL_CC_F_UNE = 22,
+  IL_CC_F_UEQ = 23,
+  IL_CC_F_ULT = 24,
+  IL_CC_F_UGT = 25,
+  IL_CC_F_ULE = 26,
+  IL_CC_F_UGE = 27,
+  IL_CC_F_ONE = 28,
+  IL_CC_F_OEQ = 29,
+  IL_CC_F_OLT = 30,
+  IL_CC_F_OGT = 31,
+  IL_CC_F_OLE = 32,
+  IL_CC_F_OGE = 33,
+  IL_CC_D_UNE = 34,
+  IL_CC_D_UEQ = 35,
+  IL_CC_D_ULT = 36,
+  IL_CC_D_UGT = 37,
+  IL_CC_D_ULE = 38,
+  IL_CC_D_UGE = 39,
+  IL_CC_D_ONE = 40,
+  IL_CC_D_OEQ = 41,
+  IL_CC_D_OLT = 42,
+  IL_CC_D_OGT = 43,
+  IL_CC_D_OLE = 44,
+  IL_CC_D_OGE = 45,
+  IL_CC_U_EQ  = 46,
+  IL_CC_U_NE  = 47,
+  IL_CC_F_O   = 48,
+  IL_CC_D_O   = 49,
+  IL_CC_F_UO  = 50,
+  IL_CC_D_UO  = 51,
+  IL_CC_L_LE  = 52,
+  IL_CC_L_GE  = 53,
+  IL_CC_L_EQ  = 54,
+  IL_CC_L_NE  = 55,
+  IL_CC_L_LT  = 56,
+  IL_CC_L_GT  = 57,
+  IL_CC_UL_LE = 58,
+  IL_CC_UL_GE = 59,
+  IL_CC_UL_EQ = 60,
+  IL_CC_UL_NE = 61,
+  IL_CC_UL_LT = 62,
+  IL_CC_UL_GT = 63,
+  COND_ERROR  = 64
+};
+
+} // end namespace AMDILCC
+} // end namespace llvm
+#endif // AMDIL_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,16 @@
+//===-- AMDIL.td ----------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "AMDILBase.td"
+include "AMDILVersion.td"
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,676 @@
+//===-- AMDIL789IOExpansion.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the IO expansion class for 789 devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+
+using namespace llvm;
+AMDIL789IOExpansion::AMDIL789IOExpansion(TargetMachine &tm,
+    CodeGenOpt::Level OptLevel)
+  : AMDILIOExpansion(tm, OptLevel)
+{
+}
+
+AMDIL789IOExpansion::~AMDIL789IOExpansion()
+{
+}
+
+const char *AMDIL789IOExpansion::getPassName() const
+{
+  return "AMDIL 789 IO Expansion Pass";
+}
+// This code produces the following pseudo-IL:
+// cmov_logical r1006.x___, r1008.y, r1006.y, r1006.x
+// cmov_logical r1006.x___, r1008.z, r1006.x, r1006.z
+// cmov_logical $dst.x___, r1008.w, r1006.x, r1006.w
+void
+AMDIL789IOExpansion::emitComponentExtract(MachineInstr *MI,
+    unsigned src, unsigned dst, bool before)
+{
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32),
+          (src - AMDIL::R1) + AMDIL::Rx1)
+  .addReg(AMDIL::Ry1008)
+  .addReg((src - AMDIL::R1) + AMDIL::Ry1)
+  .addReg((src - AMDIL::R1) + AMDIL::Rx1);
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32),
+          (src - AMDIL::R1) + AMDIL::Rx1)
+  .addReg(AMDIL::Rz1008)
+  .addReg((src - AMDIL::R1) + AMDIL::Rz1)
+  .addReg((src - AMDIL::R1) + AMDIL::Rx1);
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), dst)
+  .addReg(AMDIL::Rw1008)
+  .addReg((src - AMDIL::R1) + AMDIL::Rw1)
+  .addReg((src - AMDIL::R1) + AMDIL::Rx1);
+}
+// We have a 128-bit load but an 8/16/32-bit value, so we need to
+// select the correct component and make sure that the correct
+// bits are selected. For the 8- and 16-bit cases we need to
+// extract the correct bits from the component, and for 32 bits
+// we just need to select the correct component.
+void
+AMDIL789IOExpansion::emitDataLoadSelect(MachineInstr *MI)
+{
+  DebugLoc DL = MI->getDebugLoc();
+  emitComponentExtract(MI, AMDIL::R1011, AMDIL::Rx1011, false);
+  if (getMemorySize(MI) == 1) {
+    // This produces the following pseudo-IL:
+    // iand r1006.x___, r1010.xxxx, l14.xxxx
+    // iadd r1006, r1006.x, {0, -1, 2, 3}
+    // ieq r1008, r1006, 0
+    // ishr r1011, r1011.x, {0, 8, 16, 24}
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1006)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1006)
+    .addReg(AMDIL::Rx1006)
+    .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+                                 (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+    .addReg(AMDIL::R1006)
+    .addImm(mMFI->addi32Literal(0));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1011)
+    .addReg(AMDIL::Rx1011)
+    .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+    emitComponentExtract(MI, AMDIL::R1011, AMDIL::Rx1011, false);
+  } else if (getMemorySize(MI) == 2) {
+    // This produces the following pseudo-IL:
+    // ishr r1007.x___, r1010.xxxx, 1
+    // iand r1008.x___, r1007.xxxx, 1
+    // ishr r1007.x___, r1011.xxxx, 16
+    // cmov_logical r1011.x___, r1008.xxxx, r1007.xxxx, r1011.xxxx
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(1));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1007)
+    .addImm(mMFI->addi32Literal(1));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+    .addReg(AMDIL::Rx1011)
+    .addImm(mMFI->addi32Literal(16));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1007)
+    .addReg(AMDIL::Rx1011);
+  }
+}
+// This function modifies the address calculation so that the access uses a
+// 128-bit vector register slot instead of a dword-addressed load.
+void
+AMDIL789IOExpansion::emitVectorAddressCalc(MachineInstr *MI, bool is32bit, bool needsSelect)
+{
+  DebugLoc DL = MI->getDebugLoc();
+  // This produces the following pseudo-IL:
+  // ishr r1007.x___, r1010.xxxx, (is32bit) ? 2 : 3
+  // iand r1008.x___, r1007.xxxx, (is32bit) ? 3 : 1
+  // ishr r1007.x___, r1007.xxxx, (is32bit) ? 2 : 1
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+  .addReg(AMDIL::Rx1010)
+  .addImm(mMFI->addi32Literal((is32bit) ? 0x2 : 3));
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+  .addReg(AMDIL::Rx1007)
+  .addImm(mMFI->addi32Literal((is32bit) ? 3 : 1));
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+  .addReg(AMDIL::Rx1007)
+  .addImm(mMFI->addi32Literal((is32bit) ? 2 : 1));
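+  // Net effect (assuming r1010.x holds the byte address): r1007.x ends up with
+  // the 128-bit slot index (addr / 16) and r1008.x with the component inside
+  // that slot -- the dword index ((addr / 4) & 3) for 32-bit accesses or the
+  // qword index ((addr / 8) & 1) for 64-bit accesses.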
+  if (needsSelect) {
+    // If the component selection is required, the following
+    // pseudo-IL is produced.
+    // iadd r1008, r1008.x, (is32bit) ? {0, -1, -2, -3} : {0, 0, -1, -1}
+    // ieq r1008, r1008, 0
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi128Literal((is32bit) ? 0xFFFFFFFFULL << 32 : 0ULL,
+                                 (is32bit) ? 0xFFFFFFFEULL | (0xFFFFFFFDULL << 32) :
+                                 -1ULL));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+    .addReg(AMDIL::R1008)
+    .addImm(mMFI->addi32Literal(0));
+  }
+}
+// This function emits a switch statement and writes a 32-bit/64-bit
+// value to a 128-bit vector register type.
+void
+AMDIL789IOExpansion::emitVectorSwitchWrite(MachineInstr *MI, bool is32bit)
+{
+  uint32_t xID = getPointerID(MI);
+  assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
+  // This section generates the following pseudo-IL:
+  // switch r1008.x
+  // default
+  //   mov x1[r1007.x].(is32bit) ? x___ : xy__, r1011.x{y}
+  // break
+  // case 1
+  //   mov x1[r1007.x].(is32bit) ? _y__ : __zw, r1011.x{yxy}
+  // break
+  // If is32bit is true, cases 2 and 3 are emitted.
+  // case 2
+  //   mov x1[r1007.x].__z_, r1011.x
+  // break
+  // case 3
+  //   mov x1[r1007.x].___w, r1011.x
+  // break
+  // endswitch
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SWITCH))
+  .addReg(AMDIL::Rx1008);
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DEFAULT));
+  BuildMI(*mBB, MI, DL,
+          mTII->get(AMDIL::SCRATCHSTORE),
+          (is32bit) ? AMDIL::Rx1007 : AMDIL::Rxy1007)
+  .addReg((is32bit) ? AMDIL::Rx1011 : AMDIL::Rxy1011)
+  .addImm(xID);
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CASE)).addImm(1);
+  BuildMI(*mBB, MI, DL,
+          mTII->get(AMDIL::SCRATCHSTORE),
+          (is32bit) ? AMDIL::Ry1007 : AMDIL::Rzw1007)
+  .addReg(is32bit ? AMDIL::Rx1011 : AMDIL::Rxy1011)
+  .addImm(xID);
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
+  if (is32bit) {
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CASE)).addImm(2);
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SCRATCHSTORE), AMDIL::Rz1007)
+    .addReg(AMDIL::Rx1011)
+    .addImm(xID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CASE)).addImm(3);
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SCRATCHSTORE), AMDIL::Rw1007)
+    .addReg(AMDIL::Rx1011)
+    .addImm(xID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
+  }
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ENDSWITCH));
+
+}
+void
+AMDIL789IOExpansion::expandPrivateLoad(MachineInstr *MI)
+{
+  bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+  if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
+    return expandGlobalLoad(MI);
+  }
+  if (!mMFI->usesScratch() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  uint32_t xID = getPointerID(MI);
+  assert(xID && "Found a scratch load that was incorrectly marked as zero ID!\n");
+  if (!xID) {
+    xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  // These instructions go before the current MI.
+  expandLoadStartCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    // Since private registers are 128-bit aligned, we have to align the
+    // 32-bit-aligned source address first and then load the data.
+    // This produces the following pseudo-IL:
+    // ishr r1010.x___, r1010.xxxx, 4
+    // mov r1011, x1[r1010.x]
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SHR_i32), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(4));
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(xID);
+    break;
+  case 1:
+  case 2:
+  case 4:
+    emitVectorAddressCalc(MI, true, true);
+    // This produces the following pseudo-IL:
+    // mov r1011, x1[r1007.x]
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+    .addReg(AMDIL::Rx1007)
+    .addImm(xID);
+    // These instructions go after the current MI.
+    emitDataLoadSelect(MI);
+    break;
+  case 8:
+    emitVectorAddressCalc(MI, false, true);
+    // This produces the following pseudo-IL:
+    // mov r1011, x1[r1007.x]
+    // cmov_logical r1011.xy__, r1008.xxxx, r1011.xy, r1011.zw
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+    .addReg(AMDIL::Rx1007)
+    .addImm(xID);
+    // These instructions go after the current MI.
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::CMOVLOG_i64), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rx1008)
+    .addReg(AMDIL::Rxy1011)
+    .addReg(AMDIL::Rzw1011);
+    break;
+  }
+  unsigned dataReg;
+  expandPackedData(MI);
+  dataReg = expandExtendLoad(MI);
+  if (!dataReg) {
+    dataReg = getDataReg(MI);
+  }
+  BuildMI(*mBB, MI, MI->getDebugLoc(),
+          mTII->get(getMoveInstFromID(
+                      MI->getDesc().OpInfo[0].RegClass)))
+  .addOperand(MI->getOperand(0))
+  .addReg(dataReg);
+  MI->getOperand(0).setReg(dataReg);
+}
+
+
+void
+AMDIL789IOExpansion::expandConstantLoad(MachineInstr *MI)
+{
+  if (!isHardwareInst(MI) || MI->memoperands_empty()) {
+    return expandGlobalLoad(MI);
+  }
+  uint32_t cID = getPointerID(MI);
+  if (cID < 2) {
+    return expandGlobalLoad(MI);
+  }
+  if (!mMFI->usesConstant() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+
+  DebugLoc DL = MI->getDebugLoc();
+  // These instructions go before the current MI.
+  expandLoadStartCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SHR_i32), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(4));
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(cID);
+    break;
+  case 1:
+  case 2:
+  case 4:
+    emitVectorAddressCalc(MI, true, true);
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+    .addReg(AMDIL::Rx1007)
+    .addImm(cID);
+    // These instructions go after the current MI.
+    emitDataLoadSelect(MI);
+    break;
+  case 8:
+    emitVectorAddressCalc(MI, false, true);
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+    .addReg(AMDIL::Rx1007)
+    .addImm(cID);
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::CMOVLOG_i64), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rx1008)
+    .addReg(AMDIL::Rxy1011)
+    .addReg(AMDIL::Rzw1011);
+    break;
+  }
+  expandPackedData(MI);
+  unsigned dataReg = expandExtendLoad(MI);
+  if (!dataReg) {
+    dataReg = getDataReg(MI);
+  }
+  BuildMI(*mBB, MI, MI->getDebugLoc(),
+          mTII->get(getMoveInstFromID(
+                      MI->getDesc().OpInfo[0].RegClass)))
+  .addOperand(MI->getOperand(0))
+  .addReg(dataReg);
+  MI->getOperand(0).setReg(dataReg);
+}
+
+void
+AMDIL789IOExpansion::expandConstantPoolLoad(MachineInstr *MI)
+{
+  if (!isStaticCPLoad(MI)) {
+    return expandConstantLoad(MI);
+  } else {
+    uint32_t idx = MI->getOperand(1).getIndex();
+    const MachineConstantPool *MCP = MI->getParent()->getParent()
+                                     ->getConstantPool();
+    const std::vector<MachineConstantPoolEntry> &consts
+    = MCP->getConstants();
+    const Constant *C = consts[idx].Val.ConstVal;
+    emitCPInst(MI, C, mKM, 0, isExtendLoad(MI));
+  }
+}
+
+void
+AMDIL789IOExpansion::expandPrivateStore(MachineInstr *MI)
+{
+  bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+  if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
+    return expandGlobalStore(MI);
+  }
+  if (!mMFI->usesScratch() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  uint32_t xID = getPointerID(MI);
+  assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
+  if (!xID) {
+    xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  // These instructions go before the current MI.
+  expandStoreSetupCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    // This section generates the following pseudo-IL:
+    // ishr r1010.x___, r1010.xxxx, 4
+    // mov x1[r1010.x], r1011
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SHR_i32), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(4));
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SCRATCHSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::R1011)
+    .addImm(xID);
+    break;
+  case 1:
+    emitVectorAddressCalc(MI, true, true);
+    // This section generates the following pseudo-IL:
+    // mov r1002, x1[r1007.x]
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
+    .addReg(AMDIL::Rx1007)
+    .addImm(xID);
+    emitComponentExtract(MI, AMDIL::R1002, AMDIL::Rx1002, true);
+    // This section generates the following pseudo-IL:
+    // iand r1003.x, r1010.x, 3
+    // iadd r1001, r1003.x, {0, -1, -2, -3}
+    // ieq r1001, r1001, 0
+    // ishr r1002, r1002.x, {0, 8, 16, 24}
+    // cmov_logical r1002, r1001, r1011.x, r1002
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1003)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1001)
+    .addReg(AMDIL::Rx1003)
+    .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+                                 (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1001)
+    .addReg(AMDIL::R1001)
+    .addImm(mMFI->addi32Literal(0));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1002)
+    .addReg(AMDIL::Rx1002)
+    .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_v4i32), AMDIL::R1002)
+    .addReg(AMDIL::R1001)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::R1002);
+    if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+      // This section generates the following pseudo-IL:
+      // iand r1002, r1002, 0xFF
+      // ishl r1002, r1002, {0, 8, 16, 24}
+      // ior r1002.xy, r1002.xy, r1002.zw
+      // ior r1011.x, r1002.x, r1002.y
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1002)
+      .addReg(AMDIL::R1002)
+      .addImm(mMFI->addi32Literal(0xFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1002)
+      .addReg(AMDIL::R1002)
+      .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::Rxy1002)
+      .addReg(AMDIL::Rxy1002).addReg(AMDIL::Rzw1002);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Ry1002).addReg(AMDIL::Rx1002);
+    } else {
+      // This section generates the following pseudo-IL:
+      // mov r1001.xy, r1002.yw
+      // mov r1002.xy, r1002.xz
+      // ubit_insert r1002.xy, 8, 8, r1001.xy, r1002.xy
+      // ubit_insert r1011.x, 16, 16, r1002.y, r1002.x
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::Rxy1001)
+      .addReg(AMDIL::R1002);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1002)
+      .addReg(AMDIL::R1002);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::Rxy1002)
+      .addImm(mMFI->addi32Literal(8))
+      .addImm(mMFI->addi32Literal(8))
+      .addReg(AMDIL::Rxy1001)
+      .addReg(AMDIL::Rxy1002);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+      .addImm(mMFI->addi32Literal(16))
+      .addImm(mMFI->addi32Literal(16))
+      .addReg(AMDIL::Ry1002)
+      .addReg(AMDIL::Rx1002);
+    }
+    emitVectorAddressCalc(MI, true, false);
+    emitVectorSwitchWrite(MI, true);
+    break;
+  case 2:
+    emitVectorAddressCalc(MI, true, true);
+    // This section generates the following pseudo-IL:
+    // mov r1002, x1[r1007.x]
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
+    .addReg(AMDIL::Rx1007)
+    .addImm(xID);
+    emitComponentExtract(MI, AMDIL::R1002, AMDIL::Rx1002, true);
+    // This section generates the following pseudo-IL:
+    // ishr r1003.x, r1010.x, 1
+    // iand r1003.x, r1003.x, 1
+    // ishr r1001.x, r1002.x, 16
+    // cmov_logical r1002.x, r1003.x, r1002.x, r1011.x
+    // cmov_logical r1001.x, r1003.x, r1011.x, r1001.x
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1003)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(1));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1003)
+    .addReg(AMDIL::Rx1003)
+    .addImm(mMFI->addi32Literal(1));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1001)
+    .addReg(AMDIL::Rx1002)
+    .addImm(mMFI->addi32Literal(16));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1002)
+    .addReg(AMDIL::Rx1003)
+    .addReg(AMDIL::Rx1002)
+    .addReg(AMDIL::Rx1011);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1001)
+    .addReg(AMDIL::Rx1003)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1001);
+    if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+      // This section generates the following pseudo-IL:
+      // iand r1002.x, r1002.x, 0xFFFF
+      // iand r1001.x, r1001.x, 0xFFFF
+      // ishl r1001.x, r1002.x, 16
+      // ior r1011.x, r1002.x, r1001.x
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1002)
+      .addReg(AMDIL::Rx1002)
+      .addImm(mMFI->addi32Literal(0xFFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1001)
+      .addReg(AMDIL::Rx1001)
+      .addImm(mMFI->addi32Literal(0xFFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1001)
+      .addReg(AMDIL::Rx1001)
+      .addImm(mMFI->addi32Literal(16));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_OR_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1002).addReg(AMDIL::Rx1001);
+
+    } else {
+      // This section generates the following pseudo-IL:
+      // ubit_insert r1011.x, 16, 16, r1001.x, r1002.x
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(16))
+      .addImm(mMFI->addi32Literal(16))
+      .addReg(AMDIL::Rx1001)
+      .addReg(AMDIL::Rx1002);
+    }
+    emitVectorAddressCalc(MI, true, false);
+    emitVectorSwitchWrite(MI, true);
+    break;
+  case 4:
+    emitVectorAddressCalc(MI, true, false);
+    emitVectorSwitchWrite(MI, true);
+    break;
+  case 8:
+    emitVectorAddressCalc(MI, false, false);
+    emitVectorSwitchWrite(MI, false);
+    break;
+  };
+}
+void
+AMDIL789IOExpansion::expandStoreSetupCode(MachineInstr *MI)
+{
+  DebugLoc DL;
+  bool is64bit = is64bitLSOp(TM, MI);
+  uint32_t addyReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+  uint32_t addInst = (is64bit) ? AMDIL::LADD_i64 : AMDIL::ADD_i32;
+  uint32_t moveInst = (is64bit) ? AMDIL::MOVE_i64 : AMDIL::MOVE_i32;
+  if (MI->getOperand(0).isUndef()) {
+    BuildMI(*mBB, MI, DL, mTII->get(getMoveInstFromID(
+                                      MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
+    .addImm(mMFI->addi32Literal(0));
+  } else {
+    BuildMI(*mBB, MI, DL, mTII->get(getMoveInstFromID(
+                                      MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
+    .addReg(MI->getOperand(0).getReg());
+  }
+  expandTruncData(MI);
+  if (MI->getOperand(2).isReg()) {
+    BuildMI(*mBB, MI, DL, mTII->get(addInst), addyReg)
+    .addReg(MI->getOperand(1).getReg())
+    .addReg(MI->getOperand(2).getReg());
+  } else {
+    BuildMI(*mBB, MI, DL, mTII->get(moveInst), addyReg)
+    .addReg(MI->getOperand(1).getReg());
+  }
+  expandAddressCalc(MI);
+  expandPackedData(MI);
+}
+
+
+void
+AMDIL789IOExpansion::expandPackedData(MachineInstr *MI)
+{
+  if (!isPackedData(MI)) {
+    return;
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  // If we have packed data, then the shift size is no longer
+  // the same as the load size and we need to adjust accordingly
+  switch(getPackedID(MI)) {
+  default:
+    break;
+  case PACK_V2I8: {
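+    // e.g. packing x = 0x000000AA, y = 0x000000BB yields Rx1011 = 0x0000BBAA.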
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rxy1011)
+
+    .addImm(mMFI->addi64Literal(0xFFULL | (0xFFULL << 32)));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rxy1011).addImm(mMFI->addi64Literal(8ULL << 32));
+    // TODO: HILO_BITOR can be removed and replaced with OR.
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011).addReg(AMDIL::Ry1011);
+
+  }
+  break;
+  case PACK_V4I8: {
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(mMFI->addi32Literal(0xFF));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
+    // TODO: HILO_BITOR can be removed and replaced with OR.
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rxy1011).addReg(AMDIL::Rzw1011);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011).addReg(AMDIL::Ry1011);
+
+  }
+  break;
+  case PACK_V2I16: {
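+    // e.g. packing x = 0x0000AAAA, y = 0x0000BBBB yields Rx1011 = 0xBBBBAAAA.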
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rxy1011)
+
+    .addImm(mMFI->addi32Literal(0xFFFF));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rxy1011)
+
+    .addImm(mMFI->addi64Literal(16ULL << 32));
+    // TODO: HILO_BITOR can be removed and replaced with OR.
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011).addReg(AMDIL::Ry1011);
+
+  }
+  break;
+  case PACK_V4I16: {
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(mMFI->addi32Literal(0xFFFF));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(mMFI->addi64Literal(16ULL << 32));
+    // TODO: HILO_BITOR can be removed and replaced with OR.
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v4i16), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rxy1011).addReg(AMDIL::Rzw1011);
+
+  }
+  break;
+  case UNPACK_V2I8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::Ry1011)
+    .addReg(AMDIL::Rx1011)
+    .addImm(mMFI->addi32Literal(8));
+    break;
+  case UNPACK_V4I8: {
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_v4i8), AMDIL::R1011)
+    .addReg(AMDIL::Rx1011)
+    .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
+  }
+  break;
+  case UNPACK_V2I16: {
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::Ry1011)
+    .addReg(AMDIL::Rx1011)
+    .addImm(mMFI->addi32Literal(16));
+  }
+  break;
+  case UNPACK_V4I16: {
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_v2i32), AMDIL::Rxy1012)
+    .addReg(AMDIL::Rxy1011)
+
+    .addImm(mMFI->addi32Literal(16));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+    .addReg(AMDIL::Rxy1011).addReg(AMDIL::Rxy1012);
+  }
+  break;
+  };
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,218 @@
+//===-- AMDIL7XXAsmPrinter.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL7XXAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+// TODO: Add support for verbose.
+AMDIL7XXAsmPrinter::AMDIL7XXAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+  : AMDILAsmPrinter(ASM_PRINTER_ARGUMENTS)
+{
+}
+
+AMDIL7XXAsmPrinter::~AMDIL7XXAsmPrinter()
+{
+}
+///
+/// @param name Function name to process.
+/// @brief Strips the "__OpenCL_" prefix and "_kernel" suffix from the name
+/// and returns the stripped name if both tokens are present; otherwise the
+/// name is returned unchanged.
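+/// e.g. "__OpenCL_foo_kernel" becomes "foo".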
+///
+static
+std::string Strip(const std::string &name)
+{
+  size_t start = name.find("__OpenCL_");
+  size_t end = name.find("_kernel");
+  if (start == std::string::npos
+      || end == std::string::npos
+      || (start == end)) {
+    return name;
+  } else {
+    return name.substr(9, name.length()-16);
+  }
+}
+void
+AMDIL7XXAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+                                  OSTREAM_TYPE &O)
+{
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  const char *name = "unknown";
+  llvm::StringRef nameRef;
+  if (MI->getOperand(0).isGlobal()) {
+    nameRef = MI->getOperand(0).getGlobal()->getName();
+    name = nameRef.data();
+    if (curTarget->device()->usesHardware(
+          AMDILDeviceInfo::DoubleOps)
+        && !::strncmp(name, "__sqrt_f64", 10) ) {
+      name = "__sqrt_f64_7xx";
+    }
+  }
+  emitMCallInst(MI, O, name);
+}
+
+bool
+AMDIL7XXAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+  this->MF = &lMF;
+  mMeta->setMF(&lMF);
+  mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+  mAMI = &(lMF.getMMI().getObjFileInfo<AMDILModuleInfo>());
+
+  SetupMachineFunction(lMF);
+  std::string kernelName = MF->getFunction()->getName();
+  mName = Strip(kernelName);
+
+  mKernelName = kernelName;
+  EmitFunctionHeader();
+  EmitFunctionBody();
+  return false;
+}
+
+void
+AMDIL7XXAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+  std::string FunStr;
+  raw_string_ostream OFunStr(FunStr);
+  formatted_raw_ostream O(OFunStr);
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  if (mDebugMode) {
+    O << ";" ;
+    II->print(O);
+  }
+  if (isMacroFunc(II)) {
+    emitMacroFunc(II, O);
+    O.flush();
+    OutStreamer.EmitRawText(StringRef(FunStr));
+    return;
+  }
+  if (isMacroCall(II)) {
+    unsigned reg = 0;
+    unsigned newDst = 0;
+    OpSwizzle opSwiz, oldSwiz;
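+    // The first five characters of the opcode name are skipped to recover the
+    // macro's name (this assumes a fixed-length macro-opcode prefix).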
+    const char *name = mTM->getInstrInfo()->getName(II->getOpcode()) + 5;
+    int macronum = amd::MacroDBFindMacro(name);
+    O << "\t;"<< name<<"\n";
+    O << "\tmcall("<<macronum<<") ";
+    reg = II->getOperand(0).getReg();
+    newDst = AMDIL::R1000;
+    oldSwiz.u8all = opSwiz.u8all =
+                      II->getOperand(0).getTargetFlags();
+    if (isXComponentReg(reg)) {
+      newDst = AMDIL::Rx1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isYComponentReg(reg)) {
+      newDst = AMDIL::Ry1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isZComponentReg(reg)) {
+      newDst = AMDIL::Rz1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isWComponentReg(reg)) {
+      newDst = AMDIL::Rw1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isXYComponentReg(reg)) {
+      newDst = AMDIL::Rxy1000;
+      opSwiz.bits.swizzle = AMDIL_DST_XY__;
+    } else if (isZWComponentReg(reg)) {
+      newDst = AMDIL::Rzw1000;
+      opSwiz.bits.swizzle = AMDIL_DST_XY__;
+    } else {
+      opSwiz.bits.swizzle = AMDIL_DST_DFLT;
+    }
+    for (unsigned x = 0, y = II->getNumOperands(); x < y; ++x) {
+      if (!x) {
+        O << "(";
+        O << getRegisterName(newDst);
+        O << getDstSwizzle(opSwiz.bits.swizzle);
+      } else {
+        printOperand(II, x
+                     , O
+                    );
+      }
+      if (!x) {
+        O << "), (";
+      } else if (x != y - 1) {
+        O << ", ";
+      } else {
+        O << ")\n";
+      }
+    }
+    O << "\tmov " << getRegisterName(reg) << getDstSwizzle(oldSwiz.bits.swizzle)
+      << ", " << getRegisterName(newDst);
+    if (isXComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_X000);
+    } else if (isYComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_0X00);
+    } else if (isZComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_00X0);
+    } else if (isWComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_000X);
+    } else if (isXYComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_XY00);
+    } else if (isZWComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_00XY);
+    } else {
+      O << getSrcSwizzle(AMDIL_SRC_DFLT);
+    }
+    O << "\n";
+    if (curTarget->device()->isSupported(
+          AMDILDeviceInfo::MacroDB)) {
+      mMacroIDs.insert(macronum);
+    } else {
+      mMFI->addCalledIntr(macronum);
+    }
+  } else {
+
+
+    // Print the assembly for the instruction.
+    // We want to make sure that we emit HW constants
+    // before we do the arena segment.
+    if (mMeta->useCompilerWrite(II)) {
+      // TODO: This is a hack to get around some
+      // conformance failures.
+      O << "\tif_logicalz cb0[0].x\n";
+      O << "\tuav_raw_store_id("
+        << curTarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+        << ") ";
+      O << "mem0.x___, cb0[3].x, r0.0\n";
+      O << "\tendif\n";
+      mMFI->addMetadata(";memory:compilerwrite");
+    } else {
+      printInstruction(II, O);
+    }
+  }
+  O.flush();
+  OutStreamer.EmitRawText(StringRef(FunStr));
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,62 @@
+//===-- AMDIL7XXAsmPrinter.h ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Asm Printer class for the 7XX generation of cards. This class handles all of
+// the items that are unique to these devices and that must be handled by the
+// AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_7XX_ASM_PRINTER_H_
+#define _AMDIL_7XX_ASM_PRINTER_H_
+#include "AMDILAsmPrinter.h"
+
+namespace llvm
+{
+class LLVM_LIBRARY_VISIBILITY AMDIL7XXAsmPrinter : public AMDILAsmPrinter
+{
+public:
+  //
+  // Constructor for the AMDIL 7XX specific AsmPrinter class.
+  // The interface is defined by LLVM proper; refer there
+  // for more information.
+  //
+  AMDIL7XXAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+  //
+  // Destructor for the 7XX Asm Printer class that deletes
+  // all of the allocated memory
+  //
+  virtual ~AMDIL7XXAsmPrinter();
+
+
+  void
+  EmitInstruction(const MachineInstr *MI);
+
+  //
+  // @param F MachineFunction to print the assembly for
+  // @brief parse the specified machine function and print
+  // out the assembly for all the instructions in the function
+  //
+  bool
+  runOnMachineFunction(MachineFunction &F);
+
+protected:
+  //
+  // @param MI Machine instruction to emit the macro code for
+  //
+  // Emits a fully functional macro function that uses the argument
+  // registers as the macro arguments.
+  //
+  virtual void
+  emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
+
+}; // AMDIL7XXAsmPrinter
+} // end of llvm namespace
+#endif // _AMDIL_7XX_ASM_PRINTER_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,164 @@
+//===-- AMDIL7XXDevice.cpp ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL7XXDevice.h"
+#include "AMDIL7XXAsmPrinter.h"
+#include "AMDILDevice.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILPointerManager.h"
+
+using namespace llvm;
+
+AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
+{
+  setCaps();
+  std::string name = mSTM->getDeviceName();
+  if (name == "rv710") {
+    mDeviceFlag = OCL_DEVICE_RV710;
+  } else if (name == "rv730") {
+    mDeviceFlag = OCL_DEVICE_RV730;
+  } else {
+    mDeviceFlag = OCL_DEVICE_RV770;
+  }
+}
+
+AMDIL7XXDevice::~AMDIL7XXDevice()
+{
+}
+
+void AMDIL7XXDevice::setCaps()
+{
+  mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+size_t AMDIL7XXDevice::getMaxLDSSize() const
+{
+  if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+    return MAX_LDS_SIZE_700;
+  }
+  return 0;
+}
+
+size_t AMDIL7XXDevice::getWavefrontSize() const
+{
+  return AMDILDevice::HalfWavefrontSize;
+}
+
+uint32_t AMDIL7XXDevice::getGeneration() const
+{
+  return AMDILDeviceInfo::HD4XXX;
+}
+
+uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
+{
+  switch (DeviceID) {
+  default:
+    assert(0 && "ID type passed in is unknown!");
+    break;
+  case GLOBAL_ID:
+  case CONSTANT_ID:
+  case RAW_UAV_ID:
+  case ARENA_UAV_ID:
+    break;
+  case LDS_ID:
+    if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+      return DEFAULT_LDS_ID;
+    }
+    break;
+  case SCRATCH_ID:
+    if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+      return DEFAULT_SCRATCH_ID;
+    }
+    break;
+  case GDS_ID:
+    assert(0 && "GDS UAV ID is not supported on this chip");
+    if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+      return DEFAULT_GDS_ID;
+    }
+    break;
+  };
+
+  return 0;
+}
+
+uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
+{
+  return 1;
+}
+
+FunctionPass*
+AMDIL7XXDevice::getIOExpansion(
+  TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+  return new AMDIL7XXIOExpansion(TM, OptLevel);
+}
+
+AsmPrinter*
+AMDIL7XXDevice::getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const
+{
+  return new AMDIL7XXAsmPrinter(ASM_PRINTER_ARGUMENTS);
+}
+
+FunctionPass*
+AMDIL7XXDevice::getPointerManager(
+  TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+  if (OptLevel == CodeGenOpt::None) {
+    // If we have no optimizations, we need to run the
+    // more advanced tracking pass as it is possible to
+    // lose information through the stack. The EGPM
+    // pass tracks this, but the standard pass does not.
+    return new AMDILEGPointerManager(TM, OptLevel);
+  } else {
+    return new AMDILPointerManager(TM, OptLevel);
+  }
+}
+
+AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
+{
+  setCaps();
+}
+
+AMDIL770Device::~AMDIL770Device()
+{
+}
+
+void AMDIL770Device::setCaps()
+{
+  if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+    mSWBits.set(AMDILDeviceInfo::FMA);
+    mHWBits.set(AMDILDeviceInfo::DoubleOps);
+  }
+  mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+  mHWBits.reset(AMDILDeviceInfo::LongOps);
+  mSWBits.set(AMDILDeviceInfo::LongOps);
+  mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+size_t AMDIL770Device::getWavefrontSize() const
+{
+  return AMDILDevice::WavefrontSize;
+}
+
+AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
+{
+}
+
+AMDIL710Device::~AMDIL710Device()
+{
+}
+
+size_t AMDIL710Device::getWavefrontSize() const
+{
+  return AMDILDevice::QuarterWavefrontSize;
+}
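
Illustrative sketch (not part of the patch): the setCaps() overrides above
rely on the mHWBits/mSWBits split kept by AMDILDevice, which is outside this
hunk. The snippet below only models the convention the 7XX code appears to
assume: a hardware bit means native support, a software bit means emulated
support, and isSupported() accepts either. With only the software LocalMem
bit set, usesHardware(LocalMem) stays false, so getMaxLDSSize() reports 0 and
local accesses fall back to the global-memory expansion.

    #include <bitset>

    // Stand-in for the capability queries used by the 7XX devices; the real
    // implementation lives in AMDILDevice and may differ in detail.
    struct CapsModel {
      std::bitset<64> HWBits;  // features the chip implements natively
      std::bitset<64> SWBits;  // features the backend emulates in software

      bool usesHardware(unsigned Cap) const { return HWBits[Cap]; }
      bool usesSoftware(unsigned Cap) const { return SWBits[Cap]; }
      bool isSupported(unsigned Cap) const {
        return usesHardware(Cap) || usesSoftware(Cap);
      }
    };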

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,77 @@
+//===-- AMDIL7XXDevice.h --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL7XXDEVICEIMPL_H_
+#define _AMDIL7XXDEVICEIMPL_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+namespace llvm
+{
+class AMDILSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
+// devices are derived from this class. The AMDIL7XX device will only
+// support the minimal features that are required to be considered OpenCL 1.0
+// compliant and nothing more.
+class AMDIL7XXDevice : public AMDILDevice
+{
+public:
+  AMDIL7XXDevice(AMDILSubtarget *ST);
+  virtual ~AMDIL7XXDevice();
+  virtual size_t getMaxLDSSize() const;
+  virtual size_t getWavefrontSize() const;
+  virtual uint32_t getGeneration() const;
+  virtual uint32_t getResourceID(uint32_t DeviceID) const;
+  virtual uint32_t getMaxNumUAVs() const;
+  FunctionPass*
+  getIOExpansion(TargetMachine&, CodeGenOpt::Level) const;
+  AsmPrinter*
+  getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const;
+  FunctionPass*
+  getPointerManager(TargetMachine&, CodeGenOpt::Level) const;
+
+protected:
+  virtual void setCaps();
+}; // AMDIL7XXDevice
+
+// The AMDIL770Device class represents the RV770 chip and its
+// derivative cards. The difference between this device and the base
+// class is that this device adds support for double precision
+// and has a larger wavefront size.
+class AMDIL770Device : public AMDIL7XXDevice
+{
+public:
+  AMDIL770Device(AMDILSubtarget *ST);
+  virtual ~AMDIL770Device();
+  virtual size_t getWavefrontSize() const;
+private:
+  virtual void setCaps();
+}; // AMDIL770Device
+
+// The AMDIL710Device class derives from the 7XX base class, but it
+// targets a smaller derivative chip, so some of the virtual functions
+// are overridden to report the correct information for that chip.
+class AMDIL710Device : public AMDIL7XXDevice
+{
+public:
+  AMDIL710Device(AMDILSubtarget *ST);
+  virtual ~AMDIL710Device();
+  virtual size_t getWavefrontSize() const;
+}; // AMDIL710Device
+
+} // namespace llvm
+#endif // _AMDIL7XXDEVICEIMPL_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,520 @@
+//===-- AMDIL7XXIOExpansion.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the IO Printing class for 7XX devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+
+using namespace llvm;
+AMDIL7XXIOExpansion::AMDIL7XXIOExpansion(TargetMachine &tm,
+    CodeGenOpt::Level OptLevel) : AMDIL789IOExpansion(tm, OptLevel)
+{
+}
+
+AMDIL7XXIOExpansion::~AMDIL7XXIOExpansion()
+{
+}
+const char *AMDIL7XXIOExpansion::getPassName() const
+{
+  return "AMDIL 7XX IO Expansion Pass";
+}
+
+void
+AMDIL7XXIOExpansion::expandGlobalLoad(MachineInstr *MI)
+{
+  DebugLoc DL = MI->getDebugLoc();
+  // These instructions go before the current MI.
+  expandLoadStartCode(MI);
+  uint32_t ID = getPointerID(MI);
+  mKM->setOutputInst();
+  switch(getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32), AMDIL::R1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(ID);
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(ID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(ID);
+    break;
+  case 1:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+                                 (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012)
+    .addReg(AMDIL::R1008)
+    .addImm(mMFI->addi32Literal(0));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1012)
+    .addImm(mMFI->addi32Literal(0))
+    .addImm(mMFI->addi32Literal(24));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Ry1012)
+    .addImm(mMFI->addi32Literal(8))
+    .addReg(AMDIL::Rx1008);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rz1012)
+    .addImm(mMFI->addi32Literal(16))
+    .addReg(AMDIL::Rx1008);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(ID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i8), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1008);
+    break;
+  case 2:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(1));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(16))
+    .addImm(mMFI->addi32Literal(0));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(ID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1008);
+    break;
+  }
+  expandPackedData(MI);
+  unsigned dataReg = expandExtendLoad(MI);
+  if (!dataReg) {
+    dataReg = getDataReg(MI);
+  }
+  BuildMI(*mBB, MI, MI->getDebugLoc(),
+          mTII->get(getMoveInstFromID(
+                      MI->getDesc().OpInfo[0].RegClass)))
+  .addOperand(MI->getOperand(0))
+  .addReg(dataReg);
+  MI->getOperand(0).setReg(dataReg);
+}
+
+void
+AMDIL7XXIOExpansion::expandRegionLoad(MachineInstr *MI)
+{
+  bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+  if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+    return;
+  }
+  if (!HWRegion || !isHardwareRegion(MI)) {
+    return expandGlobalLoad(MI);
+  }
+  if (!mMFI->usesGDS() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  uint32_t gID = getPointerID(MI);
+  assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n");
+  if (!gID) {
+    gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+
+  DebugLoc DL = MI->getDebugLoc();
+  // These instructions go before the current MI.
+  expandLoadStartCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+    .addReg(AMDIL::Ry1010)
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rz1011)
+    .addReg(AMDIL::Rz1010)
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rw1011)
+    .addReg(AMDIL::Rw1010)
+    .addImm(gID);
+    break;
+  case 1:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(8));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(gID);
+    // The instruction would normally fit in right here so everything created
+    // after this point needs to go into the afterInst vector.
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1008);
+    break;
+  case 2:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(8));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(gID);
+    // The instruction would normally fit in right here so everything created
+    // after this point needs to go into the afterInst vector.
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1008);
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(gID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi64Literal(1ULL << 32));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+    .addReg(AMDIL::Ry1010)
+    .addImm(gID);
+    break;
+  }
+  expandPackedData(MI);
+  unsigned dataReg = expandExtendLoad(MI);
+  if (!dataReg) {
+    dataReg = getDataReg(MI);
+  }
+  BuildMI(*mBB, MI, MI->getDebugLoc(),
+          mTII->get(getMoveInstFromID(
+                      MI->getDesc().OpInfo[0].RegClass)))
+  .addOperand(MI->getOperand(0))
+  .addReg(dataReg);
+  MI->getOperand(0).setReg(dataReg);
+}
+void
+AMDIL7XXIOExpansion::expandLocalLoad(MachineInstr *MI)
+{
+  bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+  if (!HWLocal || !isHardwareLocal(MI)) {
+    return expandGlobalLoad(MI);
+  }
+  if (!mMFI->usesLDS() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  uint32_t lID = getPointerID(MI);
+  assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n");
+  if (!lID) {
+    lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  // These instructions go before the current MI.
+  expandLoadStartCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC), AMDIL::R1011)
+    .addReg(AMDIL::R1010)
+    .addImm(lID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rxy1010)
+    .addImm(lID);
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(lID);
+    break;
+  case 1:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(8));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(lID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1008);
+    break;
+  case 2:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(8));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+    .addImm(lID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1008);
+    break;
+  }
+  expandPackedData(MI);
+  unsigned dataReg = expandExtendLoad(MI);
+  if (!dataReg) {
+    dataReg = getDataReg(MI);
+  }
+  BuildMI(*mBB, MI, MI->getDebugLoc(),
+          mTII->get(getMoveInstFromID(
+                      MI->getDesc().OpInfo[0].RegClass)))
+  .addOperand(MI->getOperand(0))
+  .addReg(dataReg);
+  MI->getOperand(0).setReg(dataReg);
+}
+
+void
+AMDIL7XXIOExpansion::expandGlobalStore(MachineInstr *MI)
+{
+  uint32_t ID = getPointerID(MI);
+  mKM->setOutputInst();
+  DebugLoc DL = MI->getDebugLoc();
+  // These instructions go before the current MI.
+  expandStoreSetupCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
+    .addReg(AMDIL::R1010)
+    .addReg(AMDIL::R1011)
+    .addImm(ID);
+    break;
+  case 1:
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+    .addReg(AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+    .addImm(ID);
+    break;
+  case 2:
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+    .addReg(AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+    .addImm(ID);
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+    .addReg(AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+    .addImm(ID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEMxy)
+    .addReg(AMDIL::Rx1010)
+    .addReg(AMDIL::Rxy1011)
+    .addImm(ID);
+    break;
+  };
+}
+
+void
+AMDIL7XXIOExpansion::expandRegionStore(MachineInstr *MI)
+{
+  bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+  if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+    return;
+  }
+  if (!HWRegion || !isHardwareRegion(MI)) {
+    return expandGlobalStore(MI);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  mKM->setOutputInst();
+  if (!mMFI->usesGDS() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  uint32_t gID = getPointerID(MI);
+  assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n");
+  if (!gID) {
+    gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+
+  // These instructions go before the current MI.
+  expandStoreSetupCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
+    .addReg(AMDIL::Ry1011)
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rz1010)
+    .addReg(AMDIL::Rz1011)
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rw1010)
+    .addReg(AMDIL::Rw1011)
+    .addImm(gID);
+    break;
+  case 1:
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addImm(mMFI->addi32Literal(0xFF));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1012)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+                                 (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1006)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(8));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(0xFFFFFF00))
+    .addImm(mMFI->addi32Literal(0x00FFFFFF));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+    .addReg(AMDIL::Ry1008)
+    .addReg(AMDIL::Rx1007)
+    .addImm(mMFI->addi32Literal(0xFF00FFFF));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+    .addReg(AMDIL::Rz1008)
+    .addReg(AMDIL::Rx1007)
+    .addImm(mMFI->addi32Literal(0xFFFF00FF));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1007);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+    .addImm(gID);
+    break;
+  case 2:
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addImm(mMFI->addi32Literal(0x0000FFFF));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi32Literal(3));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(1));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(0x0000FFFF))
+    .addImm(mMFI->addi32Literal(0xFFFF0000));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+    .addReg(AMDIL::Rx1008)
+    .addImm(mMFI->addi32Literal(16))
+    .addImm(mMFI->addi32Literal(0));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1008);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+    .addImm(gID);
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+    .addImm(gID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi64Literal(1ULL << 32));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
+    .addReg(AMDIL::Ry1011)
+    .addImm(gID);
+    break;
+  };
+}
+
+void
+AMDIL7XXIOExpansion::expandLocalStore(MachineInstr *MI)
+{
+  bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+  if (!HWLocal || !isHardwareLocal(MI)) {
+    return expandGlobalStore(MI);
+  }
+  uint32_t lID = getPointerID(MI);
+  assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n");
+  if (!lID) {
+    lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  // These instructions go before the current MI.
+  expandStoreSetupCode(MI);
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC), AMDIL::MEM)
+  .addReg(AMDIL::R1010)
+  .addReg(AMDIL::R1011)
+  .addImm(lID);
+}
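
Illustrative sketch (not part of the patch): the 1-byte case of
expandGlobalLoad() above encodes, in IL, the usual sub-dword read pattern:
mask the address down to a dword boundary, load 32 bits, then shift the
requested byte into the low lane with a shift of 0, 8, 16 or 24 chosen by the
IEQ/CMOVLOG chain. A scalar C++ model of that arithmetic, assuming the
little-endian lane order the shift table implies, is:

    #include <cstdint>
    #include <cstring>

    // Scalar model of the expansion: BINARY_AND_i32 aligns the address,
    // UAVRAWLOAD_i32 fetches the containing dword, and SHR moves the
    // requested byte into the low bits.
    static uint8_t loadByteModel(const uint8_t *Mem, uint32_t Addr) {
      uint32_t Aligned = Addr & 0xFFFFFFFCu;  // clear the low two address bits
      uint32_t Shift   = 8u * (Addr & 3u);    // what the CMOVLOG chain computes
      uint32_t DWord;
      std::memcpy(&DWord, Mem + Aligned, sizeof(DWord));
      return uint8_t(DWord >> Shift);
    }

The 2-byte case is the same idea with a 0/16 shift; the expandExtendLoad()
call that follows is then responsible for producing the destination type.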

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,86 @@
+//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
+//
+// This file provides template algorithms that extend the STL algorithms and
+// are useful for the AMDIL backend.
+//
+//===----------------------------------------------------------------------===//
+
+// Template functions that loop through the iterators and pass the second
+// argument along with each iterator to the function. The plain variant
+// ignores the return value; in the 'safe' variant, a true return means the
+// current element was invalidated, so the iterator steps back before the
+// loop advances (re-visiting that slot). These are based on the STL
+// for_each algorithm, but allow a reference as the second argument.
+template<class InputIterator, class Function, typename Arg>
+Function binaryForEach(InputIterator First, InputIterator Last, Function F,
+                       Arg &Second)
+{
+  for ( ; First!=Last; ++First ) {
+    F(*First, Second);
+  }
+  return F;
+}
+
+template<class InputIterator, class Function, typename Arg>
+Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
+                           Arg &Second)
+{
+  for ( ; First!=Last; ++First ) {
+    if (F(*First, Second)) {
+      --First;
+    }
+  }
+  return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with the passed in argument. See binaryForEach for further
+// explanation
+template<class InputIterator, class Function, typename Arg>
+Function binaryNestedForEach(InputIterator First, InputIterator Last,
+                             Function F, Arg &Second)
+{
+  for ( ; First != Last; ++First) {
+    binaryForEach(First->begin(), First->end(), F, Second);
+  }
+  return F;
+}
+template<class InputIterator, class Function, typename Arg>
+Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
+                                 Function F, Arg &Second)
+{
+  for ( ; First != Last; ++First) {
+    safeBinaryForEach(First->begin(), First->end(), F, Second);
+  }
+  return F;
+}
+
+// Unlike the STL, a pointer to the iterator itself is passed in with the
+// 'safe' versions of these functions. This allows the function to handle
+// situations such as invalidated iterators.
+template<class InputIterator, class Function>
+Function safeForEach(InputIterator First, InputIterator Last, Function F)
+{
+  for ( ; First!=Last; ++First )
+    F(&First);
+  return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current iterator. See binaryForEach for
+// further explanation
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+                              SecondIterator S, Function F)
+{
+  for ( ; First != Last; ++First) {
+    SecondIterator sf, sl;
+    for (sf = First->begin(), sl = First->end();
+         sf != sl; )  {
+      if (!F(&sf)) {
+        ++sf;
+      }
+    }
+  }
+  return F;
+}
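
Illustrative sketch (not part of the patch): a standalone example of the
'safe' loop contract, where returning true tells safeBinaryForEach to
re-visit the same slot. The functor and data here are invented for the
example; only the algorithm comes from this file.

    #include <vector>
    #include "AMDILAlgorithms.tpp"

    // Repeatedly halve even elements in place. Returning true makes
    // safeBinaryForEach step back, so the same slot is examined again on the
    // next trip around the loop; Count tracks how many rewrites happened.
    struct HalveUntilOdd {
      bool operator()(int &V, unsigned &Count) {
        if (V != 0 && (V % 2) == 0) {
          V /= 2;
          ++Count;
          return true;   // element rewritten, visit it again
        }
        return false;    // nothing changed, advance normally
      }
    };

    void example() {
      std::vector<int> Vals;
      Vals.push_back(7);   // keep the first element stable so the step-back
      Vals.push_back(8);   // never tries to move before begin()
      Vals.push_back(12);
      unsigned Rewrites = 0;
      safeBinaryForEach(Vals.begin(), Vals.end(), HalveUntilOdd(), Rewrites);
      // Vals is now {7, 1, 3} and Rewrites == 5.
    }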

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,98 @@
+//===-- AMDILAsmBackend.cpp -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILAsmBackend.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+namespace llvm
+{
+ASM_BACKEND_CLASS* createAMDILAsmBackend(const ASM_BACKEND_CLASS &T,
+    const std::string &TT)
+{
+  return new AMDILAsmBackend(T);
+}
+} // namespace llvm
+
+//===--------------------- Default AMDIL Asm Backend ---------------------===//
+AMDILAsmBackend::AMDILAsmBackend(const ASM_BACKEND_CLASS &T)
+  : ASM_BACKEND_CLASS()
+{
+}
+
+MCObjectWriter *
+AMDILAsmBackend::createObjectWriter(raw_ostream &OS) const
+{
+  return 0;
+}
+
+bool
+AMDILAsmBackend::doesSectionRequireSymbols(const MCSection &Section) const
+{
+  return false;
+}
+
+bool
+AMDILAsmBackend::isSectionAtomizable(const MCSection &Section) const
+{
+  return true;
+}
+
+bool
+AMDILAsmBackend::isVirtualSection(const MCSection &Section) const
+{
+  return false;
+  //const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
+  //return SE.getType() == MCSectionELF::SHT_NOBITS;
+}
+void
+AMDILAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                            uint64_t Value) const
+{
+}
+
+bool
+AMDILAsmBackend::mayNeedRelaxation(const MCInst &Inst
+                                  ) const
+{
+  return false;
+}
+
+bool
+AMDILAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+                                      uint64_t Value,
+                                      const MCInstFragment *DF,
+                                      const MCAsmLayout &Layout) const
+{
+  // Relax if the value is too big for a (signed) i8.
+  return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+
+
+void
+AMDILAsmBackend::relaxInstruction(const MCInst &Inst,
+                                  MCInst &Res) const
+{
+}
+
+bool
+AMDILAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const
+{
+  return false;
+}
+
+unsigned
+AMDILAsmBackend::getNumFixupKinds() const
+{
+  return 0;
+}
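
Illustrative sketch (not part of the patch): fixupNeedsRelaxation() above
requests relaxation exactly when the fixup value does not survive a round
trip through a signed 8-bit integer. A self-contained check of that
predicate:

    #include <cassert>
    #include <cstdint>

    // Same test as AMDILAsmBackend::fixupNeedsRelaxation: relax whenever the
    // value cannot be represented as a sign-extended int8_t.
    static bool needsRelaxation(uint64_t Value) {
      return int64_t(Value) != int64_t(int8_t(Value));
    }

    static void checkRelaxationPredicate() {
      assert(!needsRelaxation(0x7F));          // 127 fits in a signed byte
      assert(needsRelaxation(0x80));           // 128 does not
      assert(!needsRelaxation(uint64_t(-1)));  // 0xFF...FF sign-extends back to -1
      assert(needsRelaxation(0x100));          // anything wider must relax
    }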

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,47 @@
+//===-- AMDILAsmBackend.h -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_ASM_BACKEND_H_
+#define _AMDIL_ASM_BACKEND_H_
+#include "AMDIL.h"
+#include "llvm/MC/MCAsmBackend.h"
+#define ASM_BACKEND_CLASS MCAsmBackend
+
+using namespace llvm;
+namespace llvm
+{
+class AMDILAsmBackend : public ASM_BACKEND_CLASS
+{
+public:
+  AMDILAsmBackend(const ASM_BACKEND_CLASS &T);
+  virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+  virtual bool doesSectionRequireSymbols(const MCSection &Section) const;
+  virtual bool isSectionAtomizable(const MCSection &Section) const;
+  virtual bool isVirtualSection(const MCSection &Section) const;
+  virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                          uint64_t Value) const;
+  virtual bool
+  mayNeedRelaxation(const MCInst &Inst
+                   ) const;
+  virtual bool
+  fixupNeedsRelaxation(const MCFixup &Fixup,
+                       uint64_t Value,
+                       const MCInstFragment *DF,
+                       const MCAsmLayout &Layout) const;
+  virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
+  virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+  unsigned getNumFixupKinds() const;
+}; // class AMDILAsmBackend;
+} // llvm namespace
+
+#endif // _AMDIL_ASM_BACKEND_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1028 @@
+//===-- AMDILAsmPrinter.cpp -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#if !defined(NDEBUG) && !defined(USE_APPLE)
+# define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+# define DEBUGME (false)
+#endif
+#include "AMDILAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILCompilerErrors.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <sstream>
+using namespace llvm;
+/// createAMDILCodePrinterPass - Returns a pass that prints the AMDIL
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+
+ASMPRINTER_RETURN_TYPE
+createAMDILCodePrinterPass(AMDIL_ASM_PRINTER_ARGUMENTS)
+{
+  const AMDILSubtarget *stm = &TM.getSubtarget<AMDILSubtarget>();
+  return stm->device()->getAsmPrinter(ASM_PRINTER_ARGUMENTS);
+}
+
+#include "AMDILGenAsmWriter.inc"
+// Force static initialization
+extern "C" void LLVMInitializeAMDILAsmPrinter()
+{
+  llvm::TargetRegistry::RegisterAsmPrinter(TheAMDILTarget,
+      createAMDILCodePrinterPass);
+}
+
+AMDILInstPrinter *llvm::createAMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+    const MCRegisterInfo &MRI)
+{
+  return new AMDILInstPrinter(MAI, MII, MRI);
+}
+
+//
+// @param name
+// @brief strips the "__OpenCL_" prefix and "_kernel" suffix from the name
+// and returns the stripped name if both tokens are present; otherwise the
+// name is returned unchanged (e.g. "__OpenCL_foo_kernel" becomes "foo").
+//
+static
+std::string Strip(const std::string &name)
+{
+  size_t start = name.find("__OpenCL_");
+  size_t end = name.find("_kernel");
+  if (start == std::string::npos
+      || end == std::string::npos
+      || (start == end)) {
+    return name;
+  } else {
+    // 9 == strlen("__OpenCL_"); 16 == strlen("__OpenCL_") + strlen("_kernel")
+    return name.substr(9, name.length()-16);
+  }
+}
+// TODO: Add support for verbose.
+AMDILAsmPrinter::AMDILAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+  : AsmPrinter(ASM_PRINTER_ARGUMENTS)
+{
+  mDebugMode = DEBUGME;
+  mTM = reinterpret_cast<AMDILTargetMachine*>(&TM);
+  mTM->setDebug(mDebugMode);
+  mMeta = new AMDILKernelManager(mTM);
+  mBuffer = 0;
+  mNeedVersion = false;
+  mMFI = NULL;
+  mAMI = NULL;
+}
+
+AMDILAsmPrinter::~AMDILAsmPrinter()
+{
+  delete mMeta;
+}
+const char*
+AMDILAsmPrinter::getPassName() const
+{
+  return "AMDIL Assembly Printer";
+}
+
+void
+AMDILAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+  std::string FunStr;
+  raw_string_ostream OFunStr(FunStr);
+  formatted_raw_ostream O(OFunStr);
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  if (mDebugMode) {
+    O << ";" ;
+    II->print(O);
+  }
+  if (isMacroFunc(II)) {
+    emitMacroFunc(II, O);
+    O.flush();
+    OutStreamer.EmitRawText(StringRef(FunStr));
+    return;
+  }
+  if (isMacroCall(II)) {
+    unsigned reg = 0;
+    unsigned newDst = 0;
+    OpSwizzle opSwiz, oldSwiz;
+    const char *name = mTM->getInstrInfo()->getName(II->getOpcode()) + 5;
+    int macronum = amd::MacroDBFindMacro(name);
+    O << "\t;"<< name<<"\n";
+    O << "\tmcall("<<macronum<<")";
+    reg = II->getOperand(0).getReg();
+    newDst = AMDIL::R1000;
+    oldSwiz.u8all = opSwiz.u8all =
+                      II->getOperand(0).getTargetFlags();
+    if (isXComponentReg(reg)) {
+      newDst = AMDIL::Rx1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isYComponentReg(reg)) {
+      newDst = AMDIL::Ry1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isZComponentReg(reg)) {
+      newDst = AMDIL::Rz1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isWComponentReg(reg)) {
+      newDst = AMDIL::Rw1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isXYComponentReg(reg)) {
+      newDst = AMDIL::Rxy1000;
+      opSwiz.bits.swizzle = AMDIL_DST_XY__;
+    } else if (isZWComponentReg(reg)) {
+      newDst = AMDIL::Rzw1000;
+      opSwiz.bits.swizzle = AMDIL_DST_XY__;
+    } else {
+      opSwiz.bits.swizzle = AMDIL_DST_DFLT;
+    }
+    for (unsigned x = 0, y = II->getNumOperands(); x < y; ++x) {
+      if (!x) {
+        O << "(";
+        O << getRegisterName(newDst);
+        O << getDstSwizzle(opSwiz.bits.swizzle);
+      } else {
+        printOperand(II, x
+                     , O
+                    );
+      }
+      if (!x) {
+        O << "), (";
+      } else if (x != y - 1) {
+        O << ", ";
+      } else {
+        O << ")\n";
+      }
+    }
+    O << "\tmov " << getRegisterName(reg) << getDstSwizzle(oldSwiz.bits.swizzle)
+      << ", " << getRegisterName(newDst);
+    if (isXComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_X000);
+    } else if (isYComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_0X00);
+    } else if (isZComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_00X0);
+    } else if (isWComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_000X);
+    } else if (isXYComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_XY00);
+    } else if (isZWComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_00XY);
+    } else {
+      O << getSrcSwizzle(AMDIL_SRC_DFLT);
+    }
+    O << "\n";
+    if (curTarget->device()->isSupported(
+          AMDILDeviceInfo::MacroDB)) {
+      mMacroIDs.insert(macronum);
+    } else {
+      mMFI->addCalledIntr(macronum);
+    }
+  } else {
+
+    printInstruction(II, O);
+  }
+  O.flush();
+  OutStreamer.EmitRawText(StringRef(FunStr));
+}
+void
+AMDILAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+                               OSTREAM_TYPE &O)
+{
+  const char *name = "unknown";
+  llvm::StringRef nameRef;
+  if (MI->getOperand(0).isGlobal()) {
+    nameRef = MI->getOperand(0).getGlobal()->getName();
+    name = nameRef.data();
+  }
+  emitMCallInst(MI, O, name);
+}
+
+bool
+AMDILAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+  this->MF = &lMF;
+  mMeta->setMF(&lMF);
+  mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+  mAMI = &(lMF.getMMI().getObjFileInfo<AMDILModuleInfo>());
+
+  SetupMachineFunction(lMF);
+  std::string kernelName = MF->getFunction()->getName();
+  mName = Strip(kernelName);
+
+  mKernelName = kernelName;
+  EmitFunctionHeader();
+  EmitFunctionBody();
+  return false;
+}
+
+void
+AMDILAsmPrinter::addCPoolLiteral(const Constant *C)
+{
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+    if (CFP->getType()->isFloatTy()) {
+      mMFI->addf32Literal(CFP);
+    } else {
+      mMFI->addf64Literal(CFP);
+    }
+  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+    int64_t val = 0;
+    if (CI) {
+      val = CI->getSExtValue();
+    }
+    if (CI->getBitWidth() == (int64_t)64) {
+      mMFI->addi64Literal(val);
+    } else if (CI->getBitWidth() == (int64_t)8) {
+      mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i8);
+    } else if (CI->getBitWidth() == (int64_t)16) {
+      mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i16);
+    } else {
+      mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i32);
+    }
+  } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+    uint32_t size = CA->getNumOperands();
+    for (uint32_t x = 0; x < size; ++x) {
+      addCPoolLiteral(CA->getOperand(x));
+    }
+  } else if (const ConstantAggregateZero *CAZ
+             = dyn_cast<ConstantAggregateZero>(C)) {
+    if (CAZ->isNullValue()) {
+      mMFI->addi32Literal(0, AMDIL::LOADCONST_i32);
+      mMFI->addi64Literal(0);
+      mMFI->addf64Literal((uint64_t)0);
+      mMFI->addf32Literal((uint32_t)0);
+    }
+  } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+    uint32_t size = CS->getNumOperands();
+    for (uint32_t x = 0; x < size; ++x) {
+      addCPoolLiteral(CS->getOperand(x));
+    }
+  } else if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+    // TODO: Make this handle vectors natively up to the correct
+    // size
+    uint32_t size = CV->getNumOperands();
+    for (uint32_t x = 0; x < size; ++x) {
+      addCPoolLiteral(CV->getOperand(x));
+    }
+  } else {
+    // TODO: Do we really need to handle ConstantPointerNull?
+    // What about BlockAddress, ConstantExpr and Undef?
+    // How would these even be generated by a valid CL program?
+    assert(0 && "Found a constant type that I don't know how to handle");
+  }
+}
+
+void
+AMDILAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV)
+{
+  llvm::StringRef GVname = GV->getName();
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+  int32_t autoSize = mAMI->getArrayOffset(GVname);
+  int32_t constSize = mAMI->getConstOffset(GVname);
+  O << ".global@" << GVname;
+  if (autoSize != -1) {
+    O << ":" << autoSize << "\n";
+  } else if (constSize != -1) {
+    O << ":" << constSize << "\n";
+  }
+  O.flush();
+  OutStreamer.EmitRawText(O.str());
+}
+
+
+void
+AMDILAsmPrinter::printOperand(const MachineInstr *MI, int opNum
+                              , OSTREAM_TYPE &O
+                             )
+{
+  const MachineOperand &MO = MI->getOperand (opNum);
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    if (MO.isReg()) {
+      unsigned opcode = MI->getOpcode();
+      if ((signed)MO.getReg() < 0) {
+        // FIXME: we need to remove all virtual register creation after register allocation.
+        // This is a work-around to make sure that the virtual register range does not
+        // clobber the physical register range.
+        O << "r" << ((MO.getReg() & 0x7FFFFFFF)  + 2048) << getSwizzle(MI, opNum);
+      } else if (opNum == 0
+                 && (opcode == AMDIL::SCRATCHSTORE
+                     ||opcode == AMDIL::SCRATCHSTORE64)) {
+        O << getRegisterName(MO.getReg()) << ".x]";
+        // If we aren't the vector register, print the dst swizzle.
+        if (MI->getOperand(1).getReg() != AMDIL::R1011) {
+          O << getSwizzle(MI, opNum);
+        }
+      } else {
+        O << getRegisterName(MO.getReg()) << getSwizzle(MI, opNum);
+      }
+    } else {
+      assert(0 && "Invalid Register type");
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+    }
+    break;
+  case MachineOperand::MO_Immediate:
+  case MachineOperand::MO_FPImmediate: {
+    unsigned opcode = MI->getOpcode();
+    if ((opNum == (int)(MI->getNumOperands() - 1))
+        && (   (opcode >= AMDIL::ATOM_A_ADD
+                && opcode <= AMDIL::ATOM_R_XOR_NORET_B64)
+               || (opcode >= AMDIL::ATOM64_G_ADD
+                   && opcode <= AMDIL::ATOM64_R_XOR_NORET_B64)
+               || opcode == AMDIL::SEMAPHORE_INIT
+               || (opcode >= AMDIL::SCRATCHLOAD
+                   && opcode <= AMDIL::SCRATCHSTORE)
+               || (opcode >= AMDIL::LDSLOAD && opcode <= AMDIL::LDSSTORE_i8)
+               || (opcode >= AMDIL::GDSLOAD && opcode <= AMDIL::GDSSTORE)
+               || (opcode >= AMDIL::UAVARENALOAD_i16
+                   && opcode <= AMDIL::UAVRAWSTORE_v4i32)
+               || opcode == AMDIL::CBLOAD
+               || opcode == AMDIL::CASE)
+       ) {
+      O << MO.getImm();
+    } else if (((opcode >= AMDIL::VEXTRACT_v2f32
+                 && opcode <= AMDIL::VEXTRACT_v4i8)
+                && (opNum == 2))) {
+      // The swizzle is encoded in the operand so the
+      // literal that represents the swizzle out of ISel
+      // can be ignored.
+    } else if ((opcode >= AMDIL::VINSERT_v2f32)
+               && (opcode <= AMDIL::VINSERT_v4i8)
+               && ((opNum == 3)  || (opNum == 4))) {
+      // The swizzle is encoded in the operand so the
+      // literal that represents the swizzle out of ISel
+      // can be ignored.
+    } else if (opNum == 1 &&
+               (isAppendInst(TM, MI)
+                || isReadImageInst(TM, MI)
+                || isImageTXLDInst(TM, MI)
+                || opcode == AMDIL::CBLOAD)) {
+      // We don't need to emit the 'l' so we just emit
+      // the immediate as it stores the resource ID and
+      // is not a true literal.
+      O << MO.getImm();
+    } else if (opNum == 0 &&
+               (opcode == AMDIL::SEMAPHORE_INIT
+                || opcode == AMDIL::SEMAPHORE_WAIT
+                || opcode == AMDIL::SEMAPHORE_SIGNAL
+                || isReadImageInst(TM, MI)
+                || isWriteImageInst(TM, MI))) {
+      O << MO.getImm();
+    } else if (opNum == 3 && isReadImageInst(TM, MI)) {
+      O << MO.getImm();
+    } else if (MO.isImm() || MO.isFPImm()) {
+      O << "l" << MO.getImm() << getSwizzle(MI, opNum);
+    } else {
+      assert(0 && "Invalid literal/constant type");
+      mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[INTERNAL_ERROR]);
+    }
+  }
+  break;
+  case MachineOperand::MO_MachineBasicBlock:
+    EmitBasicBlockStart(MO.getMBB());
+    return;
+  case MachineOperand::MO_GlobalAddress: {
+    int offset = 0;
+    const GlobalValue *gv = MO.getGlobal();
+    // Here we look up by the name for the corresponding number
+    // and we print that out instead of the name or the address
+    if (MI->getOpcode() == AMDIL::CALL) {
+      uint32_t funcNum;
+      llvm::StringRef name = gv->getName();
+      funcNum = name.empty()
+                ?  mAMI->getOrCreateFunctionID(gv)
+                : mAMI->getOrCreateFunctionID(name);
+      mMFI->addCalledFunc(funcNum);
+      O << funcNum <<" ; "<< name;
+    } else if((offset = mAMI->getArrayOffset(gv->getName()))
+              != -1) {
+      mMFI->setUsesLDS();
+      O << "l" << mMFI->getIntLits(offset) << ".x";
+    } else if((offset = mAMI->getConstOffset(gv->getName()))
+              != -1) {
+      mMFI->addMetadata(";memory:datareqd");
+      O << "l" << mMFI->getIntLits(offset) << ".x";
+      mMFI->setUsesConstant();
+    } else {
+      assert(0 && "GlobalAddress without a function call!");
+      mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[MISSING_FUNCTION_CALL]);
+    }
+  }
+  break;
+  case MachineOperand::MO_ExternalSymbol: {
+    if (MI->getOpcode() == AMDIL::CALL) {
+      uint32_t funcNum = mAMI->getOrCreateFunctionID(
+                           std::string(MO.getSymbolName()));
+      mMFI->addCalledFunc(funcNum);
+      O << funcNum << " ; "<< MO.getSymbolName();
+      // This is where pointers should get resolved
+    } else {
+      assert(0 && "ExternalSymbol without a function call!");
+      mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[MISSING_FUNCTION_CALL]);
+    }
+  }
+  break;
+  case MachineOperand::MO_ConstantPoolIndex: {
+    // Copies of constant buffers need to be done here
+    const AMDILKernel *tmp = mAMI->getKernel(mKernelName);
+    O << "l" << mMFI->getIntLits(
+        tmp->CPOffsets[MO.getIndex()].first);
+  }
+  break;
+  default:
+    O << "<unknown operand type>";
+    break;
+  }
+}
+
+void
+AMDILAsmPrinter::printMemOperand(
+  const MachineInstr *MI,
+  int opNum,
+  OSTREAM_TYPE &O,
+  const char *Modifier
+)
+{
+  const MachineOperand &MO = MI->getOperand (opNum);
+  if (opNum != 1) {
+    printOperand(MI, opNum
+                 , O
+                );
+  } else {
+    switch (MO.getType()) {
+    case MachineOperand::MO_Register:
+      if (MO.isReg()) {
+        unsigned opcode = MI->getOpcode();
+        if ((signed)MO.getReg() < 0) {
+          // FIXME: we need to remove all virtual register creation after register allocation.
+          // This is a work-around to make sure that the virtual register range does not
+          // clobber the physical register range.
+          O << "r" << ((MO.getReg() & 0x7FFFFFFF) + 2048) << getSwizzle(MI, opNum);
+        } else if (opNum == 0
+                   && (opcode == AMDIL::SCRATCHSTORE
+                       ||opcode == AMDIL::SCRATCHSTORE64)) {
+          O << getRegisterName(MO.getReg()) << ".x]" << getSwizzle(MI, opNum);
+        } else {
+          O << getRegisterName(MO.getReg()) << getSwizzle(MI, opNum);
+        }
+      } else {
+        assert(0 && "Invalid Register type");
+        mMFI->addErrorMsg(
+          amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      }
+      break;
+    case MachineOperand::MO_Immediate:
+    case MachineOperand::MO_FPImmediate: {
+      unsigned opcode = MI->getOpcode();
+      if ((opNum == (int)(MI->getNumOperands() - 1))
+          && ((opcode >= AMDIL::ATOM_A_ADD
+               && opcode <= AMDIL::ATOM_R_XOR_B64)
+              || opcode == AMDIL::SEMAPHORE_INIT
+              || (opcode >= AMDIL::SCRATCHLOAD
+                  && opcode <= AMDIL::SCRATCHSTORE)
+              || (opcode >= AMDIL::LDSLOAD && opcode <= AMDIL::LDSSTORE_i8)
+              || (opcode >= AMDIL::GDSLOAD && opcode <= AMDIL::GDSSTORE)
+              || (opcode >= AMDIL::UAVARENALOAD_i32
+                  && opcode <= AMDIL::UAVRAWSTORE_v4i32)
+              || opcode == AMDIL::CBLOAD
+              || opcode == AMDIL::CASE)
+         ) {
+        O << MO.getImm();
+      } else if (opNum == 1 &&
+                 (isAppendInst(TM, MI)
+                  || isReadImageInst(TM, MI)
+                  || isImageTXLDInst(TM, MI)
+                  || opcode == AMDIL::CBLOAD)) {
+        // We don't need to emit the 'l' so we just emit
+        // the immediate as it stores the resource ID and
+        // is not a true literal.
+        O << MO.getImm();
+      } else if (opNum == 0 &&
+                 (opcode == AMDIL::SEMAPHORE_INIT
+                  || opcode == AMDIL::SEMAPHORE_WAIT
+                  || opcode == AMDIL::SEMAPHORE_SIGNAL
+                  || isReadImageInst(TM, MI)
+                  || isWriteImageInst(TM, MI))) {
+        O << MO.getImm();
+      } else if (opNum == 3 && isReadImageInst(TM, MI)) {
+        O << MO.getImm();
+      } else if (MO.isImm() || MO.isFPImm()) {
+        O << "l" << MO.getImm();
+      } else {
+        assert(0 && "Invalid literal/constant type");
+        mMFI->addErrorMsg(
+          amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      }
+    }
+    break;
+    case MachineOperand::MO_ConstantPoolIndex: {
+      // Copies of constant buffers need to be done here
+      const AMDILKernel *tmp = mAMI->getKernel(mKernelName);
+      O << "l" << mMFI->getIntLits(
+          tmp->CPOffsets[MO.getIndex()].first);
+    }
+    break;
+    default:
+      O << "<unknown operand type>";
+      break;
+    };
+  }
+}
+
+
+const char*
+AMDILAsmPrinter::getSwizzle(const MachineInstr *MI, int opNum)
+{
+  const MachineOperand &MO = MI->getOperand(opNum);
+  OpSwizzle swiz;
+  swiz.u8all = MO.getTargetFlags();
+  if (!swiz.bits.dst) {
+    return getSrcSwizzle(swiz.bits.swizzle);
+  } else {
+    return getDstSwizzle(swiz.bits.swizzle);
+  }
+}
+
+void
+AMDILAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  curTarget->setKernelManager(mMeta);
+
+
+  if (curTarget->device()->isSupported(
+        AMDILDeviceInfo::MacroDB)) {
+    // Since we are using the macro db, the first token must be a macro.
+    // So we make up a macro that is never used.
+    // I originally picked -1, but the IL text translator treats macro IDs
+    // as unsigned integers.
+    O << "mdef(16383)_out(1)_in(2)\n";
+    O << "mov r0, in0\n";
+    O << "mov r1, in1\n";
+    O << "div_zeroop(infinity) r0.x___, r0.x, r1.x\n";
+    O << "mov out0, r0\n";
+    O << "mend\n";
+  }
+
+
+  // We need to increase the number of reserved literals for
+  // any literals we output manually instead of via the
+  // emitLiteral function. This function should never
+  // have any executable code in it. Only declarations
+  // and the main function patch symbol.
+  if (curTarget->device()->getGeneration() == AMDILDeviceInfo::HDTEST) {
+    O << "il_cs_3_0\n";
+  } else {
+    O << "il_cs_2_0\n";
+  }
+  O << "dcl_cb cb0[15] ; Constant buffer that holds ABI data\n";
+  O << "dcl_literal l0, 0x00000004, 0x00000001, 0x00000002, 0x00000003\n";
+  O << "dcl_literal l1, 0x00FFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFD\n";
+  O << "dcl_literal l2, 0x0000FFFF, 0xFFFFFFFE, 0x000000FF, 0xFFFFFFFC\n";
+  O << "dcl_literal l3, 0x00000018, 0x00000010, 0x00000008, 0xFFFFFFFF\n";
+  O << "dcl_literal l4, 0xFFFFFF00, 0xFFFF0000, 0xFF00FFFF, 0xFFFF00FF\n";
+  O << "dcl_literal l5, 0x00000000, 0x00000004, 0x00000008, 0x0000000C\n";
+  O << "dcl_literal l6, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n";
+  O << "dcl_literal l7, 0x00000018, 0x0000001F, 0x00000010, 0x0000001F\n";
+  O << "dcl_literal l8, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n";
+  O << ";$$$$$$$$$$\n";
+  O << "endmain\n";
+  O << ";DEBUGSTART\n";
+  OutStreamer.EmitRawText(O.str());
+}
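+// Close out the module: end the debug region, emit the bodies of any macros
+// pulled from the macro DB, dump the module's data section and print the
+// final 'end' token.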
+void
+AMDILAsmPrinter::EmitEndOfAsmFile(Module &M)
+{
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  O << ";DEBUGEND\n";
+  if (curTarget->device()->isSupported(AMDILDeviceInfo::MacroDB)) {
+    int lines;
+    for (llvm::DenseSet<uint32_t>::iterator msb = mMacroIDs.begin()
+         , mse = mMacroIDs.end(); msb != mse; ++msb) {
+      int idx = *msb;
+      const char* *macro = amd::MacroDBGetMacro(&lines, idx);
+      for (int k = 0; k < lines; ++k) {
+        O << macro[k];
+      }
+    }
+  }
+  if (mAMI) mAMI->dumpDataSection(O, mMFI);
+  O << "\nend\n";
+#ifdef _DEBUG
+  if (mDebugMode) {
+    mTM->dump(O);
+  }
+#endif
+  OutStreamer.EmitRawText(O.str());
+}
+void
+AMDILAsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const
+{
+  assert(0 && "When is this function hit!");
+}
+bool
+AMDILAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned int OpNo,
+                                 unsigned int AsmVariant, const char *ExtraCode)
+{
+  assert(0 && "When is this function hit!");
+  return false;
+}
+bool
+AMDILAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                       unsigned int OpNo, unsigned int AsmVariant, const char *ExtraCode)
+{
+  assert(0 && "When is this function hit!");
+  return false;
+}
+void
+AMDILAsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV)
+{
+  assert(0 && "When is this function hit!");
+}
+void
+AMDILAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+    const MachineBasicBlock *MBB) const
+{
+  assert(0 && "When is this function hit!");
+}
+void
+AMDILAsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+    const MachineBasicBlock *MBB) const
+{
+  assert(0 && "When is this function hit!");
+}
+void
+AMDILAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                                        const MachineBasicBlock *MBB,
+                                        unsigned uid) const
+{
+  assert(0 && "When is this function hit!");
+}
+
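+// Open the body of the current function. For kernels this emits the wrapper
+// shader first (metadata header, group size, declarations, literals for the
+// local/constant arrays, argument copies and a 'call' to the real function)
+// and then starts the function's own 'func' block; for ordinary functions it
+// simply opens the 'func' block.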
+void
+AMDILAsmPrinter::EmitFunctionBodyStart()
+{
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+
+  bool isKernel = false;
+  O << "";
+  O << ";DEBUGEND\n";
+  ++mBuffer;
+  isKernel = mMFI->isKernel();
+  uint32_t id = mName.empty()
+                ? mAMI->getOrCreateFunctionID(MF->getFunction())
+                : mAMI->getOrCreateFunctionID(mName);
+  mMeta->setKernel(isKernel);
+  mMeta->setID(id);
+  if (isKernel) {
+    mMeta->printHeader(this, O, mKernelName);
+    mMeta->processArgMetadata(O, mBuffer, isKernel);
+    mMeta->printGroupSize(O);
+    mMeta->printDecls(this, O);
+    AMDILKernel &tmp = *(mMFI->getKernel());
+    // add the literals for the offsets and sizes of
+    // all kernel declared local arrays
+    if (tmp.lvgv) {
+      AMDILLocalArg *lptr = tmp.lvgv;
+      llvm::SmallVector<AMDILArrayMem*, DEFAULT_VEC_SLOTS>::iterator lmb, lme;
+      for (lmb = lptr->local.begin(), lme = lptr->local.end();
+           lmb != lme; ++lmb) {
+        mMFI->addi32Literal((*lmb)->offset);
+        mMFI->addi32Literal((*lmb)->vecSize);
+        mMFI->setUsesLDS();
+      }
+    }
+    // Add the literals for the offsets and sizes of
+    // all the globally scoped constant arrays
+    for (StringMap<AMDILConstPtr>::iterator cmb = mAMI->consts_begin(),
+         cme = mAMI->consts_end(); cmb != cme; ++cmb) {
+      mMFI->addi32Literal((cmb)->second.offset);
+      mMFI->addi32Literal((cmb)->second.size);
+      mMFI->addMetadata(";memory:datareqd");
+      mMFI->setUsesConstant();
+    }
+
+    // Add the literals for the offsets and sizes of
+    // all the kernel constant arrays
+    llvm::SmallVector<AMDILConstPtr, DEFAULT_VEC_SLOTS>::const_iterator cpb, cpe;
+    for (cpb = tmp.constPtr.begin(), cpe = tmp.constPtr.end();
+         cpb != cpe; ++cpb) {
+      mMFI->addi32Literal(cpb->size);
+      mMFI->addi32Literal(cpb->offset);
+      mMFI->setUsesConstant();
+    }
+    mMeta->emitLiterals(O);
+    // Add 1 to the size so that the next literal is the one we want
+    mMeta->printArgCopies(O, this);
+    O << "call " << id << " ; " << mName << "\n";
+    mMeta->printFooter(O);
+    mMeta->printMetaData(O, id, isKernel);
+    O << "func " << id << " ; " << mName << "\n";
+  } else {
+    if (mName.empty()) {
+      std::stringstream ss;
+      ss << "unknown_" << id;
+      mName = ss.str();
+    }
+    mMeta->setName(mName);
+    O << "func " << id << " ; " << mName << "\n";
+    mMeta->processArgMetadata(O, mBuffer, false);
+  }
+  O.flush();
+  OutStreamer.EmitRawText(O.str());
+}
+void
+AMDILAsmPrinter::EmitFunctionBodyEnd()
+{
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+  uint32_t id = mName.empty()
+                ? mAMI->getOrCreateFunctionID(MF->getFunction())
+                : mAMI->getOrCreateFunctionID(mName);
+  if (mName.empty()) {
+    std::stringstream ss;
+    ss << "unknown_" << id;
+    mName = ss.str();
+  }
+  if (mAMI->isKernel(mKernelName)) {
+    O << "ret\nendfunc ; " << mName << "\n";
+    mMeta->setName(mName);
+    mMeta->printMetaData(O, id, false);
+  } else {
+    O << "ret\nendfunc ; " << mName << "\n";
+    mMeta->printMetaData(O, id, false);
+  }
+  mMeta->clear();
+  O << ";DEBUGSTART\n";
+  O.flush();
+  OutStreamer.EmitRawText(O.str());
+}
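+// Register the kernel's constant pool with the literal tables: record each
+// constant pool offset as an i32 literal and add every pool constant via
+// addCPoolLiteral.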
+void
+AMDILAsmPrinter::EmitConstantPool()
+{
+  AMDILKernel *tmp = mAMI->getKernel(mKernelName);
+  if (!tmp || !tmp->mKernel) {
+    return;
+  }
+  mAMI->calculateCPOffsets(MF, tmp);
+  // Add all the constant pool offsets to the literal table
+  for (uint32_t x = 0; x < tmp->CPOffsets.size(); ++x) {
+    mMFI->addMetadata(";memory:datareqd");
+    mMFI->addi32Literal(tmp->CPOffsets[x].first);
+  }
+
+  // Add all the constant pool constants to the literal tables
+  {
+    const MachineConstantPool *MCP = MF->getConstantPool();
+    const std::vector<MachineConstantPoolEntry> &consts
+    = MCP->getConstants();
+    for (uint32_t x = 0, s = consts.size(); x < s; ++x) {
+      addCPoolLiteral(consts[x].Val.ConstVal);
+    }
+  }
+}
+void
+AMDILAsmPrinter::EmitFunctionEntryLabel()
+{
+  return;
+  assert(0 && "When is this function hit!");
+}
+
+/// getDebugResourceLocation - Get resource id information encoded in
+/// target flags.
+uint32_t AMDILAsmPrinter::getDebugResourceID(const MachineInstr *MI) const
+{
+  const llvm::MachineOperand& opr = MI->getOperand(MI->getNumOperands() - 1);
+  assert(opr.isMetadata());
+  const MDNode *Var = opr.getMetadata();
+  const Value * valOfVar = Var;
+  uint32_t resourceID = mMeta->getUAVID(valOfVar);
+  return resourceID;
+}
+
+bool
+AMDILAsmPrinter::isMacroCall(const MachineInstr *MI)
+{
+  return !strncmp(mTM->getInstrInfo()->getName(MI->getOpcode()), "MACRO", 5);
+}
+
+bool
+AMDILAsmPrinter::isMacroFunc(const MachineInstr *MI)
+{
+  if (MI->getOpcode() != AMDIL::CALL) {
+    return false;
+  }
+  if (!MI->getOperand(0).isGlobal()) {
+    return false;
+  }
+  const llvm::StringRef &nameRef = MI->getOperand(0).getGlobal()->getName();
+  if (nameRef.startswith("__atom_")
+      || nameRef.startswith("__atomic_")) {
+    mMeta->setOutputInst();
+  }
+  return amd::MacroDBFindMacro(nameRef.data()) != -1;
+}
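+// Map a component sub-register to the IL swizzle suffix used when it is
+// printed: destination operands get a write mask (e.g. ".xy__") while
+// source operands get a zero-filled select (e.g. ".xy00").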
+static const char*
+getRegSwizzle(unsigned reg, bool dst)
+{
+  if (reg >= AMDIL::Rx1 && reg < AMDIL::Rxy1) {
+    return ".x";
+  } else if (reg >= AMDIL::Ry1 && reg < AMDIL::Rz1) {
+    return ".y";
+  } else if (reg >= AMDIL::Rz1 && reg < AMDIL::Rzw1) {
+    return ".z";
+  } else if (reg >= AMDIL::Rw1 && reg < AMDIL::Rx1) {
+    return ".w";
+  } else if (reg >= AMDIL::Rxy1 && reg < AMDIL::Ry1) {
+    return ((dst) ? ".xy__" : ".xy00");
+  } else if (reg >= AMDIL::Rzw1 && reg < AMDIL::SDP) {
+    return ((dst) ? ".__zw" : ".00zw");
+  } else {
+    return  "";
+  }
+}
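+// Emit an 'mcall' for the macro named 'name': look the macro up in the
+// macro DB, record it as used (or as a called intrinsic when the device has
+// no macro DB), pick a register class from the vector width encoded in the
+// name, then print the output and input register lists with their swizzles.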
+void
+AMDILAsmPrinter::emitMCallInst(const MachineInstr *MI, OSTREAM_TYPE &O, const char *name)
+{
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  int macronum = amd::MacroDBFindMacro(name);
+  if (macronum == -1) {
+    return;
+  }
+  // Only query the macro DB once we know the macro number is valid.
+  int numIn = amd::MacroDBNumInputs(macronum);
+  int numOut = amd::MacroDBNumOutputs(macronum);
+  if (curTarget->device()->isSupported(
+        AMDILDeviceInfo::MacroDB)) {
+    mMacroIDs.insert(macronum);
+  } else {
+    mMFI->addCalledIntr(macronum);
+  }
+  const TargetRegisterClass *trc = NULL;
+  if (strstr(name, "4f32")
+      || strstr(name, "4i32")) {
+    trc = MF->getTarget()
+          .getRegisterInfo()->getRegClass(AMDIL::GPRV4F32RegClassID);
+  } else if (strstr(name, "2f32")
+             || strstr(name, "2i32")) {
+    trc = MF->getTarget()
+          .getRegisterInfo()->getRegClass(AMDIL::GPRV2F32RegClassID);
+  } else {
+    trc = MF->getTarget()
+          .getRegisterInfo()->getRegClass(AMDIL::GPRF32RegClassID);
+  }
+  O << "\tmcall(" << macronum << ")(";
+  int x;
+  for (x = 0; x < numOut - 1; ++x) {
+    O << getRegisterName(trc->getRegister(x))
+      << getRegSwizzle(trc->getRegister(x), true) << ", ";
+  }
+  O << getRegisterName(trc->getRegister(x))
+    << getRegSwizzle(trc->getRegister(x), true) << "),(";
+  for (x = 0; x < numIn - 1; ++x) {
+    O << getRegisterName(trc->getRegister(x))
+      << getRegSwizzle(trc->getRegister(x), false) << ", ";
+  }
+  O << getRegisterName(trc->getRegister(x))
+    << getRegSwizzle(trc->getRegister(x), false) << ")";
+  O << " ;" << name <<"\n";
+}
+
+#if defined(LLVM_29) || defined(USE_APPLE)
+void
+AMDILAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const
+{
+}
+#else
+void
+AMDILAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const
+{
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  unsigned reg = MLoc.getReg();
+  unsigned baseReg = AMDIL::R1;
+  const char* regStr = NULL;
+  const char* regxStr = NULL;
+  unsigned offset = 0;
+  unsigned size = 32;
+  const char* offStr = NULL;
+  if (isXComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rx1);
+    regxStr = "DW_OP_regx for x component of register";
+    regStr = "DW_OP_reg for x component of register";
+    offset = 0;
+    offStr = "DW_OP_bit_piece 32 0";
+  } else if (isYComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Ry1);
+    regxStr = "DW_OP_regx for y component of register";
+    regStr = "DW_OP_reg for y component of register";
+    offset = 32;
+    offStr = "DW_OP_bit_piece 32 32";
+  } else if (isZComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rz1);
+    regxStr = "DW_OP_regx for z component of register";
+    regStr = "DW_OP_reg for z component of register";
+    offset = 64;
+    offStr = "DW_OP_bit_piece 32 64";
+  } else if (isWComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rw1);
+    regxStr = "DW_OP_regx for w component of register";
+    regStr = "DW_OP_reg for w component of register";
+    offset = 96;
+    offStr = "DW_OP_bit_piece 32 96";
+  } else if (isXYComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rxy1);
+    regxStr = "DW_OP_regx for xy component of register";
+    regStr = "DW_OP_reg for xy component of register";
+    offset = 0;
+    size = 64;
+    offStr = "DW_OP_bit_piece 64 0";
+  } else if (isZWComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rzw1);
+    regxStr = "DW_OP_regx for zw component of register";
+    regStr = "DW_OP_reg for zw component of register";
+    offset = 64;
+    size = 64;
+    offStr = "DW_OP_bit_piece 64 64";
+  } else {
+    baseReg = reg;
+    regxStr = "DW_OP_regx for xyzw component of register";
+    regStr = "DW_OP_reg for xyzw component of register";
+    offset = 0;
+    size = 128;
+    offStr = "DW_OP_bit_piece 128 0";
+  }
+  baseReg = RI->getDwarfRegNum(baseReg, false);
+  OutStreamer.AddComment("Loc expr size");
+  unsigned OffsetSize = MCAsmInfo::getULEB128Size(size)
+                        + MCAsmInfo::getULEB128Size(offset);
+  if (int Offset = MLoc.getOffset()) {
+    OffsetSize += Offset ?  MCAsmInfo::getSLEB128Size(Offset) : 1;
+    OutStreamer.AddComment("Loc expr size");
+    EmitInt16(OffsetSize);
+    OutStreamer.AddComment(
+      dwarf::OperationEncodingString(dwarf::DW_OP_fbreg));
+    EmitInt8(dwarf::DW_OP_fbreg);
+    OutStreamer.AddComment("Offset");
+    EmitSLEB128(Offset);
+  } else if (baseReg < 32) {
+    EmitInt16(2 + OffsetSize);
+    OutStreamer.AddComment(
+      dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + baseReg));
+    EmitInt8(dwarf::DW_OP_reg0 + baseReg);
+  } else {
+    EmitInt16(2 +  MCAsmInfo::getULEB128Size(baseReg) + OffsetSize);
+    OutStreamer.AddComment(regxStr);
+    EmitInt8(dwarf::DW_OP_regx);
+    OutStreamer.AddComment(Twine(baseReg));
+    EmitULEB128(baseReg);
+  }
+
+  OutStreamer.AddComment(offStr);
+  EmitInt8(dwarf::DW_OP_bit_piece);
+  EmitULEB128(size);
+  EmitULEB128(offset);
+}
+#endif

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,244 @@
+//===-- AMDILAsmPrinter.h -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_ASM_PRINTER_H_
+#define _AMDIL_ASM_PRINTER_H_
+#include "AMDIL.h"
+#include "AMDILLLVMVersion.h"
+#include "macrodata.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm
+{
+class AMDILKernelManager;
+class AMDILTargetMachine;
+class AMDILMachineFunctionInfo;
+class AMDILModuleInfo;
+class AnalysisUsage;
+class Constant;
+class Function;
+class Module;
+class MachineInstr;
+class MachineBasicBlock;
+class MachineConstantPoolValue;
+class MachineFunction;
+class MachineJumpTableInfo;
+class raw_ostream;
+class MCStreamer;
+class MCSymbol;
+class MCInst;
+class MCContext;
+
+
+class LLVM_LIBRARY_VISIBILITY AMDILAsmPrinter : public AsmPrinter
+{
+public:
+  //
+  // Constructor for the AMDIL-specific AsmPrinter class.
+  // The interface is defined by LLVM proper; refer there for more
+  // information.
+  //
+  explicit AMDILAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+  //
+  // Destructor for the AsmPrinter class that deletes all the
+  // allocated memory
+  //
+  virtual ~AMDILAsmPrinter();
+
+  //
+  // @param MI Machine instruction to print the operand of
+  // @param opNum operand to print from the specified machine instruction
+  // @param O The output stream for the operand
+  // @brief Based on the register type, print out register-specific
+  // information and add swizzle information in the cases that require it
+  //
+  virtual void
+  printOperand(const MachineInstr *MI, int opNum
+               , OSTREAM_TYPE &O
+              );
+
+  void
+  EmitGlobalVariable(const GlobalVariable *GV);
+  // overloading ALL AsmPrinter.h virtual functions to better
+  // understand how everything works
+  void
+  EmitStartOfAsmFile(Module &M);
+  void
+  EmitEndOfAsmFile(Module &M);
+  void
+  PrintSpecial(const MachineInstr *MI, const char *Code) const;
+  bool
+  PrintAsmOperand(const MachineInstr *MI, unsigned int OpNo,
+                  unsigned int AsmVariant, const char *ExtraCode);
+  bool
+  PrintAsmMemoryOperand(const MachineInstr *MI, unsigned int OpNo,
+                        unsigned int AsmVariant,
+                        const char *ExtraCode);
+  void
+  EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+  void
+  printPICJumpTableSetLabel(unsigned uid,
+                            const MachineBasicBlock *MBB) const;
+  void
+  printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+                            const MachineBasicBlock *MBB) const;
+  void
+  printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                         const MachineBasicBlock *MBB,
+                         unsigned uid) const;
+  virtual void
+  EmitInstruction(const MachineInstr *MI);
+  void
+  EmitFunctionBodyStart();
+  void
+  EmitFunctionBodyEnd();
+  void
+  EmitConstantPool();
+  void
+  EmitFunctionEntryLabel();
+
+  virtual uint32_t getDebugResourceID(const MachineInstr *MI) const;
+
+  //
+  // @param MI Machine instruction to print memory operand of
+  // @param opNum operand to print from the specified machine instruction
+  // @param Modifier optional modifier for the memory operand
+  // @brief Print the memory operand based on the register type
+  //
+  void
+  printMemOperand(const MachineInstr *MI, int opNum,
+                  OSTREAM_TYPE &O,
+                  const char *Modifier = NULL);
+
+  //
+  // @param MI Machine instruction to print to the buffer
+  // @brief autogenerated function from tablegen files that prints out
+  // the assembly format of the specified instruction
+  //
+  void
+  printInstruction(const MachineInstr *MI , OSTREAM_TYPE &O); // autogenerated
+
+  const char *getRegisterName(unsigned RegNo);
+
+  //
+  // @param F MachineFunction to print the assembly for
+  // @brief parse the specified machine function and print
+  // out the assembly for all the instructions in the function
+  //
+  bool
+  runOnMachineFunction(MachineFunction &F);
+
+  //
+  // @param MI Machine Instruction to determine if it is a macro call
+  // @brief Query to see if the instruction is a Macro or not
+  // @return true if instruction is a macro
+  //
+  bool
+  isMacroCall(const MachineInstr *MI);
+
+  //
+  // @param MI Machine Instruction to determine if the function is a macro
+  // @brief determine if the function is a macro function or a normal
+  // function
+  // @return true if the function call should be transformed to a macro,
+  // false otherwise
+  //
+  bool
+  isMacroFunc(const MachineInstr *MI);
+
+
+  //
+  // @param MI Machine instruction to print swizzle for
+  // @param opNum the operand number to print swizzle for
+  // @brief print out the swizzle for a scalar register class
+  //
+  const char*
+  getSwizzle(const MachineInstr *MI, int opNum);
+
+  //
+  // @return the name of this specific pass
+  //
+  virtual const char*
+  getPassName() const;
+
+  /// EmitDwarfRegOp - Emit dwarf register operation
+  virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+
+
+protected:
+  //
+  // @param MI Machine instruction to emit the macro code for
+  //
+  // Emits a fully functional macro function that uses the argument
+  // registers as the macro arguments.
+  //
+  virtual void
+  emitMacroFunc(const MachineInstr *MI , OSTREAM_TYPE &O);
+
+  // Flag whether to print out debug information
+  // or not.
+  bool mDebugMode;
+
+  // @param MI Machine instruction that calls the macro
+  // @param O The output stream to print the mcall to
+  // @param name Name of the macro being called
+  //
+  void
+  emitMCallInst(const MachineInstr *MI, OSTREAM_TYPE &O, const char *name);
+
+  // Set of all macros that are used in this compilation unit.
+  llvm::DenseSet<uint32_t> mMacroIDs;
+
+  /// Pointer to the Target Machine that the asm printer
+  /// should be printing compatible code for.
+  AMDILTargetMachine *mTM;
+
+  /// pointer to the kernel manager that keeps track
+  /// of the metadata required by the runtime to
+  /// call a kernel correctly.
+  AMDILKernelManager *mMeta;
+
+  /// Class that holds information about the current
+  /// function that is being processed.
+  AMDILMachineFunctionInfo *mMFI;
+
+  /// Class that holds information about the current
+  /// module that is being processed.
+  AMDILModuleInfo *mAMI;
+
+  /// Name of the current function being printed
+  /// by the asm printer
+  std::string mName;
+
+  /// name of the kernel wrapper of the current function
+  std::string mKernelName;
+
+private:
+  void addCPoolLiteral(const Constant *C);
+
+  /// The constant buffer that the data should be
+  /// allocated in by the runtime
+  int mBuffer;
+
+  /// Flag to determine whether the printer needs
+  /// to print assembly version information in the metadata
+  bool mNeedVersion;
+};
+
+
+} // end of llvm namespace
+
+#endif // _AMDIL_ASM_PRINTER_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,191 @@
+//===-- AMDILBarrierDetect.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "BarrierDetect"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+// The barrier detect pass determines whether a barrier has been duplicated
+// in the source program, which can cause undefined behaviour if more than a
+// single wavefront is executed in a work-group. LLVM has no notion of an
+// execution barrier, so once a barrier call is duplicated the program's
+// behaviour is no longer well defined. To work around this, we detect the
+// duplicated barrier and make the work-group execute in single-wavefront
+// mode, essentially making the barrier a no-op.
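+//
+// A minimal sketch of the kind of duplication this guards against
+// (hypothetical OpenCL source, shown only for illustration):
+//
+//   for (int i = 0; i < 2; ++i) { ...; barrier(CLK_LOCAL_MEM_FENCE); }
+//
+// If the loop is unrolled, the single barrier call site is duplicated.
+// Assuming the front end tags each barrier call with a unique ID in its
+// first operand (which is what detectBarrier() below relies on), seeing the
+// same ID twice identifies the duplication, and the pass then emits
+// ";limitgroupsize" so the work-group runs as a single wavefront.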
+
+namespace
+{
+class LLVM_LIBRARY_VISIBILITY AMDILBarrierDetect : public FunctionPass
+{
+  TargetMachine &TM;
+  static char ID;
+public:
+  AMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+  ~AMDILBarrierDetect();
+  const char *getPassName() const;
+  bool runOnFunction(Function &F);
+  bool doInitialization(Module &M);
+  bool doFinalization(Module &M);
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+private:
+  bool detectBarrier(BasicBlock::iterator *BBI);
+  bool mChanged;
+  SmallVector<int64_t, DEFAULT_VEC_SLOTS> bVecMap;
+  const AMDILSubtarget *mStm;
+
+  // Constants used to define memory type.
+  static const unsigned int LOCAL_MEM_FENCE = 1<<0;
+  static const unsigned int GLOBAL_MEM_FENCE = 1<<1;
+  static const unsigned int REGION_MEM_FENCE = 1<<2;
+};
+char AMDILBarrierDetect::ID = 0;
+} // anonymous namespace
+
+namespace llvm
+{
+FunctionPass *
+createAMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+{
+  return new AMDILBarrierDetect(TM, OptLevel);
+}
+} // llvm namespace
+
+AMDILBarrierDetect::AMDILBarrierDetect(TargetMachine &TM,
+                                       CodeGenOpt::Level OptLevel)
+  :
+  FunctionPass(ID),
+  TM(TM)
+{
+}
+
+AMDILBarrierDetect::~AMDILBarrierDetect()
+{
+}
+
+bool AMDILBarrierDetect::detectBarrier(BasicBlock::iterator *BBI)
+{
+  SmallVector<int64_t, DEFAULT_VEC_SLOTS>::iterator bIter;
+  int64_t bID;
+  Instruction *inst = (*BBI);
+  CallInst *CI = dyn_cast<CallInst>(inst);
+
+  if (!CI || !CI->getNumOperands()) {
+    return false;
+  }
+  const Value *funcVal = CI->getOperand(CI->getNumOperands() - 1);
+  if (funcVal && strncmp(funcVal->getName().data(), "barrier", 7)) {
+    return false;
+  }
+
+  if (inst->getNumOperands() >= 3) {
+    const Value *V = inst->getOperand(0);
+    const ConstantInt *Cint = dyn_cast<ConstantInt>(V);
+    bID = Cint->getSExtValue();
+    bIter = std::find(bVecMap.begin(), bVecMap.end(), bID);
+    if (bIter == bVecMap.end()) {
+      bVecMap.push_back(bID);
+    } else {
+      if (mStm->device()->isSupported(AMDILDeviceInfo::BarrierDetect)) {
+        AMDILMachineFunctionInfo *MFI =
+          getAnalysis<MachineFunctionAnalysis>().getMF()
+          .getInfo<AMDILMachineFunctionInfo>();
+        MFI->addMetadata(";limitgroupsize");
+        MFI->addErrorMsg(amd::CompilerWarningMessage[BAD_BARRIER_OPT]);
+      }
+    }
+  }
+  if (mStm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    AMDILMachineFunctionInfo *MFI =
+      getAnalysis<MachineFunctionAnalysis>().getMF()
+      .getInfo<AMDILMachineFunctionInfo>();
+    MFI->addErrorMsg(amd::CompilerWarningMessage[LIMIT_BARRIER]);
+    MFI->addMetadata(";limitgroupsize");
+    MFI->setUsesLDS();
+  }
+  const Value *V = inst->getOperand(inst->getNumOperands()-2);
+  const ConstantInt *Cint = dyn_cast<ConstantInt>(V);
+  Function *iF = dyn_cast<Function>(inst->getOperand(inst->getNumOperands()-1));
+  Module *M = iF->getParent();
+  bID = Cint->getSExtValue();
+  if (bID > 0) {
+    const char *name = "barrier";
+    if (bID == GLOBAL_MEM_FENCE) {
+      name = "barrierGlobal";
+    } else if (bID == LOCAL_MEM_FENCE
+               && mStm->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+      name = "barrierLocal";
+    } else if (bID == REGION_MEM_FENCE
+               && mStm->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+      name = "barrierRegion";
+    }
+    Function *nF =
+      dyn_cast<Function>(M->getOrInsertFunction(name, iF->getFunctionType()));
+    inst->setOperand(inst->getNumOperands()-1, nF);
+    return false;
+  }
+
+  return false;
+}
+
+bool AMDILBarrierDetect::runOnFunction(Function &MF)
+{
+  mChanged = false;
+  bVecMap.clear();
+  mStm = &TM.getSubtarget<AMDILSubtarget>();
+  Function *F = &MF;
+  safeNestedForEach(F->begin(), F->end(), F->begin()->begin(),
+                    std::bind1st(
+                      std::mem_fun(
+                        &AMDILBarrierDetect::detectBarrier), this));
+  return mChanged;
+}
+
+const char* AMDILBarrierDetect::getPassName() const
+{
+  return "AMDIL Barrier Detect Pass";
+}
+
+bool AMDILBarrierDetect::doInitialization(Module &M)
+{
+  return false;
+}
+
+bool AMDILBarrierDetect::doFinalization(Module &M)
+{
+  return false;
+}
+
+void AMDILBarrierDetect::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  AU.addRequired<MachineFunctionAnalysis>();
+  FunctionPass::getAnalysisUsage(AU);
+  AU.setPreservesAll();
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,106 @@
+//===-- AMDILBase.td ------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+def FeatureFP64     : SubtargetFeature<"fp64",
+        "CapsOverride[AMDILDeviceInfo::DoubleOps]",
+        "true",
+        "Enable 64bit double precision operations">;
+def FeatureByteAddress    : SubtargetFeature<"byte_addressable_store",
+        "CapsOverride[AMDILDeviceInfo::ByteStores]",
+        "true",
+        "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+        "CapsOverride[AMDILDeviceInfo::BarrierDetect]",
+        "true",
+        "Enable duplicate barrier detection(HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+        "CapsOverride[AMDILDeviceInfo::Images]",
+        "true",
+        "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+        "CapsOverride[AMDILDeviceInfo::MultiUAV]",
+        "true",
+        "Generate multiple UAV code(HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+        "CapsOverride[AMDILDeviceInfo::MacroDB]",
+        "true",
+        "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+        "CapsOverride[AMDILDeviceInfo::NoAlias]",
+        "true",
+        "assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+        "CapsOverride[AMDILDeviceInfo::NoInline]",
+        "true",
+        "specify whether to not inline functions">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+        "mIs64bit",
+        "false",
+        "Specify if 64bit addressing should be used.">;
+
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+        "mIs32on64bit",
+        "false",
+        "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+def FeatureDebug : SubtargetFeature<"debug",
+        "CapsOverride[AMDILDeviceInfo::Debug]",
+        "true",
+        "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILCallingConv.td"
+include "AMDILInstrInfo.td"
+
+def AMDILInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// AMDIL processors supported.
+//===----------------------------------------------------------------------===//
+include "Processors.td"
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+def AMDILAsmWriter : AsmWriter {
+    string AsmWriterClassName = "AsmPrinter";
+    int Variant = 0;
+}
+
+def AMDILAsmParser : AsmParser {
+    string AsmParserClassName = "AsmParser";
+    int Variant = 0;
+
+    string CommentDelimiter = ";";
+
+    string RegisterPrefix = "r";
+
+}
+
+
+def AMDIL : Target {
+  // Pull in Instruction Info:
+  let InstructionSet = AMDILInstrInfo;
+  let AssemblyWriters = [AMDILAsmWriter];
+  let AssemblyParsers = [AMDILAsmParser];
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,4057 @@
+//===-- AMDILCFGStructurizer.cpp ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "structcfg"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "llvm/Support/Debug.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define FirstNonDebugInstr(A) A->begin()
+using namespace llvm;
+
+// bixia TODO: move this out to analysis lib. Make this work for both target
+// AMDIL and CBackend.
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
+          "matched");
+STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
+          "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+          "pattern matched");
+STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
+          "pattern matched");
+STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
+          "matched");
+STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct
+{
+#define SHOWNEWINSTR(i) \
+  if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+  errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+  if (b) { \
+  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+  b->print(errs()); \
+  errs() << "\n"; \
+  } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS)
+{
+  for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+       iterEnd = LoopInfo.end();
+       iter != iterEnd; ++iter) {
+    (*iter)->print(OS, 0);
+  }
+}
+
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src)
+{
+  size_t sz = Src.size();
+  for (size_t i = 0; i < sz/2; ++i) {
+    NodeT *t = Src[i];
+    Src[i] = Src[sz - i - 1];
+    Src[sz - i - 1] = t;
+  }
+}
+
+} //end namespace llvmCFGStruct
+
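+// Scan the block backwards and return the last BREAK_LOGICALNZ_i32 or
+// BREAK_LOGICALZ_i32 instruction, or NULL if the block contains none.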
+static MachineInstr *getLastBreakInstr(MachineBasicBlock *blk)
+{
+  for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(); (iter != blk->rend()); ++iter) {
+    MachineInstr *instr = &(*iter);
+    if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDIL::BREAK_LOGICALZ_i32)) {
+      return instr;
+    }
+  }
+  return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// MachinePostDominatorTree
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominatorInternals.h"
+
+namespace llvm
+{
+
+extern void initializeMachinePostDominatorTreePass(PassRegistry&);
+FunctionPass *createMachinePostDominatorTreePass();
+
+/// MachinePostDominatorTree Class - MachineFunctionPass that wraps a
+/// DominatorTreeBase and is used to compute the post-dominator tree for
+/// machine basic blocks.
+///
+struct MachinePostDominatorTree : public MachineFunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  DominatorTreeBase<MachineBasicBlock> *DT;
+  MachinePostDominatorTree() : MachineFunctionPass(ID) {
+    initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+    // 'true' requests a post-dominator tree
+    DT = new DominatorTreeBase<MachineBasicBlock>(true);
+  }
+
+  ~MachinePostDominatorTree();
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  inline const std::vector<MachineBasicBlock *> &getRoots() const {
+    return DT->getRoots();
+  }
+
+  inline MachineDomTreeNode *getRootNode() const {
+    return DT->getRootNode();
+  }
+
+  inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+    return DT->getNode(BB);
+  }
+
+  inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+    return DT->getNode(BB);
+  }
+
+  inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+    return DT->dominates(A, B);
+  }
+
+  inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+    return DT->dominates(A, B);
+  }
+
+  inline bool
+  properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+    return DT->properlyDominates(A, B);
+  }
+
+  inline bool
+  properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+    return DT->properlyDominates(A, B);
+  }
+
+  inline MachineBasicBlock *
+  findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
+    return DT->findNearestCommonDominator(A, B);
+  }
+
+  virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
+    DT->print(OS);
+  }
+};
+} //end of namespace llvm
+
+char MachinePostDominatorTree::ID = 0;
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+                "MachinePostDominator Tree Construction",
+                true, true)
+
+FunctionPass *llvm::createMachinePostDominatorTreePass()
+{
+  return new MachinePostDominatorTree();
+}
+
+//const PassInfo *const llvm::MachinePostDominatorsID
+//= &machinePostDominatorTreePass;
+
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F)
+{
+  DT->recalculate(F);
+  //DEBUG(DT->dump());
+  return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree()
+{
+  delete DT;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+template<class PassT>
+struct CFGStructTraits {
+};
+
+template <class InstrT>
+class BlockInformation
+{
+public:
+  bool isRetired;
+  int  sccNum;
+  //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+  //Instructions defining the corresponding successor.
+  BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation
+{
+public:
+  BlockT *landBlk;
+  std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
+  //WHILELOOP(thisloop) init before entering
+  //thisloop.
+  std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
+  //WHILELOOP(thisloop) init after entering
+  //thisloop.
+  std::set<RegiT> endbranchInitRegs; //Init after entering this loop, at loop
+  //land block, branch cond on this reg.
+  std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
+  //endif" after ENDLOOP(thisloop) break
+  //outerLoopOf(thisLoop).
+  std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
+  //endif" after ENDLOOP(thisloop) continue on
+  //outerLoopOf(thisLoop).
+  LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// TrivialRegAlloc
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// Stores the list of defs and uses of a virtual register
+class DefUseList
+{
+  enum {
+    FLAG_DEF = 0,
+    FLAG_USE = 1
+  };
+
+public:
+  // struct that represents a single def or use
+  struct DefOrUseT {
+    unsigned _slotIndex;
+    unsigned _flag; // flag whether this is a def or use
+    bool isDef() const {
+      return _flag == FLAG_DEF;
+    }
+    bool isUse() const {
+      return _flag == FLAG_USE;
+    }
+    DefOrUseT(unsigned slotIndex, unsigned flag)
+      : _slotIndex(slotIndex), _flag(flag) {}
+  };
+
+private:
+  typedef SmallVector<DefOrUseT, 2> DefUseVecT;
+
+public:
+  typedef DefUseVecT::iterator iterator;
+  typedef DefUseVecT::const_iterator const_iterator;
+
+  DefUseVecT _defUses;
+
+  DefUseList() : _defUses() {}
+  void addDef(unsigned slotIndex) {
+    _defUses.push_back(DefOrUseT(slotIndex, FLAG_DEF));
+  }
+  void addUse(unsigned slotIndex) {
+    _defUses.push_back(DefOrUseT(slotIndex, FLAG_USE));
+  }
+  void clear() {
+    _defUses.clear();
+  }
+  iterator begin() {
+    return _defUses.begin();
+  }
+  const_iterator begin() const {
+    return _defUses.begin();
+  }
+  iterator end() {
+    return _defUses.end();
+  }
+  const_iterator end() const {
+    return _defUses.end();
+  }
+  bool isSorted() const;
+  void dump() const;
+};
+
+bool DefUseList::isSorted() const
+{
+  const_iterator it = begin();
+  const_iterator e = end();
+  assert(it != e && "no def/use");
+  const_iterator pre = it;
+  for (++it; it != e; ++it) {
+    if ((*pre)._slotIndex > (*it)._slotIndex) {
+      return false;
+    }
+    pre = it;
+  }
+  return true;
+}
+
+void DefUseList::dump() const
+{
+  for (const_iterator it = begin(), e = end(); it != e; ++it) {
+    const DefOrUseT& defOrUse = *it;
+    const char* str = defOrUse.isDef() ? "def" : "use";
+    errs() << "    " << defOrUse._slotIndex << " " << str << "\n";
+  }
+}
+
+// a live interval
+class LiveInterval
+{
+  enum {
+    UndefinedSlotIndex = -1
+  };
+  unsigned _vreg;
+  int _startSlotIndex;
+  int _endSlotIndex;
+
+public:
+  LiveInterval(unsigned vreg)
+    : _vreg(vreg),
+      _startSlotIndex(UndefinedSlotIndex),
+      _endSlotIndex(UndefinedSlotIndex)
+  {}
+  bool hasStart() const {
+    return _startSlotIndex != UndefinedSlotIndex;
+  }
+  bool hasEnd() const {
+    return _endSlotIndex != UndefinedSlotIndex;
+  }
+  void setStart(int slotIndex) {
+    _startSlotIndex = slotIndex;
+  }
+  void setEnd(int slotIndex) {
+    _endSlotIndex = slotIndex;
+  }
+  unsigned vreg() const {
+    return _vreg;
+  }
+  unsigned start() const {
+    return _startSlotIndex;
+  }
+  unsigned end() const {
+    return _endSlotIndex;
+  }
+};
+
+// a list of live intervals
+class LiveIntervals
+{
+  typedef SmallVector<LiveInterval, 16> IntervalVecType;
+
+public:
+  typedef IntervalVecType::iterator iterator;
+  typedef IntervalVecType::const_iterator const_iterator;
+
+private:
+  IntervalVecType _intervals;
+  bool _sorted; // whether the intervals are sorted by start position
+
+private:
+  iterator findIntervalImpl(unsigned vreg);
+
+public:
+  LiveIntervals(bool sorted) : _intervals(), _sorted(sorted) {}
+  LiveInterval* findInterval(unsigned vreg) {
+    iterator it = findIntervalImpl(vreg);
+    if (it == _intervals.end()) {
+      return NULL;
+    }
+    return &*it;
+  }
+  LiveInterval& createInterval(unsigned vreg) {
+    _intervals.push_back(LiveInterval(vreg));
+    return _intervals.back();
+  }
+  void appendInterval(LiveInterval& interval) {
+    if (_sorted) {
+      assert((_intervals.size() == 0
+              || interval.start() >= _intervals.back().start())
+             && "unsorted append into sorted LiveIntervals");
+    }
+    _intervals.push_back(interval);
+  }
+  void insertInterval(LiveInterval& interval) {
+    if (!_sorted) {
+      _intervals.push_back(interval);
+      return;
+    }
+    insertIntervalSorted(interval);
+  }
+
+  void removeInterval(unsigned vreg);
+  iterator removeInterval(iterator it) {
+    return _intervals.erase(it);
+  }
+  void clear() {
+    _intervals.clear();
+  }
+  iterator begin() {
+    return _intervals.begin();
+  }
+  iterator end() {
+    return _intervals.end();
+  }
+  bool isSortedByStart() const;
+  void dump() const;
+
+private:
+  void insertIntervalSorted(LiveInterval& interval);
+};
+
+LiveIntervals::iterator LiveIntervals::findIntervalImpl(unsigned vreg)
+{
+  iterator it = _intervals.begin();
+  iterator end = _intervals.end();
+  for (; it != end; ++it) {
+    if ((*it).vreg() == vreg) {
+      break;
+    }
+  }
+  return it;
+}
+
+void LiveIntervals::insertIntervalSorted(LiveInterval& interval)
+{
+  iterator it = _intervals.begin();
+  iterator end = _intervals.end();
+  for (; it != end; ++it) {
+    // Keep the list sorted in ascending start order: insert before the
+    // first interval that starts later.
+    if (interval.start() < (*it).start()) {
+      break;
+    }
+  }
+  _intervals.insert(it, interval);
+}
+
+void LiveIntervals::removeInterval(unsigned vreg)
+{
+  iterator it = findIntervalImpl(vreg);
+  assert(it != _intervals.end() && "interval not found");
+  _intervals.erase(it);
+}
+
+bool LiveIntervals::isSortedByStart() const
+{
+  const_iterator it = _intervals.begin();
+  const_iterator end = _intervals.end();
+  if (it == end) {
+    return true;
+  }
+  const_iterator pre = it;
+  ++it;
+  for (; it != end; ++it) {
+    if ((*pre).start() > (*it).start()) {
+      return false;
+    }
+    pre = it;
+  }
+  return true;
+}
+
+void LiveIntervals::dump() const
+{
+  errs() << "Intervals:\n";
+  const_iterator it = _intervals.begin();
+  const_iterator end = _intervals.end();
+  for (; it != end; ++it) {
+    const LiveInterval& interval = *it;
+    errs() << "  vreg " << TargetRegisterInfo::virtReg2Index(interval.vreg())
+           << " start " << interval.start()
+           << " end " << interval.end() << "\n";
+  }
+}
+
+// Trivial linear scan register allocator for the virtual registers created
+// during the CFGStructurizer pass. The global register allocator has already
+// run by this point, so we have to do our own very simple allocation for the
+// registers this pass introduces.
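+//
+// Illustrative walk-through (not taken from the sources): given intervals
+// v0:[0,7], v1:[2,4], v2:[5,9] and two free registers r0/r1, the scan below
+// assigns r0 to v0 and r1 to v1, releases r1 once the position passes v1's
+// end, and reuses r1 for v2.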
+class TrivialRegAlloc
+{
+  typedef SmallVector<LiveIntervals, 2>   IntervalsVecT;
+  typedef std::map<unsigned, unsigned>    RegMapT;
+  typedef std::set<unsigned>              RegSetT;
+  typedef std::map<unsigned, DefUseList*> VRegDefUseMapT;
+
+private:
+  // data structures passed in to this class
+
+  MachineFunction& _func;
+  const TargetRegisterClass& _regClass;
+  // virtual registers that need physical registers to be allocated
+  RegSetT& _vregs;
+
+  // data structures created within this class
+
+  VRegDefUseMapT _vregDefUseMap; // map vreg -> its def/use list
+  BitVector _regInUse; // flags which registers are currently in use
+  // set of physical registers that can be alloc'ed
+  std::vector<unsigned> _regSet;
+  RegMapT _regMap; // virtual to physical register map
+  LiveIntervals _intervals; // list of all live intervals
+  // transient list of currently active live intervals
+  LiveIntervals _activeIntervals;
+  // transient current interval for which we are trying to allocate a register
+  LiveInterval* _currInterval;
+
+private:
+  void initRegSet();
+  void computeIntervals();
+  unsigned getPhysicalRegister();
+  void allocateRegisterFor(LiveInterval& interval);
+  void releaseRegisterFor(const LiveInterval& interval);
+  void handleActiveIntervals(unsigned pos);
+  void allocateRegisters();
+  void rewrite();
+
+public:
+  TrivialRegAlloc(MachineFunction& func,
+                  const TargetRegisterClass& regClass,
+                  RegSetT& vregs);
+  ~TrivialRegAlloc();
+  void run(); // main driver of the algorithm
+};
+
+TrivialRegAlloc::TrivialRegAlloc(MachineFunction& func,
+                                 const TargetRegisterClass& regClass,
+                                 RegSetT& vregs)
+  : _func(func), _regClass(regClass), _vregs(vregs),
+    _vregDefUseMap(), _regInUse(), _regSet(), _regMap(),
+    _intervals(true), _activeIntervals(false), _currInterval(NULL)
+{
+  assert(_regClass.getID() == AMDIL::GPRI32RegClassID && "unimplemented");
+}
+
+TrivialRegAlloc::~TrivialRegAlloc()
+{
+  for (VRegDefUseMapT::iterator I = _vregDefUseMap.begin(),
+       E = _vregDefUseMap.end();
+       I != E; ++I) {
+    delete I->second;
+  }
+}
+
+// find all physical registers that are still available after the global
+// register allocator
+static void findAvailPhysRegs(MachineFunction& func,
+                              const class TargetRegisterClass& regClass,
+                              std::vector<unsigned>& regSet)
+{
+  ArrayRef<uint16_t> AllocOrder = regClass.getRawAllocationOrder(func);
+  for (const uint16_t *begin = AllocOrder.begin(), *end = AllocOrder.end();
+       begin != end;
+       ++begin) {
+    uint16_t tempReg = *begin;
+    if (func.getRegInfo().isPhysRegOrOverlapUsed(tempReg)) {
+      continue;
+    }
+
+    if (tempReg) {
+      regSet.push_back(tempReg);
+    }
+  }
+}
+
+// initialize the register set with remaining physical registers that are still
+// available and the set of physical registers reserved for CFGStructurizer
+void TrivialRegAlloc::initRegSet()
+{
+  findAvailPhysRegs(_func, _regClass, _regSet);
+  for (unsigned i = AMDIL::CFG1; i <= AMDIL::CFG10; ++i) {
+    _regSet.push_back(i);
+  }
+  _regInUse.resize(_regSet.size(), 0);
+  if (DEBUGME) {
+    errs() << "available physical registers:\n   ";
+    for (std::vector<unsigned>::iterator it = _regSet.begin(),
+         e = _regSet.end(); it != e; ++it) {
+      errs() << " " << *it;
+    }
+    errs() << "\n";
+  }
+}
+
+// compute live intervals for the virtual registers created during
+// CFGStructurizer pass
+void TrivialRegAlloc::computeIntervals()
+{
+  MachineBasicBlock* entryBlk
+  = GraphTraits<MachineFunction*>::nodes_begin(&_func);
+  unsigned slotIndex = 0;
+  if (DEBUGME) errs() << "start computeIntervals()\n";
+  // there is only one block now in the function
+  for (MachineBasicBlock::iterator iter = entryBlk->begin(),
+       iterEnd = entryBlk->end();
+       iter != iterEnd;
+       ++iter) {
+    MachineInstr* inst = iter;
+    if (DEBUGME) errs() << *inst;
+    for (unsigned i = 0; i < inst->getNumOperands(); ++i) {
+      MachineOperand& oper = inst->getOperand(i);
+      if (!oper.isReg() || !oper.getReg()) {
+        continue;
+      }
+      unsigned vreg = oper.getReg();
+      // if not a virtual register that needs reg alloc, skip
+      if (!_vregs.count(vreg)) {
+        continue;
+      }
+      if (DEBUGME) errs() << "  oper " << oper << " vreg " << TargetRegisterInfo::virtReg2Index(vreg) << "\n";
+      // add to vreg's def/use list
+      DefUseList*& defUses = _vregDefUseMap[vreg];
+      LiveInterval* interval = _intervals.findInterval(vreg);
+      if (oper.isDef()) {
+        if (defUses == NULL) {
+          defUses = new DefUseList();
+        }
+        defUses->addDef(slotIndex);
+        if (interval == NULL) {
+          interval = &_intervals.createInterval(vreg);
+        }
+        if (!interval->hasStart()) {
+          interval->setStart(slotIndex);
+          if (DEBUGME)
+            errs() << "interval for vreg "
+                   << TargetRegisterInfo::virtReg2Index(vreg)
+                   << " start at " << slotIndex << "\n";
+        } else {
+          assert(slotIndex > interval->start() && "sanity");
+        }
+      } else {
+        assert(defUses && "use before def");
+        defUses->addUse(slotIndex);
+        assert(interval && "use before def");
+        assert((!interval->hasEnd() || slotIndex > interval->end())
+               && "sanity");
+        interval->setEnd(slotIndex);
+        if (DEBUGME)
+          errs() << "interval for vreg "
+                 << TargetRegisterInfo::virtReg2Index(vreg)
+                 << " end at " << slotIndex << "\n";
+      }
+    }
+    ++slotIndex;
+  }
+  if (DEBUGME) {
+    _intervals.dump();
+    errs() << "def/use map: \n";
+    for (VRegDefUseMapT::const_iterator it = _vregDefUseMap.begin(),
+         e = _vregDefUseMap.end(); it != e; ++it) {
+      DefUseList* defUses = it->second;
+      errs() << "  vreg "
+             << TargetRegisterInfo::virtReg2Index(it->first) << "\n";
+      defUses->dump();
+    }
+  }
+  assert(_intervals.isSortedByStart() && "_intervals not sorted");
+#ifndef NDEBUG
+  for (VRegDefUseMapT::iterator I = _vregDefUseMap.begin(),
+       E = _vregDefUseMap.end();
+       I != E; ++I) {
+    assert(I->second->isSorted() && "def/uses not sorted");
+  }
+#endif
+}
+
+// pick a physical register that is not in use
+unsigned TrivialRegAlloc::getPhysicalRegister()
+{
+  for (unsigned i = 0; i < _regInUse.size(); ++i) {
+    if (!_regInUse[i]) {
+      _regInUse[i] = 1;
+      return _regSet[i];
+    }
+  }
+  // No physical register available. Has to spill.
+  // TODO: add spiller
+  abort();
+  return 0;
+}
+
+// allocate a physical register for the live interval
+void TrivialRegAlloc::allocateRegisterFor(LiveInterval& interval)
+{
+  _currInterval = &interval;
+  unsigned vreg = interval.vreg();
+  unsigned physicalReg = getPhysicalRegister();
+  _regMap[vreg] = physicalReg;
+  if (DEBUGME)
+    errs() << "allocated reg " << physicalReg << " to vreg "
+           << TargetRegisterInfo::virtReg2Index(vreg) << "\n";
+// _func->getRegInfo().setPhysRegUsed(tempReg);
+}
+
+// release physical register allocated for the interval
+void TrivialRegAlloc::releaseRegisterFor(const LiveInterval& interval)
+{
+  unsigned physicalReg = _regMap[interval.vreg()];
+  unsigned i = 0;
+  for (; i < _regSet.size(); ++i) {
+    if (_regSet[i] == physicalReg) {
+      break;
+    }
+  }
+  assert(i < _regSet.size() && "invalid physical register");
+  _regInUse[i] = 0;
+}
+
+// remove out of active intervals list if an interval becomes inactive
+void TrivialRegAlloc::handleActiveIntervals(unsigned pos)
+{
+  for (LiveIntervals::iterator it = _activeIntervals.begin();
+       it != _activeIntervals.end();) {
+    LiveInterval& interval = *it;
+    if (pos > interval.end()) {
+      releaseRegisterFor(interval);
+      it = _activeIntervals.removeInterval(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+// allocate physical registers for each live interval in the interval list
+void TrivialRegAlloc::allocateRegisters()
+{
+  // intervals that just become active
+  for (LiveIntervals::iterator it = _intervals.begin(),
+       end = _intervals.end();
+       it != end;
+       ++it) {
+    LiveInterval& interval = *it;
+    // remove intervals that become inactive out of active list
+    handleActiveIntervals(interval.start());
+    // interval becomes active
+    _activeIntervals.appendInterval(interval);
+    // allocate registers for interval that just becomes active
+    allocateRegisterFor(interval);
+  }
+}
+
+// rewrite the machine instructions to use the physical registers allocated
+void TrivialRegAlloc::rewrite()
+{
+  MachineBasicBlock* entryBlk
+  = GraphTraits<MachineFunction*>::nodes_begin(&_func);
+  // there is only one block now in the function
+  for (MachineBasicBlock::iterator iter = entryBlk->begin(),
+       iterEnd = entryBlk->end();
+       iter != iterEnd;
+       ++iter) {
+    MachineInstr* inst = iter;
+    for (unsigned i = 0; i < inst->getNumOperands(); ++i) {
+      MachineOperand& oper = inst->getOperand(i);
+      if (!oper.isReg() || !oper.getReg()) {
+        continue;
+      }
+      unsigned vreg = oper.getReg();
+      // if not a virtual register that needs reg alloc, skip
+      if (!_vregs.count(vreg)) {
+        continue;
+      }
+      assert(_regMap.find(vreg) != _regMap.end() && "register not allocated");
+      unsigned physicalReg = _regMap[vreg];
+      oper.setReg(physicalReg);
+    }
+  }
+}
+
+// the main driver of this register allocator
+void TrivialRegAlloc::run()
+{
+  initRegSet();
+  computeIntervals();
+  allocateRegisters();
+  rewrite();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT>
+class CFGStructurizer
+{
+public:
+  typedef enum {
+    Not_SinglePath = 0,
+    SinglePath_InPath = 1,
+    SinglePath_NotInPath = 2
+  } PathToKind;
+
+public:
+  typedef typename PassT::InstructionType         InstrT;
+  typedef typename PassT::FunctionType            FuncT;
+  typedef typename PassT::DominatortreeType       DomTreeT;
+  typedef typename PassT::PostDominatortreeType   PostDomTreeT;
+  typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
+  typedef typename PassT::LoopinfoType            LoopInfoT;
+
+  typedef GraphTraits<FuncT *>                    FuncGTraits;
+  //typedef FuncGTraits::nodes_iterator BlockIterator;
+  typedef typename FuncT::iterator                BlockIterator;
+
+  typedef typename FuncGTraits::NodeType          BlockT;
+  typedef GraphTraits<BlockT *>                   BlockGTraits;
+  typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
+  //typedef BlockGTraits::succ_iterator InstructionIterator;
+  typedef typename BlockT::iterator               InstrIterator;
+
+  typedef CFGStructTraits<PassT>                  CFGTraits;
+  typedef BlockInformation<InstrT>                BlockInfo;
+  typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
+
+  typedef int                                     RegiT;
+  typedef typename PassT::LoopType                LoopT;
+  typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
+  typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+  //landing info for loop break
+  typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
+
+public:
+  CFGStructurizer();
+  ~CFGStructurizer();
+
+  /// Perform the CFG structurization
+  bool run(FuncT &Func, PassT &Pass);
+
+  /// Perform the CFG preparation
+  bool prepare(FuncT &Func, PassT &Pass);
+
+private:
+  void   orderBlocks();
+  void   printOrderedBlocks(llvm::raw_ostream &OS);
+
+  void processAddedToTraversalBlocks();
+
+  int patternMatch(BlockT *CurBlock);
+  int patternMatchGroup(BlockT *CurBlock);
+
+  int serialPatternMatch(BlockT *CurBlock);
+  int ifPatternMatch(BlockT *CurBlock);
+  int switchPatternMatch(BlockT *CurBlock);
+  int loopendPatternMatch(BlockT *CurBlock);
+  int loopPatternMatch(BlockT *CurBlock);
+
+  int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+  int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+  //int loopWithoutBreak(BlockT *);
+
+  inline int getRegister(const class TargetRegisterClass *RegClass);
+  void handleLoopbreak(BlockT *ExitingBlock, LoopT *ExitingLoop,
+                       BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+  void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+                           BlockT *ContBlock, LoopT *contLoop);
+  bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+  int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+                       BlockT *FalseBlock);
+  int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+                          BlockT *FalseBlock);
+  int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+                              BlockT *FalseBlock, BlockT **LandBlockPtr);
+  void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+                                   BlockT *FalseBlock, BlockT *LandBlock,
+                                   bool Detail = false);
+  PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+                          bool AllowSideEntry = true);
+  BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+                        bool AllowSideEntry = true);
+  int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+  void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+  void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+                            BlockT *TrueBlock, BlockT *FalseBlock,
+                            BlockT *LandBlock);
+  void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+  void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+                           BlockT *ExitLandBlock, RegiT SetReg);
+  void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+                           RegiT SetReg);
+  BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+                                std::set<BlockT*> &ExitBlockSet,
+                                BlockT *ExitLandBlk);
+  BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+                                BlockTSmallerVector &ExitingBlocks,
+                                BlockTSmallerVector &ExitBlocks);
+  BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+  void removeUnconditionalBranch(BlockT *SrcBlock);
+  void removeRedundantConditionalBranch(BlockT *SrcBlock);
+  void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+  void removeSuccessor(BlockT *SrcBlock);
+  BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+  BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+  void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+                          InstrIterator InsertPos);
+
+  void recordSccnum(BlockT *SrcBlock, int SCCNum);
+  int getSCCNum(BlockT *srcBlk);
+
+  void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+  bool isRetiredBlock(BlockT *SrcBlock);
+  bool isActiveLoophead(BlockT *CurBlock);
+  bool needMigrateBlock(BlockT *Block);
+
+  void addToTraversalBlock(BlockT *srcBlock);
+
+  BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+                              BlockTSmallerVector &exitBlocks,
+                              std::set<BlockT*> &ExitBlockSet);
+  void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+  BlockT *getLoopLandBlock(LoopT *LoopRep);
+  LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+  void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+  void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+  void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+  void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+  void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+  bool hasBackEdge(BlockT *curBlock);
+  unsigned getLoopDepth  (LoopT *LoopRep);
+  int countActiveBlock(
+    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+  BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+  BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+  DomTreeT *domTree;
+  PostDomTreeT *postDomTree;
+  LoopInfoT *loopInfo;
+  PassT *passRep;
+  FuncT *funcRep;
+
+  BlockInfoMap blockInfoMap;
+  LoopLandInfoMap loopLandInfoMap;
+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> addedToTraversalBlks;
+  std::set<unsigned> vregs; // new virtual registers created
+};  //template class CFGStructurizer
+
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+  : domTree(NULL), postDomTree(NULL), loopInfo(NULL)
+{
+}
+
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer()
+{
+  for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+       E = blockInfoMap.end(); I != E; ++I) {
+    delete I->second;
+  }
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass)
+{
+  passRep = &pass;
+  funcRep = &func;
+
+  bool changed = false;
+  //func.RenumberBlocks();
+
+  //to do, if not reducible flow graph, make it so ???
+
+  if (DEBUGME) {
+    errs() << "AMDILCFGStructurizer::prepare\n";
+    //func.viewCFG();
+    //func.viewCFGOnly();
+    //func.dump();
+  }
+
+  //FIXME: gcc complains on this.
+  //domTree = &pass.getAnalysis<DomTreeT>();
+  //domTree = CFGTraits::getDominatorTree(pass);
+  //if (DEBUGME) {
+  //    domTree->print(errs());
+  //}
+
+  //FIXME: gcc complains on this.
+  //domTree = &pass.getAnalysis<DomTreeT>();
+  //postDomTree = CFGTraits::getPostDominatorTree(pass);
+  //if (DEBUGME) {
+  //   postDomTree->print(errs());
+  //}
+
+  //FIXME: gcc complains on this.
+  //loopInfo = &pass.getAnalysis<LoopInfoT>();
+  loopInfo = CFGTraits::getLoopInfo(pass);
+  if (DEBUGME) {
+    errs() << "LoopInfo:\n";
+    PrintLoopinfo(*loopInfo, errs());
+  }
+
+  orderBlocks();
+  if (DEBUGME) {
+    errs() << "Ordered blocks:\n";
+    printOrderedBlocks(errs());
+  }
+
+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+  for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+       iterEnd = loopInfo->end();
+       iter != iterEnd; ++iter) {
+    LoopT* loopRep = (*iter);
+    BlockTSmallerVector exitingBlks;
+    loopRep->getExitingBlocks(exitingBlks);
+
+    if (exitingBlks.size() == 0) {
+      BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+      if (dummyExitBlk != NULL)
+        retBlks.push_back(dummyExitBlk);
+    }
+  }
+
+  // Remove unconditional branch instr.
+  // Add dummy exit block iff there are multiple returns.
+
+  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+       iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+       iterBlk != iterEndBlk;
+       ++iterBlk) {
+    BlockT *curBlk = *iterBlk;
+    removeUnconditionalBranch(curBlk);
+    removeRedundantConditionalBranch(curBlk);
+    if (CFGTraits::isReturnBlock(curBlk)) {
+      retBlks.push_back(curBlk);
+    }
+    assert(curBlk->succ_size() <= 2);
+    //assert(curBlk->size() > 0);
+    //removeEmptyBlock(curBlk) ??
+  } //for
+
+  if (retBlks.size() >= 2) {
+    addDummyExitBlock(retBlks);
+    changed = true;
+  }
+
+  return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass)
+{
+  passRep = &pass;
+  funcRep = &func;
+
+  //func.RenumberBlocks();
+
+  //Assume reducible CFG...
+  if (DEBUGME) {
+    errs() << "AMDILCFGStructurizer::run\n";
+    errs() << func.getFunction()->getName().str() << "\n";
+    //func.viewCFG();
+    //func.viewCFGOnly();
+    func.dump();
+  }
+
+#if 1
+  //FIXME: gcc complains on this.
+  //domTree = &pass.getAnalysis<DomTreeT>();
+  domTree = CFGTraits::getDominatorTree(pass);
+  if (DEBUGME) {
+    domTree->print(errs(), (const llvm::Module*)0);
+  }
+#endif
+
+  //FIXME: gcc complains on this.
+  //domTree = &pass.getAnalysis<DomTreeT>();
+  postDomTree = CFGTraits::getPostDominatorTree(pass);
+  if (DEBUGME) {
+    postDomTree->print(errs());
+  }
+
+  //FIXME: gcc complains on this.
+  //loopInfo = &pass.getAnalysis<LoopInfoT>();
+  loopInfo = CFGTraits::getLoopInfo(pass);
+  if (DEBUGME) {
+    errs() << "LoopInfo:\n";
+    PrintLoopinfo(*loopInfo, errs());
+  }
+
+  orderBlocks();
+//#define STRESSTEST
+#ifdef STRESSTEST
+  //Use the worst block ordering to test the algorithm.
+  ReverseVector(orderedBlks);
+#endif
+
+  if (DEBUGME) {
+    errs() << "Ordered blocks:\n";
+    printOrderedBlocks(errs());
+  }
+  int numIter = 0;
+  bool finish = false;
+  BlockT *curBlk;
+  bool makeProgress = false;
+  int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+                                        orderedBlks.end());
+
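+  // Repeatedly run the pattern matchers over the blocks, one SCC at a time,
+  // until the entry block has no successors (the CFG has been reduced to a
+  // single block) or an iteration fails to retire any block.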
+  do {
+    ++numIter;
+    if (DEBUGME) {
+      errs() << "numIter = " << numIter
+             << ", numRemaintedBlk = " << numRemainedBlk << "\n";
+    }
+
+    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+    iterBlk = orderedBlks.begin();
+    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+    iterBlkEnd = orderedBlks.end();
+
+    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+    sccBeginIter = iterBlk;
+    BlockT *sccBeginBlk = NULL;
+    int sccNumBlk = 0;  // Number of active blocks in the current SCC; reset
+    // to the maximum possible number when a new SCC begins.
+    int sccNumIter;     // Number of iterations on this SCC.
+
+    while (iterBlk != iterBlkEnd) {
+      curBlk = *iterBlk;
+
+      if (sccBeginBlk == NULL) {
+        sccBeginIter = iterBlk;
+        sccBeginBlk = curBlk;
+        sccNumIter = 0;
+        sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+        if (DEBUGME) {
+          errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+          errs() << "\n";
+        }
+        processAddedToTraversalBlocks();
+      }
+
+      if (!isRetiredBlock(curBlk)) {
+        patternMatch(curBlk);
+      }
+
+      ++iterBlk;
+
+      bool contNextScc = true;
+      if (iterBlk == iterBlkEnd
+          || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+        // Just finish one scc.
+        ++sccNumIter;
+        int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+        if (sccRemainedNumBlk >= 1 && sccRemainedNumBlk >= sccNumBlk) {
+          if (DEBUGME) {
+            errs() << "Can't reduce processing SCC " << getSCCNum(curBlk)
+                   << ", sccNumIter = " << sccNumIter
+                   << ", remain # of blocks " << sccRemainedNumBlk;
+            errs() << "doesn't make any progress\n";
+          }
+          contNextScc = true;
+        } else if (sccRemainedNumBlk >= 1 && sccRemainedNumBlk < sccNumBlk) {
+          sccNumBlk = sccRemainedNumBlk;
+          iterBlk = sccBeginIter;
+          contNextScc = false;
+          if (DEBUGME) {
+            errs() << "repeat processing SCC" << getSCCNum(curBlk)
+                   << "sccNumIter = " << sccNumIter << "\n";
+            //func.viewCFG();
+            //func.viewCFGOnly();
+          }
+        } else {
+          if (DEBUGME) {
+            errs() << "finish processing SCC" << getSCCNum(curBlk)
+                   << ", remain # of blocks " << sccRemainedNumBlk << "\n";
+          }
+          // Finish the current scc.
+          contNextScc = true;
+        }
+      } else {
+        // Continue on next component in the current scc.
+        contNextScc = false;
+      }
+
+      if (contNextScc) {
+        sccBeginBlk = NULL;
+      }
+    } //while, "one iteration" over the function.
+
+    BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+    if (entryBlk->succ_size() == 0) {
+      finish = true;
+      if (DEBUGME) {
+        errs() << "Reduce to one block\n";
+      }
+    } else {
+      int newnumRemainedBlk
+      = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+      // consider cloned blocks ??
+      if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+        makeProgress = true;
+        numRemainedBlk = newnumRemainedBlk;
+      } else {
+        makeProgress = false;
+        if (DEBUGME) {
+          errs() << "No progress\n";
+        }
+      }
+    }
+  } while (!finish && makeProgress);
+
+  // Misc wrap up to maintain the consistency of the Function representation.
+  CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+  // allocate physical registers for virtual registers created during this pass
+  TrivialRegAlloc regAlloc(func, AMDIL::GPRI32RegClass, vregs);
+  regAlloc.run();
+
+  // Detach retired Block, release memory.
+  for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+       iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+    if ((*iterMap).second && (*iterMap).second->isRetired) {
+      assert(((*iterMap).first)->getNumber() != -1);
+      if (DEBUGME) {
+        errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+      }
+      (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
+    }
+    delete (*iterMap).second;
+  }
+  blockInfoMap.clear();
+
+  // clear loopLandInfoMap
+  for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+       iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+    delete (*iterMap).second;
+  }
+  loopLandInfoMap.clear();
+
+  if (DEBUGME) {
+    //func.viewCFG();
+    //func.dump();
+  }
+
+  if (!finish) {
+    MachineFunction *MF = &func;
+    AMDILMachineFunctionInfo *mMFI =
+      MF->getInfo<AMDILMachineFunctionInfo>();
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[IRREDUCIBLE_CF]);
+  }
+
+  return true;
+} //CFGStructurizer::run
+
+/// Print the ordered Blocks.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os)
+{
+  size_t i = 0;
+  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+       iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
+       iterBlk != iterBlkEnd;
+       ++iterBlk, ++i) {
+    os << "BB" << (*iterBlk)->getNumber();
+    os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+    if (i != 0 && i % 10 == 0) {
+      os << "\n";
+    } else {
+      os << " ";
+    }
+  }
+} //printOrderedBlocks
+
+/// Compute the reversed DFS post order of Blocks
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks()
+{
+  int sccNum = 0;
+  BlockT *bb;
+  for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
+       sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
+    std::vector<BlockT *> &sccNext = *sccIter;
+    for (typename std::vector<BlockT *>::const_iterator
+         blockIter = sccNext.begin(), blockEnd = sccNext.end();
+         blockIter != blockEnd; ++blockIter) {
+      bb = *blockIter;
+      orderedBlks.push_back(bb);
+      recordSccnum(bb, sccNum);
+    }
+  }
+
+  //walk through all the blocks in func to check for unreachable blocks
+  for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
+       blockEnd1 = FuncGTraits::nodes_end(funcRep);
+       blockIter1 != blockEnd1; ++blockIter1) {
+    BlockT *bb = &(*blockIter1);
+    sccNum = getSCCNum(bb);
+    if (sccNum == INVALIDSCCNUM) {
+      errs() << "unreachable block BB" << bb->getNumber() << "\n";
+    }
+  } //end of for
+} //orderBlocks
+
+/// Run pattern matching on the blocks that were added to the traversal list.
+///
+template<class PassT> void CFGStructurizer<PassT>::processAddedToTraversalBlocks()
+{
+  typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+  iterBlk = addedToTraversalBlks.begin();
+  typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+  iterBlkEnd = addedToTraversalBlks.end();
+  BlockT *curBlk;
+
+  while (iterBlk != iterBlkEnd) {
+    curBlk = *iterBlk;
+
+    if (!isRetiredBlock(curBlk)) {
+      patternMatch(curBlk);
+    }
+
+    ++iterBlk;
+  }
+} //CFGStructurizer<PassT>::processAddedToTraversalBlocks
+
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk)
+{
+  int numMatch = 0;
+  int curMatch;
+
+  if (DEBUGME) {
+    errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+  }
+
+  while ((curMatch = patternMatchGroup(curBlk)) > 0) {
+    numMatch += curMatch;
+  }
+
+  if (DEBUGME) {
+    errs() << "End patternMatch BB" << curBlk->getNumber()
+           << ", numMatch = " << numMatch << "\n";
+  }
+
+  return numMatch;
+} //patternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk)
+{
+  int numMatch = 0;
+  numMatch += serialPatternMatch(curBlk);
+  numMatch += ifPatternMatch(curBlk);
+  //numMatch += switchPatternMatch(curBlk);
+  numMatch += loopendPatternMatch(curBlk);
+  numMatch += loopPatternMatch(curBlk);
+  return numMatch;
+}//patternMatchGroup
+
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk)
+{
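+  // Serial pattern: curBlk has exactly one successor, and that successor has
+  // curBlk as its only predecessor and is not an active loop header, so the
+  // successor can be folded into curBlk.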
+  if (curBlk->succ_size() != 1) {
+    return 0;
+  }
+
+  BlockT *childBlk = *curBlk->succ_begin();
+  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+    return 0;
+  }
+
+  mergeSerialBlock(curBlk, childBlk);
+  ++numSerialPatternMatch;
+  return 1;
+} //serialPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk)
+{
+  //two edges
+  if (curBlk->succ_size() != 2) {
+    return 0;
+  }
+
+  if (hasBackEdge(curBlk)) {
+    return 0;
+  }
+
+  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+  if (branchInstr == NULL) {
+    return 0;
+  }
+
+  assert(CFGTraits::isCondBranch(branchInstr));
+
+  BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+  BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+  BlockT *landBlk;
+  int cloned = 0;
+
+  // TODO: Simplify
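+  // Determine the land (merge) block of the if/else: a common successor of
+  // both arms, no land block when both arms end the path, a triangle where
+  // one arm branches directly to the other, or detached continue/break arms
+  // within the same loop; anything else is handed to handleJumpintoIf.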
+  if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+      && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
+    landBlk = *trueBlk->succ_begin();
+  } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
+    landBlk = NULL;
+  } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
+    landBlk = falseBlk;
+    falseBlk = NULL;
+  } else if (falseBlk->succ_size() == 1
+             && *falseBlk->succ_begin() == trueBlk) {
+    landBlk = trueBlk;
+    trueBlk = NULL;
+  } else if (falseBlk->succ_size() == 1
+             && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+    landBlk = *falseBlk->succ_begin();
+  } else if (trueBlk->succ_size() == 1
+             && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+    landBlk = *trueBlk->succ_begin();
+  } else {
+    return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+  }
+
+  // improveSimpleJumpintoIf can handle the case where landBlk == NULL, but the
+  // new BB created for landBlk == NULL may introduce new challenges to the
+  // reduction process.
+  if (landBlk != NULL &&
+      ((trueBlk && trueBlk->pred_size() > 1)
+       || (falseBlk && falseBlk->pred_size() > 1))) {
+    cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+  }
+
+  if (trueBlk && trueBlk->pred_size() > 1) {
+    trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+    ++cloned;
+  }
+
+  if (falseBlk && falseBlk->pred_size() > 1) {
+    falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+    ++cloned;
+  }
+
+  mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+  ++numIfPatternMatch;
+
+  numClonedBlock += cloned;
+
+  return 1 + cloned;
+} //ifPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk)
+{
+  return 0;
+} //switchPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk)
+{
+  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+  typename std::vector<LoopT *> nestedLoops;
+  while (loopRep) {
+    nestedLoops.push_back(loopRep);
+    loopRep = loopRep->getParentLoop();
+  }
+
+  if (nestedLoops.size() == 0) {
+    return 0;
+  }
+
+  // Process nested loops outside->inside, so a "continue" to an outside loop
+  // won't be mistaken for a "break" of the current loop.
+  int num = 0;
+  for (typename std::vector<LoopT *>::reverse_iterator
+       iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
+       iter != iterEnd; ++iter) {
+    loopRep = *iter;
+
+    if (getLoopLandBlock(loopRep) != NULL) {
+      continue;
+    }
+
+    BlockT *loopHeader = loopRep->getHeader();
+
+    int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
+
+    if (numBreak == -1) {
+      break;
+    }
+
+    int numCont = loopcontPatternMatch(loopRep, loopHeader);
+    num += numBreak + numCont;
+  }
+
+  return num;
+} //loopendPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk)
+{
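+  // A loop headed by curBlk is collapsed only after the rest of its body has
+  // been merged into the header (no successors remain); each enclosing loop
+  // headed here with a recorded, still-live land block is then merged in.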
+  if (curBlk->succ_size() != 0) {
+    return 0;
+  }
+
+  int numLoop = 0;
+  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+  while (loopRep && loopRep->getHeader() == curBlk) {
+    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+    if (loopLand) {
+      BlockT *landBlk = loopLand->landBlk;
+      assert(landBlk);
+      if (!isRetiredBlock(landBlk)) {
+        mergeLooplandBlock(curBlk, loopLand);
+        ++numLoop;
+      }
+    }
+    loopRep = loopRep->getParentLoop();
+  }
+
+  numLoopPatternMatch += numLoop;
+
+  return numLoop;
+} //loopPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+    BlockT *loopHeader)
+{
+  BlockTSmallerVector exitingBlks;
+  loopRep->getExitingBlocks(exitingBlks);
+
+  if (DEBUGME) {
+    errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+  }
+
+  if (exitingBlks.size() == 0) {
+    setLoopLandBlock(loopRep);
+    return 0;
+  }
+
+  // Compute the corresponding exitBlks and exit block set.
+  BlockTSmallerVector exitBlks;
+  std::set<BlockT *> exitBlkSet;
+  for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+       iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+    BlockT *exitingBlk = *iter;
+    BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+    exitBlks.push_back(exitBlk);
+    exitBlkSet.insert(exitBlk);  //non-duplicate insert
+  }
+
+  assert(exitBlkSet.size() > 0);
+  assert(exitBlks.size() == exitingBlks.size());
+
+  if (DEBUGME) {
+    errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+  }
+
+  // Find exitLandBlk.
+  BlockT *exitLandBlk = NULL;
+  int numCloned = 0;
+  int numSerial = 0;
+
+  if (exitBlkSet.size() == 1) {
+    exitLandBlk = *exitBlkSet.begin();
+  } else {
+    exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+    if (exitLandBlk == NULL) {
+      return -1;
+    }
+
+    bool allInPath = true;
+    bool allNotInPath = true;
+    for (typename std::set<BlockT*>::const_iterator
+         iter = exitBlkSet.begin(),
+         iterEnd = exitBlkSet.end();
+         iter != iterEnd; ++iter) {
+      BlockT *exitBlk = *iter;
+
+      PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+      if (DEBUGME) {
+        errs() << "BB" << exitBlk->getNumber()
+               << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+               << pathKind << "\n";
+      }
+
+      allInPath = allInPath && (pathKind == SinglePath_InPath);
+      allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+      if (!allInPath && !allNotInPath) {
+        if (DEBUGME) {
+          errs() << "singlePath check fail\n";
+        }
+        return -1;
+      }
+    } // check all exit blocks
+
+    if (allNotInPath) {
+      // TODO: Simplify, maybe separate function?
+      //funcRep->viewCFG();
+      LoopT *parentLoopRep = loopRep->getParentLoop();
+      BlockT *parentLoopHeader = NULL;
+      if (parentLoopRep)
+        parentLoopHeader = parentLoopRep->getHeader();
+
+      if (exitLandBlk == parentLoopHeader &&
+          (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+                         loopRep,
+                         exitBlkSet,
+                         exitLandBlk)) != NULL) {
+        if (DEBUGME) {
+          errs() << "relocateLoopcontBlock success\n";
+        }
+      } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+                                exitingBlks,
+                                exitBlks)) != NULL) {
+        if (DEBUGME) {
+          errs() << "insertEndbranchBlock success\n";
+        }
+      } else {
+        if (DEBUGME) {
+          errs() << "loop exit fail\n";
+        }
+        return -1;
+      }
+    } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+                              exitingBlks,
+                              exitBlks)) != NULL) {
+      //current addLoopEndbranchBlock always does something and returns non-NULL
+      if (DEBUGME) {
+        errs() << "insertEndbranchBlock success\n";
+      }
+    }
+
+    // Handle side entry to exit path.
+    exitBlks.clear();
+    exitBlkSet.clear();
+    for (typename BlockTSmallerVector::iterator iterExiting =
+           exitingBlks.begin(),
+         iterExitingEnd = exitingBlks.end();
+         iterExiting != iterExitingEnd; ++iterExiting) {
+      BlockT *exitingBlk = *iterExiting;
+      BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+      BlockT *newExitBlk = exitBlk;
+
+      if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+        newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+        ++numCloned;
+      }
+
+      numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+      exitBlks.push_back(newExitBlk);
+      exitBlkSet.insert(newExitBlk);
+    }
+
+    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+         iterExitEnd = exitBlks.end();
+         iterExit != iterExitEnd; ++iterExit) {
+      BlockT *exitBlk = *iterExit;
+      numSerial += serialPatternMatch(exitBlk);
+    }
+
+    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+         iterExitEnd = exitBlks.end();
+         iterExit != iterExitEnd; ++iterExit) {
+      BlockT *exitBlk = *iterExit;
+      if (exitBlk->pred_size() > 1) {
+        if (exitBlk != exitLandBlk) {
+          return -1;
+        }
+      } else {
+        if (exitBlk != exitLandBlk &&
+            (exitBlk->succ_size() != 1 ||
+             *exitBlk->succ_begin() != exitLandBlk)) {
+          return -1;
+        }
+      }
+    }
+  } // else
+
+  // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
+  exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+  // Fold the break into the breaking block; this also handles breaks across
+  // loop levels.
+  assert(exitingBlks.size() == exitBlks.size());
+  for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+       iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+       iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+    BlockT *exitBlk = *iterExit;
+    BlockT *exitingBlk = *iterExiting;
+    assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+    LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+    handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+  }
+
+  int numBreak = static_cast<int>(exitingBlks.size());
+  numLoopbreakPatternMatch += numBreak;
+  numClonedBlock += numCloned;
+  return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+    BlockT *loopHeader)
+{
+  int numCont = 0;
+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
+  for (typename InvBlockGTraits::ChildIteratorType iter =
+         InvBlockGTraits::child_begin(loopHeader),
+       iterEnd = InvBlockGTraits::child_end(loopHeader);
+       iter != iterEnd; ++iter) {
+    BlockT *curBlk = *iter;
+    if (loopRep->contains(curBlk)) {
+      handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
+                          loopHeader, loopRep);
+      contBlk.push_back(curBlk);
+      ++numCont;
+    }
+  }
+
+  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
+       iter = contBlk.begin(), iterEnd = contBlk.end();
+       iter != iterEnd; ++iter) {
+    (*iter)->removeSuccessor(loopHeader);
+  }
+
+  numLoopcontPatternMatch += numCont;
+
+  return numCont;
+} //loopcontPatternMatch
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+    BlockT *src2Blk)
+{
+  // Return true iff src1Blk->succ_size() == 0 and src1Blk and src2Blk are in
+  // the same loop that already has LoopLandInfo. Without explicitly keeping
+  // track of loopContBlks and loopBreakBlks, this is a way to recover that
+  // information.
+  //
+  if (src1Blk->succ_size() == 0) {
+    LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+    if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
+      LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+      if (theEntry != NULL) {
+        if (DEBUGME) {
+          errs() << "isLoopContBreakBlock yes src1 = BB"
+                 << src1Blk->getNumber()
+                 << " src2 = BB" << src2Blk->getNumber() << "\n";
+        }
+        return true;
+      }
+    }
+  }
+  return false;
+}  //isSameloopDetachedContbreak
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+    BlockT *trueBlk,
+    BlockT *falseBlk)
+{
+  int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+  if (num == 0) {
+    if (DEBUGME) {
+      errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+    }
+    num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+
+    if (num == 0) {
+      if (DEBUGME) {
+        errs() << "handleJumpintoIf check NULL as common postdom:";
+      }
+      BlockT* trueEnd;
+      BlockT* falseEnd;
+      if ((trueEnd = singlePathEnd(trueBlk, NULL)) &&
+          (falseEnd = singlePathEnd(falseBlk, NULL)) &&
+          loopInfo->getLoopFor(trueEnd) == loopInfo->getLoopFor(falseEnd)) {
+        if (DEBUGME) {
+          errs() << " working\n";
+        }
+        num += cloneOnSideEntryTo(headBlk, trueBlk, NULL);
+        num += cloneOnSideEntryTo(headBlk, falseBlk, NULL);
+
+        numClonedBlock += num;
+        num += serialPatternMatch(*headBlk->succ_begin());
+        num += serialPatternMatch(*(headBlk->succ_begin()+1));
+        num += ifPatternMatch(headBlk);
+        assert(num > 0); //
+      } else {
+        if (DEBUGME) {
+          errs() << " not working\n";
+        }
+      }
+    } //check NULL
+
+  }
+  return num;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+    BlockT *trueBlk,
+    BlockT *falseBlk)
+{
+  int num = 0;
+  BlockT *downBlk;
+
+  //trueBlk could be the common post dominator
+  downBlk = trueBlk;
+
+  if (DEBUGME) {
+    errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+           << " true = BB" << trueBlk->getNumber()
+           << ", numSucc=" << trueBlk->succ_size()
+           << " false = BB" << falseBlk->getNumber() << "\n";
+  }
+
+  while (downBlk) {
+    if (DEBUGME) {
+      errs() << "check down = BB" << downBlk->getNumber();
+    }
+
+    if (//postDomTree->dominates(downBlk, falseBlk) &&
+      singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+      if (DEBUGME) {
+        errs() << " working\n";
+      }
+
+      num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+      num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+      numClonedBlock += num;
+      num += serialPatternMatch(*headBlk->succ_begin());
+      num += serialPatternMatch(*(headBlk->succ_begin()+1));
+      num += ifPatternMatch(headBlk);
+      assert(num > 0); //
+
+      break;
+    }
+    if (DEBUGME) {
+      errs() << " not working\n";
+    }
+    downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+  } // walk down the postDomTree
+
+  return num;
+} //handleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+    BlockT *trueBlk,
+    BlockT *falseBlk,
+    BlockT *landBlk,
+    bool detail)
+{
+  errs() << "head = BB" << headBlk->getNumber()
+         << " size = " << headBlk->size();
+  if (detail) {
+    errs() << "\n";
+    headBlk->print(errs());
+    errs() << "\n";
+  }
+
+  if (trueBlk) {
+    errs() << ", true = BB" << trueBlk->getNumber() << " size = "
+           << trueBlk->size() << " numPred = " << trueBlk->pred_size();
+    if (detail) {
+      errs() << "\n";
+      trueBlk->print(errs());
+      errs() << "\n";
+    }
+  }
+  if (falseBlk) {
+    errs() << ", false = BB" << falseBlk->getNumber() << " size = "
+           << falseBlk->size() << " numPred = " << falseBlk->pred_size();
+    if (detail) {
+      errs() << "\n";
+      falseBlk->print(errs());
+      errs() << "\n";
+    }
+  }
+  if (landBlk) {
+    errs() << ", land = BB" << landBlk->getNumber() << " size = "
+           << landBlk->size() << " numPred = " << landBlk->pred_size();
+    if (detail) {
+      errs() << "\n";
+      landBlk->print(errs());
+      errs() << "\n";
+    }
+  }
+
+  errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+    BlockT *trueBlk,
+    BlockT *falseBlk,
+    BlockT **plandBlk)
+{
+  bool migrateTrue = false;
+  bool migrateFalse = false;
+
+  BlockT *landBlk = *plandBlk;
+
+  assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+         && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+  if (trueBlk == falseBlk) {
+    return 0;
+  }
+
+#if 0
+  if (DEBUGME) {
+    errs() << "improveSimpleJumpintoIf: ";
+    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+  }
+#endif
+
+  // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
+  // Could take landBlk->pred_size() into account, as it represents the number
+  // of "initReg = .." assignments that need to be inserted.
+  migrateTrue = needMigrateBlock(trueBlk);
+  migrateFalse = needMigrateBlock(falseBlk);
+
+  if (!migrateTrue && !migrateFalse) {
+    return 0;
+  }
+
+  // If we need to migrate either trueBlk or falseBlk, also migrate any of them
+  // that has more than one predecessor.  Without doing this, a predecessor
+  // other than headBlk would leave an undefined value in initReg.
+  if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+    migrateTrue = true;
+  }
+  if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+    migrateFalse = true;
+  }
+
+  if (DEBUGME) {
+    errs() << "before improveSimpleJumpintoIf: ";
+    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+  }
+
+  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+  //
+  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+  //      {initReg = 0; org falseBlk branch }
+  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+  //      => org landBlk
+  //      if landBlk->pred_size() > 2, put the above if-else inside
+  //      if (initReg !=2) {...}
+  //
+  // add initReg = initVal to headBlk
+  unsigned initReg = getRegister(&AMDIL::GPRI32RegClass);
+  if (!migrateTrue || !migrateFalse) {
+    int initVal = migrateTrue ? 0 : 1;
+    CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+  }
+
+  int numNewBlk = 0;
+
+  if (landBlk == NULL) {
+    landBlk = funcRep->CreateMachineBasicBlock();
+    funcRep->push_back(landBlk);  //insert to function
+
+    if (trueBlk) {
+      trueBlk->addSuccessor(landBlk);
+    } else {
+      headBlk->addSuccessor(landBlk);
+    }
+
+    if (falseBlk) {
+      falseBlk->addSuccessor(landBlk);
+    } else {
+      headBlk->addSuccessor(landBlk);
+    }
+
+    numNewBlk ++;
+  }
+
+  bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+  //insert AMDIL::ENDIF to avoid special case "input landBlk == NULL"
+  typename BlockT::iterator insertPos =
+    CFGTraits::getInstrPos
+    (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDIL::ENDIF, passRep));
+
+  if (landBlkHasOtherPred) {
+    unsigned immReg = getRegister(&AMDIL::GPRI32RegClass);
+    CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+    unsigned cmpResReg = getRegister(&AMDIL::GPRI32RegClass);
+    CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+                                        initReg, immReg);
+    CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+                                      AMDIL::IF_LOGICALZ_i32, passRep,
+                                      cmpResReg, DebugLoc());
+  }
+
+  CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDIL::IF_LOGICALNZ_i32,
+                                    passRep, initReg, DebugLoc());
+
+  if (migrateTrue) {
+    migrateInstruction(trueBlk, landBlk, insertPos);
+    // need to unconditionally insert the assignment to ensure that a path from
+    // a predecessor other than headBlk has a valid value in initReg if
+    // (initVal != 1).
+    CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+  }
+  CFGTraits::insertInstrBefore(insertPos, AMDIL::ELSE, passRep);
+
+  if (migrateFalse) {
+    migrateInstruction(falseBlk, landBlk, insertPos);
+    // need to unconditionally insert the assignment to ensure that a path from
+    // a predecessor other than headBlk has a valid value in initReg if
+    // (initVal != 0)
+    CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+  }
+  //CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+  if (landBlkHasOtherPred) {
+    // add endif
+    CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+    // put initReg = 2 to other predecessors of landBlk
+    for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+         predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+         ++predIter) {
+      BlockT *curBlk = *predIter;
+      if (curBlk != trueBlk && curBlk != falseBlk) {
+        CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+      }
+    } //for
+  }
+  if (DEBUGME) {
+    errs() << "result from improveSimpleJumpintoIf: ";
+    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+  }
+
+  // update landBlk
+  *plandBlk = landBlk;
+
+  return numNewBlk;
+} //improveSimpleJumpintoIf
+
+// Since we are after the register allocator, we don't want to use virtual
+// registers as it is possible that we can get a virtual register that is
+// past the 65K limit of the IL text format. So instead we search through the
+// register class for an unused physical register and mark it as used. If we
+// cannot find a register, then we do some funky math on the virtual registers
+// so that we don't clobber the physicals and make sure we don't go over the
+// 65k limit.
+template<class PassT>
+inline int CFGStructurizer<PassT>::getRegister(
+  const class TargetRegisterClass *RegClass)
+{
+  unsigned reg = funcRep->getRegInfo().createVirtualRegister(RegClass);
+  vregs.insert(reg);
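+  // Record the new vreg so TrivialRegAlloc, run at the end of this pass, can
+  // map it to an unused physical register (see CFGStructurizer::run).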
+  if (DEBUGME)
+    errs() << "created virtual register "
+           << TargetRegisterInfo::virtReg2Index(reg) << "\n";
+  return reg;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+    LoopT *exitingLoop,
+    BlockT *exitBlk,
+    LoopT *exitLoop,
+    BlockT *landBlk)
+{
+  if (DEBUGME) {
+    errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+           << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+  }
+
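+  // Breaking out of an outer loop from a deeper nesting level: use a flag
+  // register that is initialized before the target loop; every loop between
+  // the exiting block and the target loop breaks when the flag is set, and
+  // mergeLoopbreakBlock below sets the flag at the break point.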
+  RegiT initReg = INVALIDREGNUM;
+  if (exitingLoop != exitLoop) {
+    initReg = getRegister(&AMDIL::GPRI32RegClass);
+    assert(initReg != INVALIDREGNUM);
+    addLoopBreakInitReg(exitLoop, initReg);
+    while (exitingLoop != exitLoop && exitingLoop) {
+      addLoopBreakOnReg(exitingLoop, initReg);
+      exitingLoop = exitingLoop->getParentLoop();
+    }
+    assert(exitingLoop == exitLoop);
+  }
+
+  mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+
+} //handleLoopbreak
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+    LoopT *contingLoop,
+    BlockT *contBlk,
+    LoopT *contLoop)
+{
+  if (DEBUGME) {
+    errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+           << " header = BB" << contBlk->getNumber() << "\n";
+
+    errs() << "Trying to continue loop-depth = "
+           << getLoopDepth(contLoop)
+           << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+  }
+
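+  // Continuing an outer loop from a deeper nesting level: use a flag register
+  // that is cleared at the top of the target loop; the inner loops between the
+  // continuing block and the target loop break when the flag is set, and the
+  // loop directly nested in the target loop continues on it.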
+  RegiT initReg = INVALIDREGNUM;
+  if (contingLoop != contLoop) {
+    initReg = getRegister(&AMDIL::GPRI32RegClass);
+    assert(initReg != INVALIDREGNUM);
+    addLoopContInitReg(contLoop, initReg);
+    while (contingLoop && contingLoop->getParentLoop() != contLoop) {
+      addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
+      contingLoop = contingLoop->getParentLoop();
+    }
+    assert(contingLoop && contingLoop->getParentLoop() == contLoop);
+    addLoopContOnReg(contingLoop, initReg);
+  }
+
+  settleLoopcontBlock(contingBlk, contBlk, initReg);
+  //contingBlk->removeSuccessor(loopHeader);
+} //handleLoopcontBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk)
+{
+  if (DEBUGME) {
+    errs() << "serialPattern BB" << dstBlk->getNumber()
+           << " <= BB" << srcBlk->getNumber() << "\n";
+  }
+  //removeUnconditionalBranch(dstBlk);
+  dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
+
+  dstBlk->removeSuccessor(srcBlk);
+  CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+  removeSuccessor(srcBlk);
+  retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+    BlockT *curBlk,
+    BlockT *trueBlk,
+    BlockT *falseBlk,
+    BlockT *landBlk)
+{
+  if (DEBUGME) {
+    errs() << "ifPattern BB" << curBlk->getNumber();
+    errs() << "{  ";
+    if (trueBlk) {
+      errs() << "BB" << trueBlk->getNumber();
+    }
+    errs() << "  } else ";
+    errs() << "{  ";
+    if (falseBlk) {
+      errs() << "BB" << falseBlk->getNumber();
+    }
+    errs() << "  }\n ";
+    errs() << "landBlock: ";
+    if (landBlk == NULL) {
+      errs() << "NULL";
+    } else {
+      errs() << "BB" << landBlk->getNumber();
+    }
+    errs() << "\n";
+  }
+
+  int oldOpcode = branchInstr->getOpcode();
+  DebugLoc branchDL = branchInstr->getDebugLoc();
+  bool badDebugLoc = (branchDL == DebugLoc());
+  if (badDebugLoc) {
+    InstrT * brkInstr = getLastBreakInstr(curBlk);
+    if (brkInstr) {
+      branchDL = brkInstr->getDebugLoc();
+    }
+  }
+
+//    transform to
+//    if cond
+//       trueBlk
+//    else
+//       falseBlk
+//    endif
+//    landBlk
+
+  typename BlockT::iterator branchInstrPos =
+    CFGTraits::getInstrPos(curBlk, branchInstr);
+  CFGTraits::insertCondBranchBefore(branchInstrPos,
+                                    CFGTraits::getBranchNzeroOpcode(oldOpcode),
+                                    passRep,
+                                    branchDL);
+
+  if (trueBlk) {
+    curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
+    curBlk->removeSuccessor(trueBlk);
+    if (landBlk && trueBlk->succ_size()!=0) {
+      trueBlk->removeSuccessor(landBlk);
+    }
+    retireBlock(curBlk, trueBlk);
+  }
+  if (badDebugLoc) {
+    CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ELSE, passRep, branchDL);
+  } else {
+    CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ELSE, passRep);
+  }
+
+  if (falseBlk) {
+    curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
+                   falseBlk->end());
+    curBlk->removeSuccessor(falseBlk);
+    if (landBlk && falseBlk->succ_size() != 0) {
+      falseBlk->removeSuccessor(landBlk);
+    }
+    retireBlock(curBlk, falseBlk);
+  }
+  CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+
+  //curBlk->remove(branchInstrPos);
+  branchInstr->eraseFromParent();
+
+  if (landBlk && trueBlk && falseBlk) {
+    curBlk->addSuccessor(landBlk);
+  }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+    LoopLandInfo *loopLand)
+{
+  BlockT *landBlk = loopLand->landBlk;
+
+  if (DEBUGME) {
+    errs() << "loopPattern header = BB" << dstBlk->getNumber()
+           << " land = BB" << landBlk->getNumber() << "\n";
+  }
+
+  // Loop contInitRegs are initialized at the beginning of the loop.
+  for (typename std::set<RegiT>::const_iterator iter =
+         loopLand->contInitRegs.begin(),
+       iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  // Loop endbranchInitRegs are initialized after entering the loop.
+  for (typename std::set<RegiT>::const_iterator iter =
+         loopLand->endbranchInitRegs.begin(),
+       iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  /* We last inserted the DebugLoc in the BREAK_LOGICALZ_i32 or
+   * AMDIL::BREAK_LOGICALNZ statement in the current dstBlk.
+   * Search for the DebugLoc in that statement.
+   * If not found, we have to insert the empty/default DebugLoc. */
+  InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+  DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+  // fogbugz #7310: work-around discussed with Uri regarding do-while loops:
+  // in case the WHILELOOP line number is greater than the do.body line numbers,
+  // take the do.body line number instead.
+  MachineBasicBlock::iterator iter = dstBlk->begin();
+  MachineInstr *instrDoBody = &(*iter);
+  DebugLoc DLBreakDoBody = (instrDoBody) ? instrDoBody->getDebugLoc() : DebugLoc();
+  DebugLoc DLBreakMin = (DLBreak.getLine() < DLBreakDoBody.getLine()) ? DLBreak : DLBreakDoBody;
+
+  CFGTraits::insertInstrBefore(dstBlk, AMDIL::WHILELOOP, passRep, DLBreakMin);
+  // Loop breakInitRegs are initialized before entering the loop.
+  for (typename std::set<RegiT>::const_iterator iter =
+         loopLand->breakInitRegs.begin(),
+       iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  /* We last inserted the DebugLoc in the continue statement in the current
+   * dstBlk. Search for the DebugLoc in the continue statement.
+   * If not found, we have to insert the empty/default DebugLoc. */
+  InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+  DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+  CFGTraits::insertInstrEnd(dstBlk, AMDIL::ENDLOOP, passRep, DLContinue);
+  // Loop breakOnRegs are checked after the ENDLOOP: they break the loop that
+  // encloses this loop.
+  for (typename std::set<RegiT>::const_iterator iter =
+         loopLand->breakOnRegs.begin(),
+       iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::BREAK_LOGICALNZ_i32, passRep,
+                                   *iter);
+  }
+
+  // Loop contOnRegs are checked after the ENDLOOP: they continue the loop that
+  // encloses this loop.
+  for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+       iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::CONTINUE_LOGICALNZ_i32,
+                                   passRep, *iter);
+  }
+
+  dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+  for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+       iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+    dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
+  }
+
+  removeSuccessor(landBlk);
+  retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+    BlockT *exitBlk,
+    BlockT *exitLandBlk,
+    RegiT  setReg)
+{
+  if (DEBUGME) {
+    errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+           << " exit = BB" << exitBlk->getNumber()
+           << " land = BB" << exitLandBlk->getNumber() << "\n";
+  }
+
+  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+  assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+  DebugLoc DL = branchInstr->getDebugLoc();
+
+  BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+  int oldOpcode = branchInstr->getOpcode();
+
+  //    transform exitingBlk to
+  //    if ( ) {
+  //       exitBlk (if exitBlk != exitLandBlk)
+  //       setReg = 1
+  //       break
+  //    }endif
+  //    successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+  typename BlockT::iterator branchInstrPos =
+    CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+  if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+    //break_logical
+    int newOpcode =
+      (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
+      : CFGTraits::getBreakZeroOpcode(oldOpcode);
+    CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+  } else {
+    int newOpcode =
+      (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+      : CFGTraits::getBranchZeroOpcode(oldOpcode);
+    CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+    if (exitBlk != exitLandBlk) {
+      //splice is insert-before ...
+      exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+                         exitBlk->end());
+    }
+    if (setReg != INVALIDREGNUM) {
+      CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+    }
+    CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::BREAK, passRep);
+    CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+  } //if_logical
+
+  //now branchInstr can be erased safely
+  //exitingBlk->eraseFromParent(branchInstr);
+  branchInstr->eraseFromParent();
+
+  //now take care of successors, retire blocks
+  exitingBlk->removeSuccessor(exitBlk);
+  if (exitBlk != exitLandBlk) {
+    //splice is insert-before ...
+    exitBlk->removeSuccessor(exitLandBlk);
+    retireBlock(exitingBlk, exitBlk);
+  }
+
+} //mergeLoopbreakBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+    BlockT *contBlk,
+    RegiT   setReg)
+{
+  if (DEBUGME) {
+    errs() << "settleLoopcontBlock conting = BB"
+           << contingBlk->getNumber()
+           << ", cont = BB" << contBlk->getNumber() << "\n";
+  }
+
+  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+  if (branchInstr) {
+    assert(CFGTraits::isCondBranch(branchInstr));
+    typename BlockT::iterator branchInstrPos =
+      CFGTraits::getInstrPos(contingBlk, branchInstr);
+    BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+    int oldOpcode = branchInstr->getOpcode();
+    DebugLoc DL = branchInstr->getDebugLoc();
+
+    //    transform contingBlk to
+    //     if () {
+    //          move instr after branchInstr
+    //          continue
+    //        or
+    //          setReg = 1
+    //          break
+    //     }endif
+    //     successor = {orgSuccessor(contingBlk) - loopHeader}
+
+    bool useContinueLogical =
+      (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+    if (useContinueLogical == false) {
+      int branchOpcode =
+        trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+        : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep,
+                                        DL);
+
+      if (setReg != INVALIDREGNUM) {
+        CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+        // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
+        CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, DL);
+      } else {
+        // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
+        CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, DL);
+      }
+
+      CFGTraits::insertInstrEnd(contingBlk, AMDIL::ENDIF, passRep, DL);
+    } else {
+      int branchOpcode =
+        trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+        : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode,
+                                        passRep, DL);
+    }
+
+    //contingBlk->eraseFromParent(branchInstr);
+    branchInstr->eraseFromParent();
+  } else {
+    /* If we've arrived here then we've already erased the branch instruction.
+     * Travel back up the basic block to find the last reference of our debug
+     * location; we've just inserted that reference here so it should be
+     * representative. */
+    if (setReg != INVALIDREGNUM) {
+      CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+      // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
+      CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep,
+                                CFGTraits::getLastDebugLocInBB(contingBlk));
+    } else {
+      // insertEnd to ensure phi-moves, if any exist, go before the continue-instr.
+      CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep,
+                                CFGTraits::getLastDebugLocInBB(contingBlk));
+    }
+  } //else
+
+} //settleLoopcontBlock
+
+// The BBs in exitBlkSet have been determined to be on the break-path of
+// loopRep.  Before we can emit the code of those BBs as part of the loop-body
+// of loopRep, check whether they were determined earlier to be cont-BBs for
+// parentLoopRep.
+// If so, generate a new BB newBlk and
+//    (1) make newBlk the common successor of the BBs in exitBlkSet
+//    (2) change the continue-instr in the BBs in exitBlkSet to a break-instr
+//    (3) generate a continue-instr in newBlk
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+    LoopT *loopRep,
+    std::set<BlockT *> &exitBlkSet,
+    BlockT *exitLandBlk)
+{
+  std::set<BlockT *> endBlkSet;
+
+//  BlockT *parentLoopHead = parentLoopRep->getHeader();
+
+
+  for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+       iterEnd = exitBlkSet.end();
+       iter != iterEnd; ++iter) {
+    BlockT *exitBlk = *iter;
+    BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+    if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+      return NULL;
+
+    endBlkSet.insert(endBlk);
+  }
+
+  BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+  funcRep->push_back(newBlk);  //insert to function
+  CFGTraits::insertInstrEnd(newBlk, AMDIL::CONTINUE, passRep);
+  SHOWNEWBLK(newBlk, "New continue block: ");
+
+  for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+       iterEnd = endBlkSet.end();
+       iter != iterEnd; ++iter) {
+    BlockT *endBlk = *iter;
+    InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+    if (contInstr) {
+      contInstr->eraseFromParent();
+    }
+    endBlk->addSuccessor(newBlk);
+    if (DEBUGME) {
+      errs() << "Add new continue Block to BB"
+             << endBlk->getNumber() << " successors\n";
+    }
+  }
+
+  return newBlk;
+} //relocateLoopcontBlock
+
+
+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as the
+// LoopLandBlock.  This BB branches on the loop endBranchInit register to the
+// paths corresponding to the loop exiting branches.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+    BlockTSmallerVector &exitingBlks,
+    BlockTSmallerVector &exitBlks)
+{
+  const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+  RegiT endBranchReg = getRegister(&AMDIL::GPRI32RegClass);
+  assert(endBranchReg != INVALIDREGNUM);
+
+  // reg = 0 before entering the loop
+  addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+  uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+  assert(numBlks >=2 && numBlks == exitBlks.size());
+
+  BlockT *preExitingBlk = exitingBlks[0];
+  BlockT *preExitBlk = exitBlks[0];
+  BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+  funcRep->push_back(preBranchBlk);  //insert to function
+  SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+  BlockT *newLandBlk = preBranchBlk;
+
+  CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+                                        newLandBlk);
+  preExitingBlk->removeSuccessor(preExitBlk);
+  preExitingBlk->addSuccessor(newLandBlk);
+
+  //it is redundant to add reg = 0 to exitingBlks[0]
+
+  // For the 1..n-th exiting paths (the last iteration handles two paths),
+  // create the branch to the previous path and the current path.
+  for (uint32_t i = 1; i < numBlks; ++i) {
+    BlockT *curExitingBlk = exitingBlks[i];
+    BlockT *curExitBlk = exitBlks[i];
+    BlockT *curBranchBlk;
+
+    if (i == numBlks - 1) {
+      curBranchBlk = curExitBlk;
+    } else {
+      curBranchBlk = funcRep->CreateMachineBasicBlock();
+      addToTraversalBlock(curBranchBlk);
+      funcRep->push_back(curBranchBlk);  //insert to function
+      SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+    }
+
+    // Add reg = i to exitingBlks[i].
+    CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, endBranchReg, i);
+
+    // Remove the edge (exitingBlks[i], exitBlks[i]) and add the new edge
+    // (exitingBlks[i], newLandBlk).
+    CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+                                          newLandBlk);
+    curExitingBlk->removeSuccessor(curExitBlk);
+    curExitingBlk->addSuccessor(newLandBlk);
+
+    // add to preBranchBlk the branch instruction:
+    // if (endBranchReg == preVal)
+    //    preExitBlk
+    // else
+    //    curBranchBlk
+    //
+    // preValReg = i - 1
+
+    DebugLoc DL;
+    RegiT preValReg = getRegister(&AMDIL::GPRI32RegClass);
+    MachineInstr* preValInst
+    = BuildMI(preBranchBlk, DL, tii->get(AMDIL::LOADCONST_i32), preValReg)
+      .addImm(i - 1); //preVal
+    SHOWNEWINSTR(preValInst);
+
+    // condResReg = (endBranchReg == preValReg)
+    RegiT condResReg = getRegister(&AMDIL::GPRI32RegClass);
+    MachineInstr* cmpInst
+    = BuildMI(preBranchBlk, DL, tii->get(AMDIL::IEQ), condResReg)
+      .addReg(endBranchReg).addReg(preValReg);
+    SHOWNEWINSTR(cmpInst);
+
+    MachineInstr* condBranchInst
+    = BuildMI(preBranchBlk, DL, tii->get(AMDIL::BRANCH_COND_i32))
+      .addMBB(preExitBlk).addReg(condResReg);
+    SHOWNEWINSTR(condBranchInst);
+
+    preBranchBlk->addSuccessor(preExitBlk);
+    preBranchBlk->addSuccessor(curBranchBlk);
+
+    // Update preExitingBlk, preExitBlk, preBranchBlk.
+    preExitingBlk = curExitingBlk;
+    preExitBlk = curExitBlk;
+    preBranchBlk = curBranchBlk;
+
+  }  //end for 1 .. n blocks
+
+  return newLandBlk;
+} //addLoopEndbranchBlock
+
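+// Classify the path that follows unique successors from srcBlk: return
+// SinglePath_InPath if it reaches dstBlk, SinglePath_NotInPath if it ends at a
+// block with no successors without reaching dstBlk, and Not_SinglePath
+// otherwise (a block on the path branches, or has a side entry when
+// allowSideEntry is false).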
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+                                     bool allowSideEntry)
+{
+  assert(dstBlk);
+
+  if (srcBlk == dstBlk) {
+    return SinglePath_InPath;
+  }
+
+  while (srcBlk && srcBlk->succ_size() == 1) {
+    srcBlk = *srcBlk->succ_begin();
+    if (srcBlk == dstBlk) {
+      return SinglePath_InPath;
+    }
+
+    if (!allowSideEntry && srcBlk->pred_size() > 1) {
+      return Not_SinglePath;
+    }
+  }
+
+  if (srcBlk && srcBlk->succ_size()==0) {
+    return SinglePath_NotInPath;
+  }
+
+  return Not_SinglePath;
+} //singlePathTo
+
+// If there is a single path from srcBlk to dstBlk, return the last block before
+// dstBlk.  If there is a single path from srcBlk to the function end that does
+// not pass through dstBlk, return the last block in that path.  Otherwise,
+// return NULL.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+                                      bool allowSideEntry)
+{
+  //assert(dstBlk);
+
+  if (srcBlk == dstBlk) {
+    return srcBlk;
+  }
+
+  if (srcBlk->succ_size() == 0) {
+    return srcBlk;
+  }
+
+  while (srcBlk && srcBlk->succ_size() == 1) {
+    BlockT *preBlk = srcBlk;
+
+    srcBlk = *srcBlk->succ_begin();
+    if (srcBlk == NULL) {
+      return preBlk;
+    }
+
+    if (!allowSideEntry && srcBlk->pred_size() > 1) {
+      return NULL;
+    }
+  }
+
+  if (srcBlk && srcBlk->succ_size()==0) {
+    return srcBlk;
+  }
+
+  return NULL;
+
+} //singlePathEnd
+
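+// Walk the chain of unique successors from srcBlk (a successor of preBlk)
+// toward dstBlk; whenever a block on the chain has more than one predecessor,
+// clone it so that the path coming from preBlk gets a private copy.  Returns
+// the number of blocks cloned.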
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+    BlockT *dstBlk)
+{
+  int cloned = 0;
+  assert(preBlk->isSuccessor(srcBlk));
+  while (srcBlk && srcBlk != dstBlk) {
+    assert(srcBlk->succ_size() == 1 ||
+           (srcBlk->succ_size() == 0 && dstBlk == NULL));
+    if (srcBlk->pred_size() > 1) {
+      srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
+      ++cloned;
+    }
+
+    preBlk = srcBlk;
+    if (srcBlk->succ_size() == 1)
+      srcBlk = *srcBlk->succ_begin();
+    else
+      srcBlk = NULL;
+  }
+
+  return cloned;
+} //cloneOnSideEntryTo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+    BlockT *predBlk)
+{
+  assert(predBlk->isSuccessor(curBlk) &&
+         "predBlk is not a predecessor of curBlk");
+
+  BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
+  CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+  //srcBlk, oldBlk, newBlk
+
+  predBlk->removeSuccessor(curBlk);
+  predBlk->addSuccessor(cloneBlk);
+
+  // add all successors to cloneBlk
+  CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+  numClonedInstr += curBlk->size();
+
+  if (DEBUGME) {
+    errs() << "Cloned block: " << "BB"
+           << curBlk->getNumber() << "size " << curBlk->size() << "\n";
+  }
+
+  SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+  return cloneBlk;
+} //cloneBlockForPredecessor
+
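+// Return the successor of exitingBlk that lies outside loopRep, i.e. the exit
+// block reached through this exiting block; assert that exactly one such
+// successor exists.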
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+    BlockT *exitingBlk)
+{
+  BlockT *exitBlk = NULL;
+
+  for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
+       iterSuccEnd = exitingBlk->succ_end();
+       iterSucc != iterSuccEnd; ++iterSucc) {
+    BlockT *curBlk = *iterSucc;
+    if (!loopRep->contains(curBlk)) {
+      assert(exitBlk == NULL);
+      exitBlk = curBlk;
+    }
+  }
+
+  assert(exitBlk != NULL);
+
+  return exitBlk;
+} //exitingBlock2ExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+    BlockT *dstBlk,
+    InstrIterator insertPos)
+{
+  InstrIterator spliceEnd;
+  //look for the input branchinstr, not the AMDIL branchinstr
+  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+  if (branchInstr == NULL) {
+    if (DEBUGME) {
+      errs() << "migrateInstruction don't see branch instr\n" ;
+    }
+    spliceEnd = srcBlk->end();
+  } else {
+    if (DEBUGME) {
+      errs() << "migrateInstruction see branch instr\n" ;
+      branchInstr->dump();
+    }
+    spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+  }
+  if (DEBUGME) {
+    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+           << "srcSize = " << srcBlk->size() << "\n";
+  }
+
+  //splice insert before insertPos
+  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+  if (DEBUGME) {
+    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+           << "srcSize = " << srcBlk->size() << "\n";
+  }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit changes
+//   B1:
+//        uncond_br LoopHeader
+//
+// to
+//   B1:
+//        cond_br 1 LoopHeader dummyExit
+// and return the newly added dummy exit block
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep)
+{
+  BlockT *loopHeader;
+  BlockT *loopLatch;
+  loopHeader = LoopRep->getHeader();
+  loopLatch = LoopRep->getLoopLatch();
+  BlockT *dummyExitBlk = NULL;
+  if (loopHeader!=NULL && loopLatch!=NULL) {
+    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+    if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+      dummyExitBlk = funcRep->CreateMachineBasicBlock();
+      funcRep->push_back(dummyExitBlk);  //insert to function
+      SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+      if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+      typename BlockT::iterator insertPos =
+        CFGTraits::getInstrPos(loopLatch, branchInstr);
+      unsigned immReg = getRegister(&AMDIL::GPRI32RegClass);
+      CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+      InstrT *newInstr =
+        CFGTraits::insertInstrBefore(insertPos, AMDIL::BRANCH_COND_i32,
+                                     passRep);
+      MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
+      SHOWNEWINSTR(newInstr);
+      branchInstr->eraseFromParent();
+      loopLatch->addSuccessor(dummyExitBlk);
+    }
+  }
+
+  return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk)
+{
+  InstrT *branchInstr;
+
+  // I saw two unconditional branches in one basic block in the example
+  // test_fc_do_while_or.c; need to fix the upstream on this to remove the loop.
+  while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
+         && CFGTraits::isUncondBranch(branchInstr)) {
+    if (DEBUGME) {
+      errs() << "Removing unconditional branch instruction" ;
+      branchInstr->dump();
+    }
+    branchInstr->eraseFromParent();
+  }
+} //removeUnconditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk)
+{
+  if (srcBlk->succ_size() == 2) {
+    BlockT *blk1 = *srcBlk->succ_begin();
+    BlockT *blk2 = *(srcBlk->succ_begin()+1);
+
+    if (blk1 == blk2) {
+      InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+      assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+      if (DEBUGME) {
+        errs() << "Removing unneeded conditional branch instruction" ;
+        branchInstr->dump();
+      }
+      branchInstr->eraseFromParent();
+      SHOWNEWBLK(blk1, "Removing redundant successor");
+      srcBlk->removeSuccessor(blk1);
+    }
+  }
+} //removeRedundantConditionalBranch
+
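+// Create a dummy exit block ending in RETURN, erase the RETURN instruction
+// from each block in retBlks, and make the dummy block a successor of all of
+// them so the function has a single exit.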
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+    DEFAULT_VEC_SLOTS> &retBlks)
+{
+  BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+  funcRep->push_back(dummyExitBlk);  //insert to function
+  CFGTraits::insertInstrEnd(dummyExitBlk, AMDIL::RETURN, passRep);
+
+  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+         retBlks.begin(),
+       iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+    BlockT *curBlk = *iter;
+    InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+    if (curInstr) {
+      curInstr->eraseFromParent();
+    }
+#if 0
+    if (curBlk->size()==0 && curBlk->pred_size() == 1) {
+      if (DEBUGME) {
+        errs() << "Replace empty block BB" <<  curBlk->getNumber()
+               << " with dummyExitBlock\n";
+      }
+      BlockT *predb = *curBlk->pred_begin();
+      predb->removeSuccessor(curBlk);
+      curBlk = predb;
+    } //handle empty curBlk
+#endif
+    curBlk->addSuccessor(dummyExitBlk);
+    if (DEBUGME) {
+      errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+             << " successors\n";
+    }
+  } //for
+
+  SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
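+// Detach srcBlk from all of its successors.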
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk)
+{
+  while (srcBlk->succ_size()) {
+    srcBlk->removeSuccessor(*srcBlk->succ_begin());
+  }
+}
+
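+// Record the SCC number assigned to srcBlk, creating its BlockInfo entry on
+// first use.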
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum)
+{
+  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+  if (srcBlkInfo == NULL) {
+    srcBlkInfo = new BlockInfo();
+  }
+
+  srcBlkInfo->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk)
+{
+  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+  return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::addToTraversalBlock(BlockT *srcBlk)
+{
+  if (DEBUGME) {
+    errs() << "AddToTraversal BB" << srcBlk->getNumber() << "\n";
+  }
+
+  addedToTraversalBlks.push_back(srcBlk);
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk)
+{
+  if (DEBUGME) {
+    errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+  }
+
+  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+  if (srcBlkInfo == NULL) {
+    srcBlkInfo = new BlockInfo();
+  }
+
+  srcBlkInfo->isRetired = true;
+  //int i = srcBlk->succ_size();
+  //int j = srcBlk->pred_size();
+  assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+         && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk)
+{
+  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+  return (srcBlkInfo && srcBlkInfo->isRetired);
+}
+
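+// curBlk is an active loop head if it is the header of a loop (at this or an
+// enclosing nesting level) whose landing block has not been created yet or has
+// not been retired, i.e. that loop still remains to be handled.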
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk)
+{
+  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+  while (loopRep && loopRep->getHeader() == curBlk) {
+    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+
+    if(loopLand == NULL)
+      return true;
+
+    BlockT *landBlk = loopLand->landBlk;
+    assert(landBlk);
+    if (!isRetiredBlock(landBlk)) {
+      return true;
+    }
+
+    loopRep = loopRep->getParentLoop();
+  }
+
+  return false;
+} //isActiveLoophead
+
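+// Heuristic: a block is a candidate for migration only when it has more than
+// one predecessor, is larger than blockSizeThreshold, and duplicating it for
+// the extra predecessors would exceed cloneInstrThreshold instructions.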
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk)
+{
+  const unsigned blockSizeThreshold = 30;
+  const unsigned cloneInstrThreshold = 100;
+
+  bool multiplePreds = blk && (blk->pred_size() > 1);
+
+  if(!multiplePreds)
+    return false;
+
+  unsigned blkSize = blk->size();
+  return ((blkSize > blockSizeThreshold)
+          && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
+} //needMigrateBlock
+
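+// Record landBlk as the landing block of loopRep.  If landBlk also has
+// predecessors outside the loop and its exit-block set, insert a fresh block
+// in front of it, redirect the in-path predecessors to that block, and use it
+// as the landing block instead (updating exitBlks accordingly).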
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+    BlockTSmallerVector &exitBlks,
+    std::set<BlockT *> &exitBlkSet)
+{
+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks
+
+  for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+       predIterEnd = landBlk->pred_end();
+       predIter != predIterEnd; ++predIter) {
+    BlockT *curBlk = *predIter;
+    if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+      inpathBlks.push_back(curBlk);
+    }
+  } //for
+
+  //if landBlk has predecessors that are not in the given loop,
+  //create a new block
+  BlockT *newLandBlk = landBlk;
+  if (inpathBlks.size() != landBlk->pred_size()) {
+    newLandBlk = funcRep->CreateMachineBasicBlock();
+    funcRep->push_back(newLandBlk);  //insert to function
+    newLandBlk->addSuccessor(landBlk);
+    for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+           inpathBlks.begin(),
+         iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+      BlockT *curBlk = *iter;
+      CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+      //srcBlk, oldBlk, newBlk
+      curBlk->removeSuccessor(landBlk);
+      curBlk->addSuccessor(newLandBlk);
+    }
+    for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+      if (exitBlks[i] == landBlk) {
+        exitBlks[i] = newLandBlk;
+      }
+    }
+    SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+  }
+
+  setLoopLandBlock(loopRep, newLandBlk);
+
+  return newLandBlk;
+} // recordLoopbreakLand
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk)
+{
+  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+  if (theEntry == NULL) {
+    theEntry = new LoopLandInfo();
+  }
+  assert(theEntry->landBlk == NULL);
+
+  if (blk == NULL) {
+    blk = funcRep->CreateMachineBasicBlock();
+    funcRep->push_back(blk);  //insert to function
+    SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+  }
+
+  theEntry->landBlk = blk;
+
+  if (DEBUGME) {
+    errs() << "setLoopLandBlock loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << "  landing-block = BB" << blk->getNumber() << "\n";
+  }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum)
+{
+  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+  if (theEntry == NULL) {
+    theEntry = new LoopLandInfo();
+  }
+
+  theEntry->breakOnRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopBreakOnReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << "  regNum = " << regNum << "\n";
+  }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum)
+{
+  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+  if (theEntry == NULL) {
+    theEntry = new LoopLandInfo();
+  }
+  theEntry->contOnRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopContOnReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << "  regNum = " << regNum << "\n";
+  }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum)
+{
+  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+  if (theEntry == NULL) {
+    theEntry = new LoopLandInfo();
+  }
+  theEntry->breakInitRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopBreakInitReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << "  regNum = " << regNum << "\n";
+  }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum)
+{
+  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+  if (theEntry == NULL) {
+    theEntry = new LoopLandInfo();
+  }
+  theEntry->contInitRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopContInitReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << "  regNum = " << regNum << "\n";
+  }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+    RegiT regNum)
+{
+  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+  if (theEntry == NULL) {
+    theEntry = new LoopLandInfo();
+  }
+  theEntry->endbranchInitRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopEndbranchInitReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << "  regNum = " << regNum << "\n";
+  }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep)
+{
+  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+  return theEntry;
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep)
+{
+  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+  return theEntry ? theEntry->landBlk : NULL;
+} // getLoopLandBlock
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk)
+{
+  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+  if (loopRep == NULL)
+    return false;
+
+  BlockT *loopHeader = loopRep->getHeader();
+
+  return curBlk->isSuccessor(loopHeader);
+
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep)
+{
+  return loopRep ? loopRep->getLoopDepth() : 0;
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd)
+{
+  int count = 0;
+  while (iterStart != iterEnd) {
+    if (!isRetiredBlock(*iterStart)) {
+      ++count;
+    }
+    ++iterStart;
+  }
+
+  return count;
+} //countActiveBlock
+
+// This is a workaround for findNearestCommonDominator not being available for
+// post-dominators; a proper fix should go into Dominators.h.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2)
+{
+
+  if (postDomTree->dominates(blk1, blk2)) {
+    return blk1;
+  }
+  if (postDomTree->dominates(blk2, blk1)) {
+    return blk2;
+  }
+
+  DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+  DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+  // Handle newly cloned node.
+  if (node1 == NULL && blk1->succ_size() == 1) {
+    return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+  }
+  if (node2 == NULL && blk2->succ_size() == 1) {
+    return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+  }
+
+  if (node1 == NULL || node2 == NULL) {
+    return NULL;
+  }
+
+  node1 = node1->getIDom();
+  while (node1) {
+    if (postDomTree->dominates(node1, node2)) {
+      return node1->getBlock();
+    }
+    node1 = node1->getIDom();
+  }
+
+  return NULL;
+}
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks)
+{
+  BlockT *commonDom;
+  typename std::set<BlockT *>::const_iterator iter = blks.begin();
+  typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+  for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
+    BlockT *curBlk = *iter;
+    if (curBlk != commonDom) {
+      commonDom = findNearestCommonPostDom(curBlk, commonDom);
+    }
+  }
+
+  if (DEBUGME) {
+    errs() << "Common post dominator for exit blocks is ";
+    if (commonDom) {
+      errs() << "BB" << commonDom->getNumber() << "\n";
+    } else {
+      errs() << "NULL\n";
+    }
+  }
+
+  return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDIL
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGStructurizer : public MachineFunctionPass
+{
+public:
+  typedef MachineInstr              InstructionType;
+  typedef MachineFunction           FunctionType;
+  typedef MachineBasicBlock         BlockType;
+  typedef MachineLoopInfo           LoopinfoType;
+  typedef MachineDominatorTree      DominatortreeType;
+  typedef MachinePostDominatorTree  PostDominatortreeType;
+  typedef MachineDomTreeNode        DomTreeNodeType;
+  typedef MachineLoop               LoopType;
+//private:
+  const TargetInstrInfo *TII;
+
+//public:
+//  static char ID;
+
+public:
+  AMDILCFGStructurizer(char &pid);
+  const TargetInstrInfo *getTargetInstrInfo() const;
+  // this is an abstract base class
+  virtual bool runOnMachineFunction(MachineFunction &F) = 0;
+
+private:
+
+};   //end of class AMDILCFGStructurizer
+
+//char AMDILCFGStructurizer::ID = 0;
+} //end of namespace llvm
+AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid)
+  : MachineFunctionPass(pid), TII(NULL)
+{
+}
+
+const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const
+{
+  return TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+
+extern void initializeAMDILCFGPreparePass(PassRegistry&);
+
+class AMDILCFGPrepare : public AMDILCFGStructurizer
+{
+public:
+  static char ID;
+
+public:
+  AMDILCFGPrepare();
+
+  virtual const char *getPassName() const;
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};   //end of class AMDILCFGPrepare
+
+char AMDILCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDILCFGPrepare::AMDILCFGPrepare()
+  : AMDILCFGStructurizer(ID)
+{
+  initializeAMDILCFGPreparePass(*PassRegistry::getPassRegistry());
+}
+const char *AMDILCFGPrepare::getPassName() const
+{
+  return "AMD IL Control Flow Graph Preparation Pass";
+}
+
+void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  AU.addPreserved<MachineFunctionAnalysis>();
+  AU.addRequired<MachineFunctionAnalysis>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<MachinePostDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+
+extern void initializeAMDILCFGPerformPass(PassRegistry&);
+
+class AMDILCFGPerform : public AMDILCFGStructurizer
+{
+public:
+  static char ID;
+
+public:
+  AMDILCFGPerform();
+  virtual const char *getPassName() const;
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};   //end of class AMDILCFGPerform
+
+char AMDILCFGPerform::ID = 0;
+} //end of namespace llvm
+
+AMDILCFGPerform::AMDILCFGPerform()
+  : AMDILCFGStructurizer(ID)
+{
+  initializeAMDILCFGPerformPass(*PassRegistry::getPassRegistry());
+}
+
+const char *AMDILCFGPerform::getPassName() const
+{
+  return "AMD IL Control Flow Graph structurizer Pass";
+}
+
+void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  AU.addPreserved<MachineFunctionAnalysis>();
+  AU.addRequired<MachineFunctionAnalysis>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<MachinePostDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDILCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// this class is tailored to the AMDIL backend
+template<>
+struct CFGStructTraits<AMDILCFGStructurizer> {
+  typedef int RegiT;
+
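+  // The helpers below map a scalar BRANCH_COND opcode to the corresponding
+  // structured control-flow opcode (BREAK/IF/CONTINUE in LOGICALNZ/LOGICALZ
+  // form).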
+  static int getBreakNzeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALNZ);
+    default:
+      assert(0 && "internal error");
+    };
+    return -1;
+  }
+
+  static int getBreakZeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALZ);
+    default:
+      assert(0 && "internal error");
+    };
+    return -1;
+  }
+
+  static int getBranchNzeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALNZ);
+    default:
+      assert(0 && "internal error");
+    };
+    return -1;
+  }
+
+  static int getBranchZeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALZ);
+    default:
+      assert(0 && "internal error");
+    };
+    return -1;
+  }
+
+  static int getContinueNzeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALNZ);
+    default:
+      assert(0 && "internal error");
+    };
+    return -1;
+  }
+
+  static int getContinueZeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALZ);
+    default:
+      assert(0 && "internal error");
+    };
+    return -1;
+  }
+
+// the explicitly represented branch target is the true branch target
+#define getExplicitBranch getTrueBranch
+#define setExplicitBranch setTrueBranch
+
+  static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+    return instr->getOperand(0).getMBB();
+  }
+
+  static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+    instr->getOperand(0).setMBB(blk);
+  }
+
+  static MachineBasicBlock *
+  getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+    assert(blk->succ_size() == 2);
+    MachineBasicBlock *trueBranch = getTrueBranch(instr);
+    MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+    MachineBasicBlock::succ_iterator iterNext = iter;
+    ++iterNext;
+
+    return (*iter == trueBranch) ? *iterNext : *iter;
+  }
+
+  static bool isCondBranch(MachineInstr *instr) {
+    switch (instr->getOpcode()) {
+      ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+      break;
+    default:
+      return false;
+    }
+    return true;
+  }
+
+  static bool isUncondBranch(MachineInstr *instr) {
+    switch (instr->getOpcode()) {
+    case AMDIL::BRANCH:
+      break;
+    default:
+      return false;
+    }
+    return true;
+  }
+
+  static bool isPhimove(MachineInstr *instr) {
+    switch (instr->getOpcode()) {
+      ExpandCaseToAllTypes(AMDIL::MOVE);
+      break;
+    default:
+      return false;
+    }
+    return true;
+  }
+
+  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+    //get DebugLoc from the last MachineBasicBlock instruction with debug info
+    DebugLoc DL;
+    for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+      MachineInstr *instr = &(*iter);
+      if (instr->getDebugLoc().isUnknown() == false) {
+        DL = instr->getDebugLoc();
+      }
+    }
+    return DL;
+  }
+
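+  // Return the last instruction of blk if it is a conditional or unconditional
+  // branch, otherwise NULL.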
+  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+    MachineInstr *instr = &*iter;
+    if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
+      return instr;
+    }
+    return NULL;
+  }
+
+  // The correct naming for this is getPossibleLoopendBlockBranchInstr.
+  //
+  // A BB with a backward-edge could have move instructions after the branch
+  // instruction.  Such move instructions "belong to" the loop backward-edge.
+  //
+  static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+    for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+         iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+      // FIXME: Simplify
+      MachineInstr *instr = &*iter;
+      if (instr) {
+        if (isCondBranch(instr) || isUncondBranch(instr)) {
+          return instr;
+        } else if (!isPhimove(instr)) {
+          break;
+        }
+      }
+    }
+    return NULL;
+  }
+
+  static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+    if (iter != blk->rend()) {
+      MachineInstr *instr = &(*iter);
+      if (instr->getOpcode() == AMDIL::RETURN) {
+        return instr;
+      }
+    }
+    return NULL;
+  }
+
+  static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+    if (iter != blk->rend()) {
+      MachineInstr *instr = &(*iter);
+      if (instr->getOpcode() == AMDIL::CONTINUE) {
+        return instr;
+      }
+    }
+    return NULL;
+  }
+
+  static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+    for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+      MachineInstr *instr = &(*iter);
+      if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDIL::BREAK_LOGICALZ_i32)) {
+        return instr;
+      }
+    }
+    return NULL;
+  }
+
+  static bool isReturnBlock(MachineBasicBlock *blk) {
+    MachineInstr *instr = getReturnInstr(blk);
+    bool isReturn = (blk->succ_size() == 0);
+    if (instr) {
+      assert(isReturn);
+    } else if (isReturn) {
+      if (DEBUGME) {
+        errs() << "BB" << blk->getNumber()
+               <<" is return block without RETURN instr\n";
+      }
+    }
+
+    return  isReturn;
+  }
+
+  static MachineBasicBlock::iterator
+  getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+    assert(instr->getParent() == blk && "instruction doesn't belong to block");
+    MachineBasicBlock::iterator iter = blk->begin();
+    MachineBasicBlock::iterator iterEnd = blk->end();
+    while (iter != iterEnd && &(*iter) != instr) {
+      ++iter;
+    }
+
+    assert(iter != iterEnd);
+    return iter;
+  }//getInstrPos
+
+  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+                                         AMDILCFGStructurizer *passRep) {
+    return insertInstrBefore(blk, newOpcode, passRep, DebugLoc());
+  } //insertInstrBefore
+
+  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+                                         AMDILCFGStructurizer *passRep, DebugLoc DL) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+    MachineBasicBlock::iterator res;
+    if (blk->begin() != blk->end()) {
+      blk->insert(blk->begin(), newInstr);
+    } else {
+      blk->push_back(newInstr);
+    }
+
+    SHOWNEWINSTR(newInstr);
+
+    return newInstr;
+  } //insertInstrBefore
+
+  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+                             AMDILCFGStructurizer *passRep) {
+    insertInstrEnd(blk, newOpcode, passRep, DebugLoc());
+  } //insertInstrEnd
+
+  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+                             AMDILCFGStructurizer *passRep, DebugLoc DL) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineInstr *newInstr = blk->getParent()
+                             ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+    blk->push_back(newInstr);
+    //assume the instruction doesn't take any reg operand ...
+
+    SHOWNEWINSTR(newInstr);
+  } //insertInstrEnd
+
+  static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+                                         int newOpcode,
+                                         AMDILCFGStructurizer *passRep,
+                                         DebugLoc DL) {
+    MachineInstr *oldInstr = &(*instrPos);
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineBasicBlock *blk = oldInstr->getParent();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+    blk->insert(instrPos, newInstr);
+    //assume the instruction doesn't take any reg operand ...
+
+    SHOWNEWINSTR(newInstr);
+    return newInstr;
+  } //insertInstrBefore
+
+  static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+                                         int newOpcode,
+                                         AMDILCFGStructurizer *passRep) {
+    return insertInstrBefore(instrPos, newOpcode, passRep, DebugLoc());
+  } //insertInstrBefore
+
+  static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+                                     int newOpcode,
+                                     AMDILCFGStructurizer *passRep,
+                                     DebugLoc DL) {
+    MachineInstr *oldInstr = &(*instrPos);
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineBasicBlock *blk = oldInstr->getParent();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+                                           DL);
+
+    blk->insert(instrPos, newInstr);
+    MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
+                                         false);
+
+    SHOWNEWINSTR(newInstr);
+    //erase later oldInstr->eraseFromParent();
+  } //insertCondBranchBefore
+
+  static void insertCondBranchBefore(MachineBasicBlock *blk,
+                                     MachineBasicBlock::iterator insertPos,
+                                     int newOpcode,
+                                     AMDILCFGStructurizer *passRep,
+                                     RegiT regNum,
+                                     DebugLoc DL) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+    //insert before
+    blk->insert(insertPos, newInstr);
+    MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+    SHOWNEWINSTR(newInstr);
+  } //insertCondBranchBefore
+
+  static void insertCondBranchEnd(MachineBasicBlock *blk,
+                                  int newOpcode,
+                                  AMDILCFGStructurizer *passRep,
+                                  RegiT regNum) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+    blk->push_back(newInstr);
+    MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+    SHOWNEWINSTR(newInstr);
+  } //insertCondBranchEnd
+
+
+  static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+                                      AMDILCFGStructurizer *passRep,
+                                      RegiT regNum, int regVal) {
+    MachineInstr *oldInstr = &(*instrPos);
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineBasicBlock *blk = oldInstr->getParent();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+                                           DebugLoc());
+    MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
+    MachineInstrBuilder(newInstr).addImm(regVal); //set src value
+
+    blk->insert(instrPos, newInstr);
+
+    SHOWNEWINSTR(newInstr);
+  } //insertAssignInstrBefore
+
+  static void insertAssignInstrBefore(MachineBasicBlock *blk,
+                                      AMDILCFGStructurizer *passRep,
+                                      RegiT regNum, int regVal) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+                                           DebugLoc());
+    MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
+    MachineInstrBuilder(newInstr).addImm(regVal); //set src value
+
+    if (blk->begin() != blk->end()) {
+      blk->insert(blk->begin(), newInstr);
+    } else {
+      blk->push_back(newInstr);
+    }
+
+    SHOWNEWINSTR(newInstr);
+
+  } //insertAssignInstrBefore
+
+  static void insertCompareInstrBefore(MachineBasicBlock *blk,
+                                       MachineBasicBlock::iterator instrPos,
+                                       AMDILCFGStructurizer *passRep,
+                                       RegiT dstReg, RegiT src1Reg,
+                                       RegiT src2Reg) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(AMDIL::IEQ), DebugLoc());
+
+    MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
+    MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
+    MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
+
+    blk->insert(instrPos, newInstr);
+    SHOWNEWINSTR(newInstr);
+
+  } //insertCompareInstrBefore
+
+  static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+                                 MachineBasicBlock *srcBlk) {
+    for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+         iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+      dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
+    }
+  } //cloneSuccessorList
+
+  static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+    MachineFunction *func = srcBlk->getParent();
+    MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+    func->push_back(newBlk);  //insert to function
+    //newBlk->setNumber(srcBlk->getNumber());
+    for (MachineBasicBlock::const_instr_iterator iter = srcBlk->instr_begin(),
+         iterEnd = srcBlk->instr_end();
+         iter != iterEnd; ++iter) {
+      MachineInstr *instr = func->CloneMachineInstr(iter);
+      // This is a workaround for LLVM bugzilla 8420 because CloneMachineInstr
+      // does not clone the AsmPrinterFlags.
+      instr->setAsmPrinterFlag(
+        (llvm::MachineInstr::CommentFlag)iter->getAsmPrinterFlags());
+      newBlk->push_back(instr);
+    }
+    return newBlk;
+  }
+
+  //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
+  //the AMDIL branch instruction is not recognized as a terminator.  Fix this
+  //and retire this routine.
+  static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+                                         MachineBasicBlock *oldBlk,
+                                         MachineBasicBlock *newBlk) {
+    MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+    if (branchInstr && isCondBranch(branchInstr) &&
+        getExplicitBranch(branchInstr) == oldBlk) {
+      setExplicitBranch(branchInstr, newBlk);
+    }
+  }
+
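+  // Final cleanup after structurization: check that the function has no jump
+  // table to fix up, then erase every CONTINUE that immediately precedes an
+  // ENDLOOP, since it is implied.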
+  static void wrapup(MachineBasicBlock *entryBlk) {
+    assert((!entryBlk->getParent()->getJumpTableInfo()
+            || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+           && "found a jump table");
+
+    //collect continue right before endloop
+    SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+    MachineBasicBlock::iterator pre = entryBlk->begin();
+    MachineBasicBlock::iterator iterEnd = entryBlk->end();
+    MachineBasicBlock::iterator iter = pre;
+    while (iter != iterEnd) {
+      if (pre->getOpcode() == AMDIL::CONTINUE
+          && iter->getOpcode() == AMDIL::ENDLOOP) {
+        contInstr.push_back(pre);
+      }
+      pre = iter;
+      ++iter;
+    } //end while
+
+    //delete continue right before endloop
+    for (unsigned i = 0; i < contInstr.size(); ++i) {
+      contInstr[i]->eraseFromParent();
+    }
+
+    // TODO: fix up the jump table so a later phase won't be confused.
+    // if (jumpTableInfo->isEmpty() == false) { we need to clean the jump table,
+    // but there isn't such an interface yet; alternatively, replace all the
+    // other blocks in the jump table with the entryBlk }
+
+  } //wrapup
+
+  static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
+    return &pass.getAnalysis<MachineDominatorTree>();
+  }
+
+  static MachinePostDominatorTree*
+  getPostDominatorTree(AMDILCFGStructurizer &pass) {
+    return &pass.getAnalysis<MachinePostDominatorTree>();
+  }
+
+  static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
+    return &pass.getAnalysis<MachineLoopInfo>();
+  }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(AMDILCFGPrepare, "amdcfgprepare",
+                      "AMD IL Control Flow Graph Preparation Pass",
+                      false, false);
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree);
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree);
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo);
+INITIALIZE_PASS_END(AMDILCFGPrepare, "amdcfgprepare",
+                    "AMD IL Control Flow Graph Preparation Pass",
+                    false, false)
+
+INITIALIZE_PASS_BEGIN(AMDILCFGPerform, "amdcfgperform",
+                      "AMD IL Control Flow Graph structurizer Pass",
+                      false, false);
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree);
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree);
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo);
+INITIALIZE_PASS_END(AMDILCFGPerform, "amdcfgperform",
+                    "AMD IL Control Flow Graph structurizer Pass",
+                    false, false)
+
+namespace llvm
+{
+FunctionPass *createAMDILCFGPreparationPass();
+FunctionPass *createAMDILCFGStructurizerPass();
+}
+
+// createAMDILCFGPreparationPass - Returns a pass
+FunctionPass *llvm::createAMDILCFGPreparationPass()
+{
+  return new AMDILCFGPrepare();
+}
+
+bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func)
+{
+  TII = func.getTarget().getInstrInfo();
+  return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func,
+         *this);
+}
+
+// createAMDILCFGStructurizerPass - Returns a pass
+FunctionPass *llvm::createAMDILCFGStructurizerPass()
+{
+  return new AMDILCFGPerform();
+}
+
+bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func)
+{
+  TII = func.getTarget().getInstrInfo();
+  return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func,
+         *this);
+}
+
+//end of file newline goes below
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,50 @@
+//===-- AMDILCallingConv.td -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMDIL architectures.
+//
+//===----------------------------------------------------------------------===//
+
+def RetCC_AMDIL32 : CallingConv<[
+ // Since IL has no return values, all values can be emulated on the stack
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL
+
+ CCIfType<[i1, i8, i16, i32, f32], CCAssignToReg<
+ [ include "AMDILRegisterUsesScalar.td" ]> >,
+
+ CCIfType<[v2i32, v2f32, v2i8, v2i16, f64, i64], CCAssignToReg<
+ [ include "AMDILRegisterUsesV2.td" ]> >,
+
+ CCIfType<[v4i32, v4f32, v4i8, v4i16, v2f64, v2i64], CCAssignToReg<
+ [ include "AMDILRegisterUsesV4.td" ]> >,
+
+ CCAssignToStack<16, 16>
+ ]>;
+
+// AMDIL 32-bit C Calling convention.
+def CC_AMDIL32 : CallingConv<[
+ // CCIfByVal<CCPassByVal<4, 4>>,
+ // Since IL has parameter values, all values can be emulated on the stack
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL
+
+ CCIfType<[i1, i8, i16, i32, f32], CCAssignToReg<
+ [ include "AMDILRegisterUsesScalar.td" ]> >,
+
+ CCIfType<[v2i32, v2f32, v2i8, v2i16, f64, i64], CCAssignToReg<
+ [ include "AMDILRegisterUsesV2.td" ]> >,
+
+ CCIfType<[v4i32, v4f32, v4i8, v4i16, v2f64, v2i64], CCAssignToReg<
+ [ include "AMDILRegisterUsesV4.td" ]> >,
+
+ CCAssignToStack<16, 16>
+ ]>;
+
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,87 @@
+//===-- AMDILCompilerErrors.h ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_COMPILER_ERRORS_H_
+#define _AMDIL_COMPILER_ERRORS_H_
+// Compiler errors generated by the backend that will cause
+// the runtime to abort compilation. These are mainly for
+// device constraint violations or invalid code.
+namespace amd
+{
+
+#define INVALID_COMPUTE 0
+#define GENERIC_ERROR 1
+#define INTERNAL_ERROR 2
+#define MISSING_FUNCTION_CALL 3
+#define RESERVED_FUNCTION 4
+#define BYTE_STORE_ERROR 5
+#define UNKNOWN_TYPE_NAME 6
+#define NO_IMAGE_SUPPORT 7
+#define NO_ATOMIC_32 8
+#define NO_ATOMIC_64 9
+#define IRREDUCIBLE_CF 10
+#define INSUFFICIENT_RESOURCES 11
+#define INSUFFICIENT_LOCAL_RESOURCES 12
+#define INSUFFICIENT_PRIVATE_RESOURCES 13
+#define INSUFFICIENT_IMAGE_RESOURCES 14
+#define DOUBLE_NOT_SUPPORTED 15
+#define INVALID_CONSTANT_WRITE 16
+#define INSUFFICIENT_CONSTANT_RESOURCES 17
+#define INSUFFICIENT_COUNTER_RESOURCES 18
+#define INSUFFICIENT_REGION_RESOURCES 19
+#define REGION_MEMORY_ERROR 20
+#define MEMOP_NO_ALLOCATION 21
+#define RECURSIVE_FUNCTION 22
+#define INCORRECT_COUNTER_USAGE 23
+#define INVALID_INTRINSIC_USAGE 24
+#define INSUFFICIENT_SEMAPHORE_RESOURCES 25
+#define NO_SEMAPHORE_SUPPORT 26
+#define INVALID_INIT_VALUE 27
+#define NO_FLAT_SUPPORT 28
+#define NUM_ERROR_MESSAGES 29
+
+static const char *CompilerErrorMessage[NUM_ERROR_MESSAGES] = {
+  "E000:Compute Shader Not Supported!   ",
+  "E001:Generic Compiler Error Message! ",
+  "E002:Internal Compiler Error Message!",
+  "E003:Missing Function Call Detected! ",
+  "E004:Reserved Function Call Detected!",
+  "E005:Byte Addressable Stores Invalid!",
+  "E006:Kernel Arg Type Name Is Invalid!",
+  "E007:Image Extension Unsupported!    ",
+  "E008:32bit Atomic Op are Unsupported!",
+  "E009:64bit Atomic Op are Unsupported!",
+  "E010:Irreducible ControlFlow Detected",
+  "E011:Insufficient Resources Detected!",
+  "E012:Insufficient Local Resources!   ",
+  "E013:Insufficient Private Resources! ",
+  "E014:Images not currently supported! ",
+  "E015:Double precision not supported! ",
+  "E016:Invalid Constant Memory Write!  ",
+  "E017:Max number Constant Ptr reached!",
+  "E018:Max number of Counters reached! ",
+  "E019:Insufficient Region Resources!  ",
+  "E020:Region address space invalid!   ",
+  "E021:MemOp with no memory allocated! ",
+  "E022:Recursive Function detected!    ",
+  "E023:Illegal Inc+Dec to same counter!",
+  "E024:Illegal usage of intrinsic inst!",
+  "E025:Insufficient Semaphore Resources",
+  "E026:Semaphores not supported!       ",
+  "E027:Semaphore init value is invalid!",
+  "E028:Flat address is not supported!  "
+};
+
+}
+
+#endif // _AMDIL_COMPILER_ERRORS_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,37 @@
+//===-- AMDILCompilerWarnings.h -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_COMPILER_WARNINGS_H_
+#define _AMDIL_COMPILER_WARNINGS_H_
+/// Compiler backend generated warnings that might cause
+/// issues with compilation. These warnings become errors if
+/// -Werror is specified on the command line.
+namespace amd
+{
+
+#define LIMIT_BARRIER 0
+#define BAD_BARRIER_OPT 1
+#define RECOVERABLE_ERROR 2
+#define NUM_WARN_MESSAGES 3
+
+static const char *CompilerWarningMessage[NUM_WARN_MESSAGES] = {
+  /// All warnings must be prefixed with the W token or they might be
+  /// treated as errors.
+  "W000:Barrier caused limited groupsize",
+  "W001:Dangerous Barrier Opt Detected! ",
+  "W002:Recoverable BE Error Detected!  "
+
+};
+}
+
+#endif // _AMDIL_COMPILER_WARNINGS_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1043 @@
+//===-- AMDILConversions.td -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+def actos_i16:Pat < (i16 (anyext GPRI8:$src)),
+(IL_ASSHORT_i32
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24))) >;
+
+
+def uctos_i16:Pat < (i16 (zext GPRI8:$src)),
+(IL_ASSHORT_i32
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24))) >;
+
+
+def sctos_i16:Pat < (i16 (sext GPRI8:$src)),
+(IL_ASSHORT_i32
+ (SHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24))) >;
+
+
+def actoi_i32:Pat < (i32 (anyext GPRI8:$src)),
+(IL_ASINT_i32
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24))) >;
+
+
+def uctoi_i32:Pat < (i32 (zext GPRI8:$src)),
+(IL_ASINT_i32
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24))) >;
+
+
+def sctoi_i32:Pat < (i32 (sext GPRI8:$src)),
+(IL_ASINT_i32
+ (SHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24))) >;
+
+
+def actol_i64:Pat < (i64 (anyext GPRI8:$src)),
+(LCREATE
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24)),
+ (LOADCONST_i32 0)) >;
+
+
+def uctol_i64:Pat < (i64 (zext GPRI8:$src)),
+(LCREATE
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24)),
+ (LOADCONST_i32 0)) >;
+
+
+def sctol_i64:Pat < (i64 (sext GPRI8:$src)),
+(LCREATE
+ (SHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24)),
+ (SHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 31))) >;
+
+
+def astoi_i32:Pat < (i32 (anyext GPRI16:$src)),
+(IL_ASINT_i32
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+   (LOADCONST_i32 16)),
+  (LOADCONST_i32 16))) >;
+
+
+def ustoi_i32:Pat < (i32 (zext GPRI16:$src)),
+(IL_ASINT_i32
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+   (LOADCONST_i32 16)),
+  (LOADCONST_i32 16))) >;
+
+
+def sstoi_i32:Pat < (i32 (sext GPRI16:$src)),
+(IL_ASINT_i32
+ (SHR_i32
+  (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+   (LOADCONST_i32 16)),
+  (LOADCONST_i32 16))) >;
+
+
+def astol_i64:Pat < (i64 (anyext GPRI16:$src)),
+(LCREATE
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+   (LOADCONST_i32 16)),
+  (LOADCONST_i32 16)),
+ (LOADCONST_i32 0)) >;
+
+
+def ustol_i64:Pat < (i64 (zext GPRI16:$src)),
+(LCREATE
+ (USHR_i32
+  (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+   (LOADCONST_i32 16)),
+  (LOADCONST_i32 16)),
+ (LOADCONST_i32 0)) >;
+
+
+def sstol_i64:Pat < (i64 (sext GPRI16:$src)),
+(LCREATE
+ (SHR_i32
+  (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+   (LOADCONST_i32 16)),
+  (LOADCONST_i32 16)),
+ (SHR_i32
+  (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+   (LOADCONST_i32 16)),
+  (LOADCONST_i32 31))) >;
+
+
+def aitol_i64:Pat < (i64 (anyext GPRI32:$src)),
+(LCREATE
+(IL_ASINT_i32 GPRI32:$src),
+ (LOADCONST_i32 0)) >;
+
+
+def uitol_i64:Pat < (i64 (zext GPRI32:$src)),
+(LCREATE
+(IL_ASINT_i32 GPRI32:$src),
+ (LOADCONST_i32 0)) >;
+
+
+def sitol_i64:Pat < (i64 (sext GPRI32:$src)),
+(LCREATE
+(IL_ASINT_i32 GPRI32:$src),
+ (SHR_i32
+  (SHL_i32
+(IL_ASINT_i32 GPRI32:$src),
+   (LOADCONST_i32 0)),
+  (LOADCONST_i32 31))) >;
+
+
+
+def sctof_f32:Pat < (f32 (sint_to_fp GPRI8:$src)),
+(f32
+ (ITOF
+  (SHR_i32
+   (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+    (LOADCONST_i32 24)),
+   (LOADCONST_i32 24)))) >;
+
+
+def uctof_f32:Pat < (f32 (uint_to_fp GPRI8:$src)),
+(f32
+ (UTOF
+  (USHR_i32
+   (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+    (LOADCONST_i32 24)),
+   (LOADCONST_i32 24)))) >;
+
+
+def ftosc_i8:Pat < (i8 (fp_to_sint GPRF32:$src)),
+(i8
+ (IL_ASCHAR_i32
+  (BINARY_AND_i32
+(FTOI GPRF32:$src),
+   (LOADCONST_i32 0x000000FF)))) >;
+
+
+def ftouc_i8:Pat < (i8 (fp_to_uint GPRF32:$src)),
+(i8
+ (IL_ASCHAR_i32
+  (BINARY_AND_i32
+(FTOU GPRF32:$src),
+   (LOADCONST_i32 0x000000FF)))) >;
+
+
+def sctod_f64:Pat < (f64 (sint_to_fp GPRI8:$src)),
+(f64 (FTOD
+      (ITOF
+       (SHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24))))) >;
+
+
+def uctod_f64:Pat < (f64 (uint_to_fp GPRI8:$src)),
+(f64 (FTOD
+      (UTOF
+       (USHR_i32
+  (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+   (LOADCONST_i32 24)),
+  (LOADCONST_i32 24))))) >;
+
+
+def dtosc_i8:Pat < (i8 (fp_to_sint GPRF64:$src)),
+(i8
+ (IL_ASCHAR_i32
+  (BINARY_AND_i32
+(FTOI (DTOF GPRF64:$src)),
+   (LOADCONST_i32 0x000000FF)))) >;
+
+
+def dtouc_i8:Pat < (i8 (fp_to_uint GPRF64:$src)),
+(i8
+ (IL_ASCHAR_i32
+  (BINARY_AND_i32
+(FTOU (DTOF GPRF64:$src)),
+   (LOADCONST_i32 0x000000FF)))) >;
+
+
+def sstof_f32:Pat < (f32 (sint_to_fp GPRI16:$src)),
+(f32
+ (ITOF
+  (SHR_i32
+   (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+    (LOADCONST_i32 16)),
+   (LOADCONST_i32 16)))) >;
+
+
+def ustof_f32:Pat < (f32 (uint_to_fp GPRI16:$src)),
+(f32
+ (UTOF
+  (USHR_i32
+   (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+    (LOADCONST_i32 16)),
+   (LOADCONST_i32 16)))) >;
+
+
+def ftoss_i16:Pat < (i16 (fp_to_sint GPRF32:$src)),
+(i16
+ (IL_ASSHORT_i32
+  (BINARY_AND_i32
+(FTOI GPRF32:$src),
+   (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def ftous_i16:Pat < (i16 (fp_to_uint GPRF32:$src)),
+(i16
+ (IL_ASSHORT_i32
+  (BINARY_AND_i32
+(FTOU GPRF32:$src),
+   (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def sstod_f64:Pat < (f64 (sint_to_fp GPRI16:$src)),
+(f64 (FTOD
+      (ITOF
+       (SHR_i32
+  (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+   (LOADCONST_i32 16)),
+  (LOADCONST_i32 16))))) >;
+
+
+def ustod_f64:Pat < (f64 (uint_to_fp GPRI16:$src)),
+(f64 (FTOD
+      (UTOF
+       (USHR_i32
+  (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+   (LOADCONST_i32 16)),
+  (LOADCONST_i32 16))))) >;
+
+
+def dtoss_i16:Pat < (i16 (fp_to_sint GPRF64:$src)),
+(i16
+ (IL_ASSHORT_i32
+  (BINARY_AND_i32
+(FTOI (DTOF GPRF64:$src)),
+   (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def dtous_i16:Pat < (i16 (fp_to_uint GPRF64:$src)),
+(i16
+ (IL_ASSHORT_i32
+  (BINARY_AND_i32
+(FTOU (DTOF GPRF64:$src)),
+   (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+
+
+
+def stoc_i8:Pat < (i8 (trunc GPRI16:$src)),
+(IL_ASCHAR_i32
+ (SHR_i32
+ (SHL_i32
+   (IL_ASINT_i16
+(BINARY_AND_i16 GPRI16:$src,
+     (LOADCONST_i16 0x000000FF))),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))
+ )>;
+
+
+def itoc_i8:Pat < (i8 (trunc GPRI32:$src)),
+(IL_ASCHAR_i32
+ (SHR_i32
+ (SHL_i32
+   (IL_ASINT_i32
+(BINARY_AND_i32 GPRI32:$src,
+     (LOADCONST_i32 0x000000FF)))
+ , (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))
+  ) >;
+
+
+def itos_i16:Pat < (i16 (trunc GPRI32:$src)),
+(IL_ASSHORT_i32
+ (SHR_i32
+ (SHL_i32
+   (IL_ASINT_i32
+(BINARY_AND_i32 GPRI32:$src,
+     (LOADCONST_i32 0x0000FFFF)))
+ , (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))
+  ) >;
+
+
+def ltoc_i8:Pat < (i8 (trunc GPRI64:$src)),
+(IL_ASCHAR_i32
+ (SHR_i32
+ (SHL_i32
+   (BINARY_AND_i32
+(LLO GPRI64:$src),
+    (LOADCONST_i32 0x000000FF))
+ , (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))
+  ) >;
+
+
+def ltos_i16:Pat < (i16 (trunc GPRI64:$src)),
+(IL_ASSHORT_i32
+ (SHR_i32
+ (SHL_i32
+   (BINARY_AND_i32
+(LLO GPRI64:$src),
+    (LOADCONST_i32 0x0000FFFF))
+ , (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))
+  ) >;
+
+
+def ltoi_i32:Pat < (i32 (trunc GPRI64:$src)), (LLO GPRI64:$src) >;
+
+def actos_v2i16:Pat < (v2i16 (anyext GPRV2I8:$src)),
+(IL_ASV2SHORT_v2i32
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def uctos_v2i16:Pat < (v2i16 (zext GPRV2I8:$src)),
+(IL_ASV2SHORT_v2i32
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def sctos_v2i16:Pat < (v2i16 (sext GPRV2I8:$src)),
+(IL_ASV2SHORT_v2i32
+ (SHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def actoi_v2i32:Pat < (v2i32 (anyext GPRV2I8:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def uctoi_v2i32:Pat < (v2i32 (zext GPRV2I8:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def sctoi_v2i32:Pat < (v2i32 (sext GPRV2I8:$src)),
+(IL_ASV2INT_v2i32
+ (SHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def actol_v2i64:Pat < (v2i64 (anyext GPRV2I8:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def uctol_v2i64:Pat < (v2i64 (zext GPRV2I8:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def sctol_v2i64:Pat < (v2i64 (sext GPRV2I8:$src)),
+(LCREATE_v2i64
+ (SHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (SHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 24))),
+  (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+def astoi_v2i32:Pat < (v2i32 (anyext GPRV2I16:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 16))),
+  (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+
+def ustoi_v2i32:Pat < (v2i32 (zext GPRV2I16:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 16))),
+  (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+
+def sstoi_v2i32:Pat < (v2i32 (sext GPRV2I16:$src)),
+(IL_ASV2INT_v2i32
+ (SHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 16))),
+  (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+
+def astol_v2i64:Pat < (v2i64 (anyext GPRV2I16:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 16))),
+  (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def ustol_v2i64:Pat < (v2i64 (zext GPRV2I16:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 16))),
+  (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def sstol_v2i64:Pat < (v2i64 (sext GPRV2I16:$src)),
+(LCREATE_v2i64
+ (SHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 16))),
+  (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (SHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 16))),
+  (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+def aitol_v2i64:Pat < (v2i64 (anyext GPRV2I32:$src)),
+(LCREATE_v2i64
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def uitol_v2i64:Pat < (v2i64 (zext GPRV2I32:$src)),
+(LCREATE_v2i64
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def sitol_v2i64:Pat < (v2i64 (sext GPRV2I32:$src)),
+(LCREATE_v2i64
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (SHRVEC_v2i32
+  (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 0))),
+  (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+
+def sctof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I8:$src)),
+(v2f32
+ (ITOF_v2f32
+  (SHRVEC_v2i32
+   (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+    (VCREATE_v2i32 (LOADCONST_i32 24))),
+   (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+
+
+def uctof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)),
+(v2f32
+ (UTOF_v2f32
+  (USHRVEC_v2i32
+   (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+    (VCREATE_v2i32 (LOADCONST_i32 24))),
+   (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+
+
+def ftosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F32:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+  (BINARY_AND_v2i32
+(FTOI_v2i32 GPRV2F32:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def ftouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F32:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+  (BINARY_AND_v2i32
+(FTOU_v2i32 GPRV2F32:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+def sctod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I8:$src)),
+(v2f64 
+ (VINSERT_v2f64
+ (VCREATE_v2f64 
+ (FTOD
+  (VEXTRACT_v2f32
+  (ITOF_v2f32
+   (SHRVEC_v2i32
+    (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+     (VCREATE_v2i32 (LOADCONST_i32 24))),
+    (VCREATE_v2i32 (LOADCONST_i32 24)))),
+  1)
+ )),
+ (FTOD
+  (VEXTRACT_v2f32
+  (ITOF_v2f32
+   (SHRVEC_v2i32
+    (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+     (VCREATE_v2i32 (LOADCONST_i32 24))),
+    (VCREATE_v2i32 (LOADCONST_i32 24)))),
+  2)
+  ), 1, 256)
+ ) >;
+
+def uctod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)),
+(v2f64 
+ (VINSERT_v2f64
+ (VCREATE_v2f64 
+ (FTOD
+  (VEXTRACT_v2f32
+  (UTOF_v2f32
+   (USHRVEC_v2i32
+    (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+     (VCREATE_v2i32 (LOADCONST_i32 24))),
+    (VCREATE_v2i32 (LOADCONST_i32 24)))),
+  1)
+ )),
+ (FTOD
+  (VEXTRACT_v2f32
+  (UTOF_v2f32
+   (USHRVEC_v2i32
+    (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+     (VCREATE_v2i32 (LOADCONST_i32 24))),
+    (VCREATE_v2i32 (LOADCONST_i32 24)))),
+  2)
+  ), 1, 256)
+ ) >;
+
+
+def dtosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F64:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+  (BINARY_AND_v2i32
+(FTOI_v2i32 (VINSERT_v2f32 
+             (VCREATE_v2f32 
+              (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+             (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+   (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def dtouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F64:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+  (BINARY_AND_v2i32
+(FTOU_v2i32 (VINSERT_v2f32 
+             (VCREATE_v2f32 
+              (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+             (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+   (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def sstof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I16:$src)),
+(v2f32
+ (ITOF_v2f32
+  (SHRVEC_v2i32
+   (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+    (VCREATE_v2i32 (LOADCONST_i32 16))),
+   (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+
+
+def ustof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)),
+(v2f32
+ (UTOF_v2f32
+  (USHRVEC_v2i32
+   (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+    (VCREATE_v2i32 (LOADCONST_i32 16))),
+   (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+
+
+def ftoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F32:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+  (BINARY_AND_v2i32
+(FTOI_v2i32 GPRV2F32:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def ftous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F32:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+  (BINARY_AND_v2i32
+(FTOU_v2i32 GPRV2F32:$src),
+   (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def sstod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I16:$src)),
+(v2f64 
+ (VINSERT_v2f64
+ (VCREATE_v2f64 
+ (FTOD
+  (VEXTRACT_v2f32
+  (ITOF_v2f32
+   (SHRVEC_v2i32
+    (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+     (VCREATE_v2i32 (LOADCONST_i32 16))),
+    (VCREATE_v2i32 (LOADCONST_i32 16)))),
+  1)
+ )),
+ (FTOD
+  (VEXTRACT_v2f32
+  (ITOF_v2f32
+   (SHRVEC_v2i32
+    (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+     (VCREATE_v2i32 (LOADCONST_i32 16))),
+    (VCREATE_v2i32 (LOADCONST_i32 16)))),
+  2)
+  ), 1, 256)
+ ) >;
+
+def ustod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I16:$src)),
+(v2f64 
+ (VINSERT_v2f64
+ (VCREATE_v2f64 
+ (FTOD
+  (VEXTRACT_v2f32
+  (UTOF_v2f32
+   (USHRVEC_v2i32
+    (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+     (VCREATE_v2i32 (LOADCONST_i32 16))),
+    (VCREATE_v2i32 (LOADCONST_i32 16)))),
+  1)
+ )),
+ (FTOD
+  (VEXTRACT_v2f32
+  (UTOF_v2f32
+   (USHRVEC_v2i32
+    (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+     (VCREATE_v2i32 (LOADCONST_i32 16))),
+    (VCREATE_v2i32 (LOADCONST_i32 16)))),
+  2)
+  ), 1, 256)
+ ) >;
+
+
+def dtoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F64:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+  (BINARY_AND_v2i32
+(FTOI_v2i32 (VINSERT_v2f32 
+             (VCREATE_v2f32 
+              (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+             (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+   (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def dtous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F64:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+  (BINARY_AND_v2i32
+(FTOU_v2i32 (VINSERT_v2f32 
+             (VCREATE_v2f32 
+              (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+             (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+   (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+def stoc_v2i8:Pat < (v2i8 (trunc GPRV2I16:$src)),
+(IL_ASV2CHAR_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+   (IL_ASV2INT_v2i16
+(BINARY_AND_v2i16 GPRV2I16:$src,
+     (VCREATE_v2i16 (LOADCONST_i16 0x000000FF))))
+   , (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))
+  ) >;
+
+
+def itoc_v2i8:Pat < (v2i8 (trunc GPRV2I32:$src)),
+(IL_ASV2CHAR_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+   (IL_ASV2INT_v2i32
+(BINARY_AND_v2i32 GPRV2I32:$src,
+     (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))
+   , (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))
+  ) >;
+
+
+def itos_v2i16:Pat < (v2i16 (trunc GPRV2I32:$src)),
+(IL_ASV2SHORT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+   (IL_ASV2INT_v2i32
+(BINARY_AND_v2i32 GPRV2I32:$src,
+     (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))
+   , (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))
+  ) >;
+
+
+def ltoc_v2i8:Pat < (v2i8 (trunc GPRV2I64:$src)),
+(IL_ASV2CHAR_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+   (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+    (VCREATE_v2i32 (LOADCONST_i32 0x000000FF)))
+   , (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))
+  ) >;
+
+
+def ltos_v2i16:Pat < (v2i16 (trunc GPRV2I64:$src)),
+(IL_ASV2SHORT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+   (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+    (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF)))
+   , (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))
+  ) >;
+
+
+def ltoi_v2i32:Pat < (v2i32 (trunc GPRV2I64:$src)), (LLO_v2i64 GPRV2I64:$src)>;
+
+
+
+def actos_v4i16:Pat < (v4i16 (anyext GPRV4I8:$src)),
+(IL_ASV4SHORT_v4i32
+ (USHRVEC_v4i32
+  (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 24))),
+  (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def uctos_v4i16:Pat < (v4i16 (zext GPRV4I8:$src)),
+(IL_ASV4SHORT_v4i32
+ (USHRVEC_v4i32
+  (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 24))),
+  (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def sctos_v4i16:Pat < (v4i16 (sext GPRV4I8:$src)),
+(IL_ASV4SHORT_v4i32
+ (SHRVEC_v4i32
+  (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 24))),
+  (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def actoi_v4i32:Pat < (v4i32 (anyext GPRV4I8:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+  (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 24))),
+  (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def uctoi_v4i32:Pat < (v4i32 (zext GPRV4I8:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+  (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 24))),
+  (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def sctoi_v4i32:Pat < (v4i32 (sext GPRV4I8:$src)),
+(IL_ASV4INT_v4i32
+ (SHRVEC_v4i32
+  (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 24))),
+  (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def astoi_v4i32:Pat < (v4i32 (anyext GPRV4I16:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+  (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 16))),
+  (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+def ustoi_v4i32:Pat < (v4i32 (zext GPRV4I16:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+  (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 16))),
+  (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+def sstoi_v4i32:Pat < (v4i32 (sext GPRV4I16:$src)),
+(IL_ASV4INT_v4i32
+ (SHRVEC_v4i32
+  (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 16))),
+  (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+
+def sctof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I8:$src)),
+(v4f32
+ (ITOF_v4f32
+  (SHRVEC_v4i32
+   (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+    (VCREATE_v4i32 (LOADCONST_i32 24))),
+   (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+
+
+def uctof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)),
+(v4f32
+ (UTOF_v4f32
+  (USHRVEC_v4i32
+   (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+    (VCREATE_v4i32 (LOADCONST_i32 24))),
+   (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+
+
+def ftosc_v4i8:Pat < (v4i8 (fp_to_sint GPRV4F32:$src)),
+(v4i8
+ (IL_ASV4CHAR_v4i32
+  (BINARY_AND_v4i32
+(FTOI_v4i32 GPRV4F32:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def ftouc_v4i8:Pat < (v4i8 (fp_to_uint GPRV4F32:$src)),
+(v4i8
+ (IL_ASV4CHAR_v4i32
+  (BINARY_AND_v4i32
+(FTOU_v4i32 GPRV4F32:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def sstof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I16:$src)),
+(v4f32
+ (ITOF_v4f32
+  (SHRVEC_v4i32
+   (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+    (VCREATE_v4i32 (LOADCONST_i32 16))),
+   (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+
+
+def ustof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I16:$src)),
+(v4f32
+ (UTOF_v4f32
+  (USHRVEC_v4i32
+   (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+    (VCREATE_v4i32 (LOADCONST_i32 16))),
+   (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+
+
+def ftoss_v4i16:Pat < (v4i16 (fp_to_sint GPRV4F32:$src)),
+(v4i16
+ (IL_ASV4SHORT_v4i32
+  (BINARY_AND_v4i32
+(FTOI_v4i32 GPRV4F32:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def ftous_v4i16:Pat < (v4i16 (fp_to_uint GPRV4F32:$src)),
+(v4i16
+ (IL_ASV4SHORT_v4i32
+  (BINARY_AND_v4i32
+(FTOU_v4i32 GPRV4F32:$src),
+   (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+
+
+
+def stoc_v4i8:Pat < (v4i8 (trunc GPRV4I16:$src)),
+(IL_ASV4CHAR_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+   (IL_ASV4INT_v4i16
+(BINARY_AND_v4i16 GPRV4I16:$src,
+     (VCREATE_v4i16 (LOADCONST_i16 0x000000FF))))
+   , (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))
+  ) >;
+
+
+def itoc_v4i8:Pat < (v4i8 (trunc GPRV4I32:$src)),
+(IL_ASV4CHAR_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+   (IL_ASV4INT_v4i32
+(BINARY_AND_v4i32 GPRV4I32:$src,
+     (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))
+   , (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))
+  ) >;
+
+
+def itos_v4i16:Pat < (v4i16 (trunc GPRV4I32:$src)),
+(IL_ASV4SHORT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+   (IL_ASV4INT_v4i32
+(BINARY_AND_v4i32 GPRV4I32:$src,
+     (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))
+   , (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))
+  ) >;
+
+
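
The patterns above lower sub-dword extensions and truncations with shift pairs because AMDIL registers are 32 bits wide: the value is shifted up to the top of the register and shifted back down, arithmetically for sext and logically for zext. A minimal standalone C++ sketch of the scalar i8 cases (sctoi_i32 / uctoi_i32), for illustration only:

// Standalone sketch of the shift trick used by the patterns above (e.g.
// sctoi_i32 / uctoi_i32): an i8 held in the low bits of a 32-bit register is
// widened by shifting it to the top of the register and back down.
#include <cassert>
#include <cstdint>

static int32_t signExtendI8(uint32_t reg) {
  return (int32_t)(reg << 24) >> 24;   // SHL_i32 24, then arithmetic SHR_i32 24
}

static uint32_t zeroExtendI8(uint32_t reg) {
  return (reg << 24) >> 24;            // SHL_i32 24, then logical USHR_i32 24
}

int main() {
  assert(signExtendI8(0xFFu) == -1);    // 0xFF sign-extends to -1
  assert(zeroExtendI8(0xFFu) == 255u);  // 0xFF zero-extends to 255
  return 0;
}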

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,149 @@
+//===-- AMDILDevice.cpp ---------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+using namespace llvm;
+// Default implementation for all of the classes.
+AMDILDevice::AMDILDevice(AMDILSubtarget *ST) : mSTM(ST)
+{
+  mHWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+  mSWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+  setCaps();
+  mDeviceFlag = OCL_DEVICE_ALL;
+}
+
+AMDILDevice::~AMDILDevice()
+{
+  mHWBits.clear();
+  mSWBits.clear();
+}
+
+size_t AMDILDevice::getMaxGDSSize() const
+{
+  return 0;
+}
+
+uint32_t
+AMDILDevice::getDeviceFlag() const
+{
+  return mDeviceFlag;
+}
+
+size_t AMDILDevice::getMaxNumCBs() const
+{
+  if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
+    return HW_MAX_NUM_CB;
+  }
+
+  return 0;
+}
+
+size_t AMDILDevice::getMaxCBSize() const
+{
+  if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
+    return MAX_CB_SIZE;
+  }
+
+  return 0;
+}
+
+size_t AMDILDevice::getMaxScratchSize() const
+{
+  return 65536;
+}
+
+uint32_t AMDILDevice::getStackAlignment() const
+{
+  return 16;
+}
+
+void AMDILDevice::setCaps()
+{
+  mSWBits.set(AMDILDeviceInfo::HalfOps);
+  mSWBits.set(AMDILDeviceInfo::ByteOps);
+  mSWBits.set(AMDILDeviceInfo::ShortOps);
+  mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+  if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) {
+    mSWBits.set(AMDILDeviceInfo::NoInline);
+  }
+  if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) {
+    mSWBits.set(AMDILDeviceInfo::MacroDB);
+  }
+  if (mSTM->isOverride(AMDILDeviceInfo::NoAlias)) {
+    mSWBits.set(AMDILDeviceInfo::NoAlias);
+  }
+  if (mSTM->isApple()) {
+    mSWBits.set(AMDILDeviceInfo::ConstantMem);
+  } else {
+    if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+      mSWBits.set(AMDILDeviceInfo::ConstantMem);
+    } else {
+      mHWBits.set(AMDILDeviceInfo::ConstantMem);
+    }
+  }
+  if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+    mSWBits.set(AMDILDeviceInfo::PrivateMem);
+  } else {
+    mHWBits.set(AMDILDeviceInfo::PrivateMem);
+  }
+  if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) {
+    mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+  }
+  mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
+  mSWBits.set(AMDILDeviceInfo::ByteGDSOps);
+  mSWBits.set(AMDILDeviceInfo::LongOps);
+}
+
+AMDILDeviceInfo::ExecutionMode
+AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const
+{
+  if (mHWBits[Caps]) {
+    assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+    return AMDILDeviceInfo::Hardware;
+  }
+
+  if (mSWBits[Caps]) {
+    assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+    return AMDILDeviceInfo::Software;
+  }
+
+  return AMDILDeviceInfo::Unsupported;
+}
+
+bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const
+{
+  return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported;
+}
+
+bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const
+{
+  return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware;
+}
+
+bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const
+{
+  return getExecutionMode(Mode) == AMDILDeviceInfo::Software;
+}
+
+std::string
+AMDILDevice::getDataLayout() const
+{
+  return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+                     "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+                     "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+                     "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+                     "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+                     "-n8:16:32:64");
+}
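
setCaps() and getExecutionMode() above encode the invariant that a capability is hardware, software, or unsupported, never hardware and software at once. A simplified standalone sketch of that model (the real code uses llvm::BitVector and the AMDILDeviceInfo::Caps enumerators):

// Simplified standalone model of the HW/SW capability bits used above.
#include <bitset>
#include <cassert>

enum ExecutionMode { Unsupported, Software, Hardware };

struct Device {
  std::bitset<48> HWBits, SWBits;   // a cap may be set in at most one of these

  ExecutionMode getExecutionMode(unsigned Cap) const {
    if (HWBits[Cap]) { assert(!SWBits[Cap]); return Hardware; }
    if (SWBits[Cap]) { assert(!HWBits[Cap]); return Software; }
    return Unsupported;
  }
  bool usesHardware(unsigned Cap) const { return getExecutionMode(Cap) == Hardware; }
  bool usesSoftware(unsigned Cap) const { return getExecutionMode(Cap) == Software; }
};

int main() {
  enum { ConstantMem = 0x8, FlatMem = 0x20 };  // values mirror AMDILDeviceInfo.h
  Device D;
  D.HWBits.set(ConstantMem);                   // as setCaps() does for most devices
  assert(D.usesHardware(ConstantMem));
  assert(!D.usesSoftware(FlatMem));            // unset caps report Unsupported
  return 0;
}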

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,130 @@
+//===-- AMDILDevice.h -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILDEVICEIMPL_H_
+#define _AMDILDEVICEIMPL_H_
+#include "AMDIL.h"
+#include "AMDILLLVMPC.h"
+#include "llvm/ADT/BitVector.h"
+namespace llvm
+{
+class AMDILSubtarget;
+class AMDILAsmPrinter;
+class AMDILIOExpansion;
+class AMDILPointerManager;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDILDevice
+{
+public:
+  AMDILDevice(AMDILSubtarget *ST);
+  virtual ~AMDILDevice();
+
+  // Enum values for the various memory types.
+  enum {
+    RAW_UAV_ID   = 0,
+    ARENA_UAV_ID = 1,
+    LDS_ID       = 2,
+    GDS_ID       = 3,
+    SCRATCH_ID   = 4,
+    CONSTANT_ID  = 5,
+    GLOBAL_ID    = 6,
+    MAX_IDS      = 7
+  } IO_TYPE_IDS;
+
+  // Returns the max LDS size that the hardware supports.  Size is in
+  // bytes.
+  virtual size_t getMaxLDSSize() const = 0;
+
+  // Returns the max GDS size that the hardware supports if the GDS is
+  // supported by the hardware.  Size is in bytes.
+  virtual size_t getMaxGDSSize() const;
+
+  // Returns the max number of hardware constant address spaces that
+  // are supported by this device.
+  virtual size_t getMaxNumCBs() const;
+
+  // Returns the max number of bytes a single hardware constant buffer
+  // can support.  Size is in bytes.
+  virtual size_t getMaxCBSize() const;
+
+  // Returns the max number of bytes allowed by the hardware scratch
+  // buffer.  Size is in bytes.
+  virtual size_t getMaxScratchSize() const;
+
+  // Get the flag that corresponds to the device.
+  virtual uint32_t getDeviceFlag() const;
+
+  // Returns the number of work-items that exist in a single hardware
+  // wavefront.
+  virtual size_t getWavefrontSize() const = 0;
+
+  // Get the generational name of this specific device.
+  virtual uint32_t getGeneration() const = 0;
+
+  // Get the stack alignment of this specific device.
+  virtual uint32_t getStackAlignment() const;
+
+  // Get the resource ID for this specific device.
+  virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+  // Get the max number of UAV's for this device.
+  virtual uint32_t getMaxNumUAVs() const = 0;
+
+  // Interface to get the IO Expansion pass for each device.
+  virtual FunctionPass*
+  getIOExpansion(TargetMachine&, CodeGenOpt::Level) const = 0;
+
+  // Interface to get the Asm printer for each device.
+  virtual AsmPrinter*
+  getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const = 0;
+
+  // Interface to get the Pointer manager pass for each device.
+  virtual FunctionPass*
+  getPointerManager(TargetMachine&, CodeGenOpt::Level) const = 0;
+
+
+  // API exposing the more detailed capabilities of each family of
+  // cards. If a capability is supported, then either usesHardware or
+  // usesSoftware returns true.  If usesHardware returns true, then
+  // usesSoftware must return false for the same capability.  Hardware
+  // execution means that the feature is done natively by the hardware
+  // and is not emulated in software.  Software execution means that
+  // the feature could be done in the hardware, but is instead emulated
+  // in software, possibly with hardware support, because the hardware
+  // does not fully comply with the OpenCL specs.
+  bool isSupported(AMDILDeviceInfo::Caps Mode) const;
+  bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
+  bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
+  virtual std::string getDataLayout() const;
+  static const unsigned int MAX_LDS_SIZE_700 = 16384;
+  static const unsigned int MAX_LDS_SIZE_800 = 32768;
+  static const unsigned int MAX_GDS_SIZE_800 = 32768;
+  static const unsigned int WavefrontSize = 64;
+  static const unsigned int HalfWavefrontSize = 32;
+  static const unsigned int QuarterWavefrontSize = 16;
+protected:
+  virtual void setCaps();
+  llvm::BitVector mHWBits;
+  llvm::BitVector mSWBits;
+  AMDILSubtarget *mSTM;
+  uint32_t mDeviceFlag;
+private:
+  AMDILDeviceInfo::ExecutionMode
+  getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
+}; // AMDILDevice
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
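
The class above is an abstract base: each generation supplies the pure virtuals (LDS size, wavefront size, resource IDs, the device-specific passes) and inherits defaults such as getMaxGDSSize() returning 0. A minimal standalone sketch of that shape, with illustrative names and values only:

// Minimal standalone sketch of the per-device specialization the interface
// above encodes; class names and values here are illustrative only.
#include <cassert>
#include <cstddef>

struct Device {                                   // stand-in for AMDILDevice
  virtual ~Device() {}
  virtual size_t getMaxLDSSize() const = 0;       // must be provided per device
  virtual size_t getWavefrontSize() const = 0;
  virtual size_t getMaxGDSSize() const { return 0; }  // base default: no GDS
};

struct EvergreenLike : Device {                   // stand-in for an HD5XXX-class part
  size_t getMaxLDSSize() const { return 32768; }  // cf. MAX_LDS_SIZE_800
  size_t getWavefrontSize() const { return 64; }  // cf. WavefrontSize
};

int main() {
  EvergreenLike dev;
  const Device &d = dev;                          // used through the base interface
  assert(d.getMaxLDSSize() == 32768 && d.getMaxGDSSize() == 0);
  return 0;
}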

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,132 @@
+//===-- AMDILDeviceInfo.cpp -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILDevices.h"
+#include "AMDILSubtarget.h"
+#include <string>
+using namespace llvm;
+namespace llvm
+{
+AMDILDevice*
+getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
+{
+  if (deviceName.c_str()[2] == '7') {
+    switch (deviceName.c_str()[3]) {
+    case '1':
+      return new AMDIL710Device(ptr);
+    case '7':
+      return new AMDIL770Device(ptr);
+    default:
+      return new AMDIL7XXDevice(ptr);
+    };
+  } else if (deviceName == "cypress") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    return new AMDILCypressDevice(ptr);
+  } else if (deviceName == "juniper") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    return new AMDILEvergreenDevice(ptr);
+  } else if (deviceName == "redwood") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    return new AMDILRedwoodDevice(ptr);
+  } else if (deviceName == "cedar") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    return new AMDILCedarDevice(ptr);
+  } else if (deviceName == "barts"
+             || deviceName == "turks") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    return new AMDILNIDevice(ptr);
+  } else if (deviceName == "cayman"
+             || deviceName == "kauai") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    return new AMDILCaymanDevice(ptr);
+  } else if (deviceName == "trinity") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    return new AMDILTrinityDevice(ptr);
+  } else if (deviceName == "caicos") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    return new AMDILNIDevice(ptr);
+  } else if (deviceName == "tahiti") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    if (is64bit) {
+      return new AMDILSIDevice64(ptr);
+    } else {
+      return new AMDILSIDevice32(ptr);
+    }
+  } else if (deviceName == "pitcairn") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    if (is64bit) {
+      return new AMDILSIDevice64(ptr);
+    } else {
+      return new AMDILSIDevice32(ptr);
+    }
+  } else if (deviceName == "capeverde") {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    if (is64bit) {
+      return new AMDILSIDevice64(ptr);
+    } else {
+      return new AMDILSIDevice32(ptr);
+    }
+  } else {
+#if DEBUG
+    assert(!is64bit && "This device does not support 64bit pointers!");
+    assert(!is64on32bit && "This device does not support 64bit"
+           " on 32bit pointers!");
+#endif
+    return new AMDIL7XXDevice(ptr);
+  }
+}
+}
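
getDeviceFromName() above selects the device class purely from the target name string: names whose third character is '7' go to the 7XX family, and the Evergreen/NI/SI names are matched explicitly. A standalone reduction of that dispatch to a generation tag (the mapping mirrors the branches above; trinity and kauai are omitted for brevity, and names such as "rv710" are only examples of the spelling being tested):

// Standalone sketch of the name-based dispatch above, reduced to generations.
#include <cassert>
#include <string>

enum Gen { HD4XXX, HD5XXX, HD6XXX, HD7XXX };

static Gen genFromName(const std::string &name) {
  if (name.size() > 3 && name[2] == '7')          // "rv710", "rv770", ... -> 7XX
    return HD4XXX;
  if (name == "cypress" || name == "juniper" ||
      name == "redwood" || name == "cedar")       // Evergreen parts
    return HD5XXX;
  if (name == "barts" || name == "turks" ||
      name == "caicos" || name == "cayman")       // Northern Islands parts
    return HD6XXX;
  if (name == "tahiti" || name == "pitcairn" ||
      name == "capeverde")                        // Southern Islands parts
    return HD7XXX;
  return HD4XXX;                                  // default, like AMDIL7XXDevice
}

int main() {
  assert(genFromName("redwood") == HD5XXX);
  assert(genFromName("tahiti") == HD7XXX);
  return 0;
}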

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,94 @@
+//===-- AMDILDeviceInfo.h -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILDEVICEINFO_H_
+#define _AMDILDEVICEINFO_H_
+#include <string>
+namespace llvm
+{
+class AMDILDevice;
+class AMDILSubtarget;
+namespace AMDILDeviceInfo
+{
+// Each capability can be executed using a hardware instruction,
+// emulated with a sequence of software instructions, or not
+// supported at all.
+enum ExecutionMode {
+  Unsupported = 0, // Unsupported feature on the card(Default value)
+  Software, // This is the execution mode that is set if the
+  // feature is emulated in software
+  Hardware  // This execution mode is set if the feature exists
+  // natively in hardware
+};
+
+// Any changes to this need to have a corresponding update to the
+// twiki page GPUMetadataABI.
+enum Caps {
+  HalfOps          = 0x1,  // Half float is supported or not.
+  DoubleOps        = 0x2,  // Double is supported or not.
+  ByteOps          = 0x3,  // Byte(char) is supported or not.
+  ShortOps         = 0x4,  // Short is supported or not.
+  LongOps          = 0x5,  // Long is supported or not.
+  Images           = 0x6,  // Images are supported or not.
+  ByteStores       = 0x7,  // ByteStores available(!HD4XXX).
+  ConstantMem      = 0x8,  // Constant/CB memory.
+  LocalMem         = 0x9,  // Local/LDS memory.
+  PrivateMem       = 0xA,  // Scratch/Private/Stack memory.
+  RegionMem        = 0xB,  // OCL GDS Memory Extension.
+  FMA              = 0xC,  // Use HW FMA or SW FMA.
+  ArenaSegment     = 0xD,  // Use for Arena UAV per pointer 12-1023.
+  MultiUAV         = 0xE,  // Use for UAV per Pointer 0-7.
+  PPAMode          = 0xF,  // UAV Per Pointer Allocation Mode capability
+  NoAlias          = 0x10, // Cached loads.
+  Signed24BitOps   = 0x11, // Peephole Optimization.
+  // Debug mode implies that no hardware features or optimizations
+  // are performed and that all memory accesses go through a single
+  // uav (Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+  Debug            = 0x12, // Debug mode is enabled.
+  CachedMem        = 0x13, // Cached mem is available or not.
+  BarrierDetect    = 0x14, // Detect duplicate barriers.
+  Semaphore        = 0x15, // Flag to specify that semaphores are supported.
+  ByteLDSOps       = 0x16, // Flag to specify if byte LDS ops are available.
+  ArenaVectors     = 0x17, // Flag to specify if vector loads from arena work.
+  TmrReg           = 0x18, // Flag to specify if Tmr register is supported.
+  NoInline         = 0x19, // Flag to specify that no inlining should occur.
+  MacroDB          = 0x1A, // Flag to specify that backend handles macrodb.
+  HW64BitDivMod    = 0x1B, // Flag for backend to generate 64bit div/mod.
+  ArenaUAV         = 0x1C, // Flag to specify that arena uav is supported.
+  PrivateUAV       = 0x1D, // Flag to specify that private memory uses uav's.
+  ByteGDSOps       = 0x1F, // Flag to specify if byte GDS ops are available.
+  FlatMem          = 0x20, // Flag to specify if device supports flat addressing.
+  // If more capabilities are required, then
+  // this number needs to be increased.
+  // All capabilities must come before this
+  // number.
+  MaxNumberCapabilities = 0x30
+};
+// These have to be in order, with the older generations
+// having the lower enumeration values.
+enum Generation {
+  HD4XXX = 0, // 7XX based devices.
+  HD5XXX, // Evergreen based devices.
+  HD6XXX, // NI/Evergreen+ based devices.
+  HD7XXX, // SI based devices.
+  HD8XXX, // CI based devices.
+  HDTEST, // Experimental feature testing device.
+  HDNUMGEN
+};
+
+
+} // namespace AMDILDeviceInfo
+llvm::AMDILDevice*
+getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
+} // namespace llvm
+#endif // _AMDILDEVICEINFO_H_
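
Because the Generation enumerators above are declared oldest-first, code can gate generation-dependent features with a simple ordered comparison. A small standalone illustration of the kind of check this ordering enables (hasEvergreenFeatures is a hypothetical helper, not backend code):

// Illustration of the ordering contract: enumerators are oldest-first, so an
// ordered compare is enough to gate generation-dependent features.
#include <cassert>

enum Generation { HD4XXX = 0, HD5XXX, HD6XXX, HD7XXX, HD8XXX, HDTEST, HDNUMGEN };

static bool hasEvergreenFeatures(Generation g) {
  return g >= HD5XXX;                     // Evergreen and everything newer
}

int main() {
  assert(!hasEvergreenFeatures(HD4XXX));  // 7XX parts predate Evergreen
  assert(hasEvergreenFeatures(HD7XXX));   // SI parts are newer than Evergreen
  return 0;
}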

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,24 @@
+//===-- AMDILDevices.h ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __AMDIL_DEVICES_H_
+#define __AMDIL_DEVICES_H_
+// Include all of the device specific header files
+// This file is for Internal use only!
+#include "AMDILDevice.h"
+#include "AMDIL7XXDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILTNDevice.h"
+#include "AMDILSIDevice.h"
+#endif // __AMDIL_DEVICES_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,231 @@
+//===-- AMDILEGAsmPrinter.cpp ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILEGAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+
+// TODO: Add support for verbose.
+AMDILEGAsmPrinter::AMDILEGAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+  : AMDILAsmPrinter(ASM_PRINTER_ARGUMENTS)
+{
+}
+
+AMDILEGAsmPrinter::~AMDILEGAsmPrinter()
+{
+}
+//
+// @param name
+// @brief strips KERNEL_PREFIX ("__OpenCL_") and KERNEL_SUFFIX ("_kernel")
+// from the name and returns the stripped name if both tokens are present;
+// otherwise the name is returned unchanged.
+//
+static
+std::string Strip(const std::string &name)
+{
+  size_t start = name.find("__OpenCL_");
+  size_t end = name.find("_kernel");
+  if (start == std::string::npos
+      || end == std::string::npos
+      || (start == end)) {
+    return name;
+  } else {
+    return name.substr(9, name.length()-16);
+  }
+}
+void
+AMDILEGAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+                                 OSTREAM_TYPE &O)
+{
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  const char *name = "unknown";
+  llvm::StringRef nameRef;
+  if (MI->getOperand(0).isGlobal()) {
+    nameRef = MI->getOperand(0).getGlobal()->getName();
+    name = nameRef.data();
+  }
+  if (!::strncmp(name, "__fma_f32", 9) && curTarget->device()->usesHardware(
+        AMDILDeviceInfo::FMA)) {
+    name = "__hwfma_f32";
+  }
+  emitMCallInst(MI, O, name);
+}
+
+bool
+AMDILEGAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+  this->MF = &lMF;
+  mMeta->setMF(&lMF);
+  mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+  mAMI = &(lMF.getMMI().getObjFileInfo<AMDILModuleInfo>());
+
+  SetupMachineFunction(lMF);
+  std::string kernelName = MF->getFunction()->getName();
+  mName = Strip(kernelName);
+
+  mKernelName = kernelName;
+  EmitFunctionHeader();
+  EmitFunctionBody();
+  return false;
+}
+void
+AMDILEGAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+  std::string FunStr;
+  raw_string_ostream OFunStr(FunStr);
+  formatted_raw_ostream O(OFunStr);
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  if (mDebugMode) {
+    O << ";" ;
+    II->print(O);
+  }
+  if (isMacroFunc(II)) {
+    emitMacroFunc(II, O);
+    O.flush();
+    OutStreamer.EmitRawText(StringRef(FunStr));
+    return;
+  }
+  if (isMacroCall(II)) {
+    unsigned reg = 0;
+    unsigned newDst = 0;
+    OpSwizzle opSwiz, oldSwiz;
+    const char *name = mTM->getInstrInfo()->getName(II->getOpcode()) + 5;
+    if (!::strncmp(name, "__fma_f32", 9)
+        && curTarget->device()->usesHardware(
+          AMDILDeviceInfo::FMA)) {
+      name = "__hwfma_f32";
+    }
+    //II->dump();
+    //assert(0 &&
+    //"Found a macro that is still in use!");
+    int macronum = amd::MacroDBFindMacro(name);
+    O << "\t;"<< name<<"\n";
+    O << "\tmcall("<<macronum<<") ";
+    reg = II->getOperand(0).getReg();
+    newDst = AMDIL::R1000;
+    oldSwiz.u8all = opSwiz.u8all =
+                      II->getOperand(0).getTargetFlags();
+    if (isXComponentReg(reg)) {
+      newDst = AMDIL::Rx1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isYComponentReg(reg)) {
+      newDst = AMDIL::Ry1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isZComponentReg(reg)) {
+      newDst = AMDIL::Rz1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isWComponentReg(reg)) {
+      newDst = AMDIL::Rw1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isXYComponentReg(reg)) {
+      newDst = AMDIL::Rxy1000;
+      opSwiz.bits.swizzle = AMDIL_DST_XY__;
+    } else if (isZWComponentReg(reg)) {
+      newDst = AMDIL::Rzw1000;
+      opSwiz.bits.swizzle = AMDIL_DST_XY__;
+    } else {
+      opSwiz.bits.swizzle = AMDIL_DST_DFLT;
+    }
+    for (unsigned x = 0, y = II->getNumOperands(); x < y; ++x) {
+      if (!x) {
+        O << "(";
+        O << getRegisterName(newDst);
+        O << getDstSwizzle(opSwiz.bits.swizzle);
+      } else {
+        printOperand(II, x, O);
+      }
+      if (!x) {
+        O << "), (";
+      } else if (x != y - 1) {
+        O << ", ";
+      } else {
+        O << ")\n";
+      }
+    }
+    O << "\tmov " << getRegisterName(reg) << getDstSwizzle(oldSwiz.bits.swizzle)
+      << ", " << getRegisterName(newDst);
+    if (isXComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_X000);
+    } else if (isYComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_0X00);
+    } else if (isZComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_00X0);
+    } else if (isWComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_000X);
+    } else if (isXYComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_XY00);
+    } else if (isZWComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_00XY);
+    } else {
+      O << getSrcSwizzle(AMDIL_SRC_DFLT);
+    }
+    O << "\n";
+    if (curTarget->device()->isSupported(
+          AMDILDeviceInfo::MacroDB)) {
+      mMacroIDs.insert(macronum);
+    } else {
+      mMFI->addCalledIntr(macronum);
+    }
+  } else {
+
+    // Print the assembly for the instruction.
+    // We want to make sure that we do HW constants
+    // before we do arena segment
+    // TODO: This is a hack to get around some
+    // conformance failures.
+    if (mMeta->useCompilerWrite(II)) {
+      O << "\tif_logicalz cb0[0].x\n";
+      if (mMFI->usesMem(AMDILDevice::RAW_UAV_ID)) {
+        O << "\tuav_raw_store_id("
+          << curTarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+          << ") ";
+        O << "mem0.x___, cb0[3].x, r0.0\n";
+      } else {
+        O << "\tuav_arena_store_id("
+          << curTarget->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)
+          << ")_size(dword) ";
+        O << "cb0[3].x, r0.0\n";
+      }
+      O << "\tendif\n";
+      mMFI->addMetadata(";memory:compilerwrite");
+    } else {
+      printInstruction(II, O);
+    }
+  }
+  O.flush();
+  OutStreamer.EmitRawText(StringRef(FunStr));
+}
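
Strip() above removes the OpenCL kernel mangling from a function name before it is used as the IL kernel name; anything else passes through untouched. A standalone check of that behaviour (the function body is copied from the patch, main() is illustrative):

// Standalone check of the Strip() behaviour above.
#include <cassert>
#include <string>

static std::string Strip(const std::string &name) {
  size_t start = name.find("__OpenCL_");
  size_t end = name.find("_kernel");
  if (start == std::string::npos || end == std::string::npos || start == end)
    return name;
  return name.substr(9, name.length() - 16);  // drop 9-char prefix, 7-char suffix
}

int main() {
  assert(Strip("__OpenCL_foo_kernel") == "foo");
  assert(Strip("bar") == "bar");              // non-kernel names are unchanged
  return 0;
}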

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,61 @@
+//===-- AMDILEGAsmPrinter.h -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Asm Printer class for the Evergreen and Northern Islands generations of
+// cards. This class handles all of the items unique to these devices that
+// must be handled by the AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_EG_ASM_PRINTER_H_
+#define _AMDIL_EG_ASM_PRINTER_H_
+#include "AMDILAsmPrinter.h"
+
+namespace llvm
+{
+class LLVM_LIBRARY_VISIBILITY AMDILEGAsmPrinter : public AMDILAsmPrinter
+{
+public:
+  //
+  // Constructor for the AMDIL EG-specific AsmPrinter class.
+  // The interface is defined by LLVM proper; refer there
+  // for more information.
+  //
+  AMDILEGAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+  //
+  // Destructor for the EG Asm Printer class that deletes
+  // all of the allocated memory
+  //
+  virtual ~AMDILEGAsmPrinter();
+
+  void
+  EmitInstruction(const MachineInstr *MI);
+
+  //
+  // @param F MachineFunction to print the assembly for
+  // @brief parse the specified machine function and print
+  // out the assembly for all the instructions in the function
+  //
+  bool
+  runOnMachineFunction(MachineFunction &F);
+
+protected:
+  //
+  // @param MI Machine instruction to emit the macro code for
+  //
+  // Emits a fully functional macro function that uses the argument
+  // registers as the macro arguments.
+  //
+  virtual void
+  emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
+
+}; // AMDILEGAsmPrinter
+} // end of llvm namespace
+#endif // _AMDIL_EG_ASM_PRINTER_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1288 @@
+//===-- AMDILEGIOExpansion.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of IO expansion class for evergreen and NI devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+using namespace llvm;
+AMDILEGIOExpansion::AMDILEGIOExpansion(TargetMachine &tm,
+                                       CodeGenOpt::Level OptLevel) : AMDILImageExpansion(tm, OptLevel)
+{
+}
+
+AMDILEGIOExpansion::~AMDILEGIOExpansion()
+{
+}
+const char *AMDILEGIOExpansion::getPassName() const
+{
+  return "AMDIL EG/NI IO Expansion Pass";
+}
+bool
+AMDILEGIOExpansion::isImageIO(MachineInstr *MI)
+{
+  if (!MI->getOperand(0).isGlobal()) {
+    return false;
+  }
+  const llvm::StringRef& nameRef = MI->getOperand(0).getGlobal()->getName();
+  const char *name = nameRef.data();
+  if (nameRef.size() > 8 && !strncmp(name, "__amdil_", 8)) {
+    name += 8;
+    if (!strncmp(name, "sample_data", 11)
+        || !strncmp(name, "write_image", 11)
+        || !strncmp(name, "get_image", 9)
+       ) {
+      return true;
+    }
+  }
+  return false;
+}
+bool
+AMDILEGIOExpansion::isIOInstruction(MachineInstr *MI)
+{
+  if (!MI) {
+    return false;
+  }
+  switch (MI->getOpcode()) {
+  default:
+    return AMDILIOExpansion::isIOInstruction(MI);
+  case AMDIL::IMAGE1D_READ:
+  case AMDIL::IMAGE1D_READ_UNNORM:
+  case AMDIL::IMAGE1D_WRITE:
+  case AMDIL::IMAGE1D_INFO0:
+  case AMDIL::IMAGE1D_INFO1:
+  case AMDIL::IMAGE1DA_READ:
+  case AMDIL::IMAGE1DA_READ_UNNORM:
+  case AMDIL::IMAGE1DA_WRITE:
+  case AMDIL::IMAGE1DA_INFO0:
+  case AMDIL::IMAGE1DA_INFO1:
+  case AMDIL::IMAGE1DB_TXLD:
+  case AMDIL::IMAGE1DB_READ:
+  case AMDIL::IMAGE1DB_READ_UNNORM:
+  case AMDIL::IMAGE1DB_WRITE:
+  case AMDIL::IMAGE1DB_INFO0:
+  case AMDIL::IMAGE1DB_INFO1:
+  case AMDIL::IMAGE2D_READ:
+  case AMDIL::IMAGE2D_READ_UNNORM:
+  case AMDIL::IMAGE2D_WRITE:
+  case AMDIL::IMAGE2D_INFO0:
+  case AMDIL::IMAGE2D_INFO1:
+  case AMDIL::IMAGE2DA_READ:
+  case AMDIL::IMAGE2DA_READ_UNNORM:
+  case AMDIL::IMAGE2DA_WRITE:
+  case AMDIL::IMAGE2DA_INFO0:
+  case AMDIL::IMAGE2DA_INFO1:
+  case AMDIL::IMAGE3D_READ:
+  case AMDIL::IMAGE3D_READ_UNNORM:
+  case AMDIL::IMAGE3D_WRITE:
+  case AMDIL::IMAGE3D_INFO0:
+  case AMDIL::IMAGE3D_INFO1:
+    return true;
+  };
+  return false;
+}
+void
+AMDILEGIOExpansion::expandIOInstruction(MachineInstr *MI)
+{
+  assert(isIOInstruction(MI) && "Must be an IO instruction to "
+         "be passed to this function!");
+  switch (MI->getOpcode()) {
+  default:
+    AMDILIOExpansion::expandIOInstruction(MI);
+    break;
+  case AMDIL::IMAGE1D_READ:
+  case AMDIL::IMAGE1DA_READ:
+  case AMDIL::IMAGE1DB_TXLD:
+  case AMDIL::IMAGE1DB_READ:
+  case AMDIL::IMAGE2D_READ:
+  case AMDIL::IMAGE2DA_READ:
+  case AMDIL::IMAGE3D_READ:
+  case AMDIL::IMAGE1D_READ_UNNORM:
+  case AMDIL::IMAGE1DA_READ_UNNORM:
+  case AMDIL::IMAGE1DB_READ_UNNORM:
+  case AMDIL::IMAGE2D_READ_UNNORM:
+  case AMDIL::IMAGE2DA_READ_UNNORM:
+  case AMDIL::IMAGE3D_READ_UNNORM:
+    expandImageLoad(mBB, MI);
+    break;
+  case AMDIL::IMAGE1D_WRITE:
+  case AMDIL::IMAGE1DA_WRITE:
+  case AMDIL::IMAGE1DB_WRITE:
+  case AMDIL::IMAGE2D_WRITE:
+  case AMDIL::IMAGE2DA_WRITE:
+  case AMDIL::IMAGE3D_WRITE:
+    expandImageStore(mBB, MI);
+    break;
+  case AMDIL::IMAGE1D_INFO0:
+  case AMDIL::IMAGE1D_INFO1:
+  case AMDIL::IMAGE1DA_INFO0:
+  case AMDIL::IMAGE1DA_INFO1:
+  case AMDIL::IMAGE1DB_INFO0:
+  case AMDIL::IMAGE1DB_INFO1:
+  case AMDIL::IMAGE2D_INFO0:
+  case AMDIL::IMAGE2D_INFO1:
+  case AMDIL::IMAGE2DA_INFO0:
+  case AMDIL::IMAGE2DA_INFO1:
+  case AMDIL::IMAGE3D_INFO0:
+  case AMDIL::IMAGE3D_INFO1:
+    expandImageParam(mBB, MI);
+    break;
+  };
+}
+bool
+AMDILEGIOExpansion::isCacheableOp(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  // We only support caching on UAV11 - JeffG
+  if (curRes.bits.ResourceID == 11) {
+    return curRes.bits.CacheableRead;
+  } else {
+    return false;
+  }
+}
+bool
+AMDILEGIOExpansion::isArenaOp(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  return curRes.bits.ResourceID
+         == mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)
+         || curRes.bits.ResourceID >= ARENA_SEGMENT_RESERVED_UAVS;
+}
+void
+AMDILEGIOExpansion::expandPackedData(MachineInstr *MI)
+{
+  if (!isPackedData(MI)) {
+    return;
+  }
+  // There is a bug in the CAL compiler that incorrectly
+  // errors when the UBIT_INSERT instruction is used.
+  if (mSTM->calVersion() < CAL_VERSION_SC_137) {
+    AMDIL789IOExpansion::expandPackedData(MI);
+    return;
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  // If we have packed data, then the shift size is no longer
+  // the same as the load size and we need to adjust accordingly
+  switch(getPackedID(MI)) {
+  default:
+    break;
+  case PACK_V2I8: {
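+    // With width and offset literals of 8, ubit_insert places the low byte of
+    // Ry1011 into bits 8..15 of Rx1011, yielding the packed v2i8 value in the
+    // x component.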
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+
+    .addImm(mMFI->addi32Literal(8)).addImm(mMFI->addi32Literal(8))
+    .addReg(AMDIL::Ry1011).addReg(AMDIL::Rx1011);
+  }
+  break;
+  case PACK_V4I8: {
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::Rxy1012)
+
+    .addReg(AMDIL::R1011);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+
+    .addReg(AMDIL::R1011);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32),
+            AMDIL::Rxy1011)
+
+    .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
+    .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
+    .addReg(AMDIL::Rxy1012).addReg(AMDIL::Rxy1011);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+
+    .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
+    .addReg(AMDIL::Ry1011).addReg(AMDIL::Rx1011);
+  }
+  break;
+  case PACK_V2I16: {
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+
+    .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
+    .addReg(AMDIL::Ry1011).addReg(AMDIL::Rx1011);
+  }
+  break;
+  case PACK_V4I16: {
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::Rxy1012)
+
+    .addReg(AMDIL::R1011);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+
+    .addReg(AMDIL::R1011);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::Rxy1011)
+
+    .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
+    .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
+    .addReg(AMDIL::Rxy1012).addReg(AMDIL::Rxy1011);
+  }
+  break;
+  case UNPACK_V2I8:
+  case UNPACK_V4I8:
+  case UNPACK_V2I16:
+  case UNPACK_V4I16:
+    AMDIL789IOExpansion::expandPackedData(MI);
+    break;
+  };
+}
+static bool
+isAlignedInst(MachineInstr *MI)
+{
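+  // Treat an access as aligned when the recorded alignment covers the access
+  // size (the size is assumed to be a power of two for this mask test).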
+  if (!MI->memoperands_empty()) {
+    return ((*MI->memoperands_begin())->getAlignment()
+            & ((*MI->memoperands_begin())->getSize() - 1)) == 0;
+  }
+  return true;
+}
+
+void
+AMDILEGIOExpansion::expandGlobalLoad(MachineInstr *MI)
+{
+  bool usesArena = isArenaOp(MI);
+  bool cacheable = isCacheableOp(MI);
+  bool aligned = mSTM->calVersion() >= CAL_CACHED_ALIGNED_UAVS
+                 && isAlignedInst(MI);
+  uint32_t ID = getPointerID(MI);
+  mKM->setOutputInst();
+  // These instructions are generated before the current MI.
+  expandLoadStartCode(MI);
+  expandArenaSetup(MI);
+  DebugLoc DL = MI->getDebugLoc();
+  if (getMemorySize(MI) == 1) {
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i8), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1010)
+      .addImm(ID);
+    } else {
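+      // Raw UAV loads operate on dwords, so a byte load is emulated: take the
+      // byte offset within the dword (addr & 3), align the address down to a
+      // dword boundary, derive a shift of 0/8/16/24 bits from that offset,
+      // load the whole dword, and shift the addressed byte into the low lane.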
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1010)
+      .addImm(mMFI->addi32Literal(3));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1010)
+      .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+      .addReg(AMDIL::Rx1008)
+      .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+                                   (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012)
+      .addReg(AMDIL::R1008)
+      .addImm(mMFI->addi32Literal(0));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1012)
+      .addImm(mMFI->addi32Literal(0))
+      .addImm(mMFI->addi32Literal(24));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Ry1012)
+      .addImm(mMFI->addi32Literal(8))
+      .addReg(AMDIL::Rx1008);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rz1012)
+      .addImm(mMFI->addi32Literal(16))
+      .addReg(AMDIL::Rx1008);
+      if (cacheable) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+                AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+                AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+      }
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_v4i8), AMDIL::R1011)
+      .addReg(AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1008);
+    }
+  } else if (getMemorySize(MI) == 2) {
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i16), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1010)
+      .addImm(ID);
+    } else {
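+      // Same dword-based emulation for a 16-bit load: the halfword offset
+      // within the dword selects a shift of 0 or 16 bits applied after the
+      // aligned dword load.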
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1010)
+      .addImm(mMFI->addi32Literal(3));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+      .addImm(mMFI->addi32Literal(1));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1010)
+      .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+      .addImm(mMFI->addi32Literal(16))
+      .addImm(mMFI->addi32Literal(0));
+      if (cacheable) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+                AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+                AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+      }
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1008);
+    }
+  } else if (getMemorySize(MI) == 4) {
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1010)
+      .addImm(ID);
+    } else {
+      if (cacheable) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+                AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+                AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+      }
+    }
+  } else if (getMemorySize(MI) == 8) {
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1010)
+      .addImm(ID);
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Ry1011)
+        .addReg(AMDIL::Ry1010)
+        .addImm(ID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+        .addReg(AMDIL::R1010)
+        .addImm(2);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1008)
+        .addReg(AMDIL::Rx1007)
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rxy1011)
+        .addReg(AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1008);
+      }
+    } else {
+      if (cacheable) {
+        if (aligned) {
+          BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHEDALIGNED_v2i32),
+                  AMDIL::Rxy1011).addReg(AMDIL::Rx1010).addImm(ID);
+        } else {
+          BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v2i32),
+                  AMDIL::Rxy1011).addReg(AMDIL::Rx1010).addImm(ID);
+        }
+
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32),
+                AMDIL::Rxy1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+      }
+    }
+  } else {
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1010)
+      .addImm(ID);
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Ry1011)
+        .addReg(AMDIL::Ry1010)
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rz1011)
+        .addReg(AMDIL::Rz1010)
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rw1011)
+        .addReg(AMDIL::Rw1010)
+        .addImm(ID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+        .addReg(AMDIL::R1010)
+        .addImm(2);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1008)
+        .addReg(AMDIL::Rx1007)
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rxy1011)
+        .addReg(AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1008);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+        .addReg(AMDIL::R1010)
+        .addImm(3);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1008)
+        .addReg(AMDIL::Rx1007)
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+        .addReg(AMDIL::R1010)
+        .addImm(4);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1006)
+        .addReg(AMDIL::Rx1007)
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rzw1011)
+        .addReg(AMDIL::Rx1006)
+        .addReg(AMDIL::Rx1008);
+      }
+    } else {
+      if (cacheable) {
+        if (aligned) {
+          BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHEDALIGNED_v4i32),
+                  AMDIL::R1011).addReg(AMDIL::Rx1010).addImm(ID);
+        } else {
+          BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v4i32),
+                  AMDIL::R1011).addReg(AMDIL::Rx1010).addImm(ID);
+        }
+
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32),
+                AMDIL::R1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+      }
+    }
+  }
+  expandPackedData(MI);
+  unsigned dataReg = expandExtendLoad(MI);
+  if (!dataReg) {
+    dataReg = getDataReg(MI);
+  }
+  BuildMI(*mBB, MI, MI->getDebugLoc(),
+          mTII->get(getMoveInstFromID(
+                      MI->getDesc().OpInfo[0].RegClass)))
+  .addOperand(MI->getOperand(0))
+  .addReg(dataReg);
+  MI->getOperand(0).setReg(dataReg);
+}
+
+void
+AMDILEGIOExpansion::expandRegionLoad(MachineInstr *MI)
+{
+  bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+  if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+    return;
+  }
+  if (!HWRegion || !isHardwareRegion(MI)) {
+    return expandGlobalLoad(MI);
+  }
+  if (!mMFI->usesGDS() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned mulOp = 0;
+  uint32_t gID = getPointerID(MI);
+  assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n");
+  if (!gID) {
+    gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+  unsigned dstReg = AMDIL::R1011;
+  // These instructions are generated before the current MI.
+  expandLoadStartCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+    .addReg(AMDIL::Ry1010)
+
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rz1011)
+    .addReg(AMDIL::Rz1010)
+
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rw1011)
+    .addReg(AMDIL::Rw1010)
+
+    .addImm(gID);
+    break;
+  case 1:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
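+      // No hardware byte GDS loads: load the containing dword and use a
+      // sign-extending ibit_extract with the byte offset scaled to bits to
+      // pull out the addressed 8-bit field.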
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(3));
+      mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
+              ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+      BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(8));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(gID);
+      // The instruction would normally fit in right here so everything created
+      // after this point needs to go into the afterInst vector.
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(8))
+      .addReg(AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1011);
+      dstReg = AMDIL::Rx1011;
+    } else {
+      if (isSWSExtLoadInst(MI)) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_i8), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1010)
+        .addImm(gID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_u8), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1010)
+        .addImm(gID);
+      }
+    }
+    break;
+  case 2:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(3));
+      mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
+              ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+      BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(8));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(gID);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(16))
+      .addReg(AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1011);
+    } else {
+      if (isSWSExtLoadInst(MI)) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_i16), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1010)
+        .addImm(gID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_u16), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1010)
+        .addImm(gID);
+      }
+    }
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+
+    .addImm(gID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+    .addReg(AMDIL::Rx1010)
+
+    .addImm(mMFI->addi64Literal(1ULL << 32));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+    .addReg(AMDIL::Ry1010)
+
+    .addImm(gID);
+    break;
+  };
+  expandPackedData(MI);
+  unsigned dataReg = expandExtendLoad(MI);
+  if (!dataReg) {
+    dataReg = getDataReg(MI);
+  }
+  BuildMI(*mBB, MI, MI->getDebugLoc(),
+          mTII->get(getMoveInstFromID(
+                      MI->getDesc().OpInfo[0].RegClass)))
+  .addOperand(MI->getOperand(0))
+  .addReg(dataReg);
+  MI->getOperand(0).setReg(dataReg);
+}
+void
+AMDILEGIOExpansion::expandLocalLoad(MachineInstr *MI)
+{
+  bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+  if (!HWLocal || !isHardwareLocal(MI)) {
+    return expandGlobalLoad(MI);
+  }
+  if (!mMFI->usesLDS() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  uint32_t lID = getPointerID(MI);
+  assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n");
+  if (!lID) {
+    lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned mulOp = 0;
+  // These instructions are generated before the current MI.
+  expandLoadStartCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC_v4i32), AMDIL::R1011)
+
+    .addReg(AMDIL::Rx1010)
+    .addImm(lID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC_v2i32), AMDIL::Rxy1011)
+    .addReg(AMDIL::Rx1010)
+
+    .addImm(lID);
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+    .addReg(AMDIL::Rx1010)
+
+    .addImm(lID);
+    break;
+  case 1:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
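+      // No hardware byte LDS loads: as with GDS, load the containing dword
+      // and ibit_extract the addressed byte using the bit-scaled offset.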
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(3));
+      mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+              ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+      BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(8));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(lID);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(8))
+      .addReg(AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1011);
+    } else {
+      if (isSWSExtLoadInst(MI)) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_i8), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1010)
+
+        .addImm(lID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_u8), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1010)
+
+        .addImm(lID);
+      }
+    }
+    break;
+  case 2:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(3));
+      mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+              ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+      BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(8));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(lID);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(16))
+      .addReg(AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1011);
+    } else {
+      if (isSWSExtLoadInst(MI)) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_i16), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1010)
+
+        .addImm(lID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_u16), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1010)
+
+        .addImm(lID);
+      }
+    }
+    break;
+  }
+  expandPackedData(MI);
+  unsigned dataReg = expandExtendLoad(MI);
+  if (!dataReg) {
+    dataReg = getDataReg(MI);
+  }
+  BuildMI(*mBB, MI, MI->getDebugLoc(),
+          mTII->get(getMoveInstFromID(
+                      MI->getDesc().OpInfo[0].RegClass)))
+  .addOperand(MI->getOperand(0))
+  .addReg(dataReg);
+  MI->getOperand(0).setReg(dataReg);
+}
+void
+AMDILEGIOExpansion::expandGlobalStore(MachineInstr *MI)
+{
+  bool usesArena = isArenaOp(MI);
+  uint32_t ID = getPointerID(MI);
+  mKM->setOutputInst();
+  DebugLoc DL = MI->getDebugLoc();
+  // These instructions are expanded before the current MI.
+  expandStoreSetupCode(MI);
+  expandArenaSetup(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(ID);
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Ry1010)
+        .addReg(AMDIL::Ry1011)
+
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rz1010)
+        .addReg(AMDIL::Rz1011)
+
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rw1010)
+        .addReg(AMDIL::Rw1011)
+
+        .addImm(ID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+        .addReg(AMDIL::R1010)
+        .addImm(2);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
+
+        .addReg(AMDIL::R1011)
+        .addImm(2);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+        .addReg(AMDIL::Rx1008)
+
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+        .addReg(AMDIL::R1010)
+        .addImm(3);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
+
+        .addReg(AMDIL::R1011)
+        .addImm(3);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+        .addReg(AMDIL::Rx1008)
+
+        .addImm(ID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+        .addReg(AMDIL::R1010)
+        .addImm(4);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
+
+        .addReg(AMDIL::R1011)
+        .addImm(4);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+        .addReg(AMDIL::Rx1008)
+
+        .addImm(ID);
+      }
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
+      .addReg(AMDIL::Rx1010)
+
+      .addReg(AMDIL::R1011)
+      .addImm(ID);
+    }
+    break;
+  case 1:
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(0xFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i8), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(ID);
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+      .addReg(AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(ID);
+    }
+    break;
+  case 2:
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(0xFFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i16), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(ID);
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+      .addReg(AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(ID);
+    }
+    break;
+  case 4:
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(ID);
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+      .addReg(AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(ID);
+    }
+    break;
+  case 8:
+    if (usesArena) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(ID);
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Ry1010)
+        .addReg(AMDIL::Ry1011)
+
+        .addImm(ID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+        .addReg(AMDIL::Rxy1010)
+
+        .addImm(2);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
+        .addReg(AMDIL::Rxy1011)
+
+        .addImm(2);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+        .addReg(AMDIL::Rx1008)
+
+        .addImm(ID);
+      }
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEMxy)
+      .addReg(AMDIL::Rx1010)
+      .addReg(AMDIL::Rxy1011)
+
+      .addImm(ID);
+    }
+    break;
+  };
+}
+void
+AMDILEGIOExpansion::expandRegionStore(MachineInstr *MI)
+{
+  bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+  if (!HWRegion || !isHardwareRegion(MI)) {
+    return expandGlobalStore(MI);
+  }
+  mKM->setOutputInst();
+  if (!mMFI->usesGDS() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  uint32_t gID = getPointerID(MI);
+  assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n");
+  if (!gID) {
+    gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned mulOp = HWRegion ? AMDIL::UMUL24_i32 : AMDIL::UMUL24_i32;
+  // These instructions are expanded before the current MI.
+  expandStoreSetupCode(MI);
+  expandArenaSetup(MI);
+  switch (getMemorySize(MI)) {
+  default:
+
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
+    .addReg(AMDIL::Ry1011)
+
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rz1010)
+    .addReg(AMDIL::Rz1011)
+
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rw1010)
+    .addReg(AMDIL::Rw1011)
+
+    .addImm(gID);
+    break;
+  case 1:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
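+      // Without hardware byte GDS stores, the byte store is emulated with the
+      // mskor (mask + or) atomic: a mask preserving the untouched byte lanes
+      // and the data shifted into the addressed lane are applied in a single
+      // ATOM_R_MSKOR_NORET operation.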
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(0xFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1012)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(3));
+
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+      .addReg(AMDIL::Rx1008)
+      .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+                                   (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+      BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1006)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(8));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(0xFFFFFF00))
+      .addImm(mMFI->addi32Literal(0x00FFFFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Ry1007)
+      .addReg(AMDIL::Ry1008)
+      .addReg(AMDIL::Rx1007)
+
+      .addImm(mMFI->addi32Literal(0xFF00FFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rz1012)
+      .addReg(AMDIL::Rz1008)
+      .addReg(AMDIL::Rx1007)
+
+      .addImm(mMFI->addi32Literal(0xFFFF00FF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1007);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_R_MSKOR_NORET))
+      .addReg(AMDIL::Rx1010)
+      .addImm(mMFI->addi32Literal(0))
+      .addReg(AMDIL::Rx1012)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(gID);
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_i8), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(gID);
+    }
+    break;
+  case 2:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(0x0000FFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(3));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(1));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(0x0000FFFF))
+      .addImm(mMFI->addi32Literal(0xFFFF0000));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(16))
+      .addImm(mMFI->addi32Literal(0));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1008);
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_R_MSKOR_NORET))
+      .addReg(AMDIL::Rx1010)
+      .addImm(mMFI->addi32Literal(0))
+      .addReg(AMDIL::Rx1012)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(gID);
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_i16), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(gID);
+    }
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+
+    .addImm(gID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+    .addReg(AMDIL::Rx1010)
+
+    .addImm(mMFI->addi64Literal(1ULL << 32));
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+
+    .addImm(gID);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
+    .addReg(AMDIL::Ry1011)
+
+    .addImm(gID);
+    break;
+  };
+}
+
+void
+AMDILEGIOExpansion::expandLocalStore(MachineInstr *MI)
+{
+  bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+  if (!HWLocal || !isHardwareLocal(MI)) {
+    return expandGlobalStore(MI);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  if (!mMFI->usesLDS() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  uint32_t lID = getPointerID(MI);
+  assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n");
+  if (!lID) {
+    lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+  unsigned mulOp = HWLocal ? AMDIL::UMUL24_i32 : AMDIL::UMUL24_i32;
+  // These instructions are expanded before the current MI.
+  expandStoreSetupCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC_v4i32), AMDIL::MEM)
+
+    .addReg(AMDIL::Rx1010)
+    .addReg(AMDIL::R1011)
+    .addImm(lID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC_v2i32), AMDIL::MEMxy)
+    .addReg(AMDIL::Rx1010)
+    .addReg(AMDIL::Rxy1011)
+
+    .addImm(lID);
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTORE), AMDIL::Rx1010)
+    .addReg(AMDIL::Rx1011)
+
+    .addImm(lID);
+    break;
+  case 1:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
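+      // LDS byte stores use the same mask-and-shift emulation; CAL versions at
+      // or above CAL_VERSION_SC_137 expose an LDS mskor atomic, while older
+      // versions fall back to an atomic add/or pair.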
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(0xFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1012)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(3));
+
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+      .addReg(AMDIL::Rx1008)
+      .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+                                   (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+      BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1006)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(8));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(0xFFFFFF00))
+      .addImm(mMFI->addi32Literal(0x00FFFFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+      .addReg(AMDIL::Ry1008)
+      .addReg(AMDIL::Rx1007)
+
+      .addImm(mMFI->addi32Literal(0xFF00FFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+      .addReg(AMDIL::Rz1008)
+      .addReg(AMDIL::Rx1007)
+
+      .addImm(mMFI->addi32Literal(0xFFFF00FF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1006);
+
+      if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_MSKOR_NORET))
+        .addReg(AMDIL::Rx1010)
+        .addImm(mMFI->addi32Literal(0))
+        .addReg(AMDIL::Rx1012)
+        .addReg(AMDIL::Rx1011)
+
+        .addImm(lID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
+                AMDIL::Rx1010)
+        .addReg(AMDIL::Rx1012)
+
+        .addImm(lID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_OR_NORET),
+                AMDIL::Rx1010)
+        .addReg(AMDIL::Rx1011)
+
+        .addImm(lID);
+      }
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTORE_i8), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(lID);
+    }
+    break;
+  case 2:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(mMFI->addi32Literal(0x0000FFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1010)
+
+      .addImm(mMFI->addi32Literal(3));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(1));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(0x0000FFFF))
+      .addImm(mMFI->addi32Literal(0xFFFF0000));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+      .addReg(AMDIL::Rx1008)
+
+      .addImm(mMFI->addi32Literal(16))
+      .addImm(mMFI->addi32Literal(0));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1008);
+
+      if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_MSKOR_NORET))
+        .addReg(AMDIL::Rx1010)
+        .addImm(mMFI->addi32Literal(0))
+        .addReg(AMDIL::Rx1012)
+        .addReg(AMDIL::Rx1011)
+
+        .addImm(lID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
+                AMDIL::Rx1010)
+        .addReg(AMDIL::Rx1012)
+
+        .addImm(lID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_OR_NORET),
+                AMDIL::Rx1010)
+        .addReg(AMDIL::Rx1011)
+
+        .addImm(lID);
+      }
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTORE_i16), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+
+      .addImm(lID);
+    }
+    break;
+  }
+}
+
+
+void
+AMDILEGIOExpansion::expandStoreSetupCode(MachineInstr *MI)
+{
+  AMDIL789IOExpansion::expandStoreSetupCode(MI);
+}
+void
+AMDILEGIOExpansion::expandArenaSetup(MachineInstr *MI)
+{
+  if (!isArenaOp(MI)) {
+    return;
+  }
+  const MCInstrDesc &TID = (MI->getDesc());
+  const MCOperandInfo &TOI = TID.OpInfo[0];
+  unsigned short RegClass = TOI.RegClass;
+  DebugLoc DL = MI->getDebugLoc();
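+  // Arena accesses are addressed one 32-bit component at a time, so widen the
+  // base address into per-component byte offsets (+4 for the y lane of a
+  // 64-bit value, +4/+8/+12 for a full vector); scalar and sub-dword register
+  // classes need no adjustment.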
+  switch (RegClass) {
+  case AMDIL::GPRV4I16RegClassID:
+  case AMDIL::GPRI64RegClassID:
+  case AMDIL::GPRF64RegClassID:
+  case AMDIL::GPRV2I32RegClassID:
+  case AMDIL::GPRV2F32RegClassID:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+    .addReg(AMDIL::Rx1010)
+
+    .addImm(mMFI->addi64Literal(4ULL << 32));
+    break;
+  default:
+
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+    .addReg(AMDIL::Rx1010)
+    .addImm(mMFI->addi128Literal(4ULL << 32, 8ULL | (12ULL << 32)));
+    break;
+  case AMDIL::GPRI8RegClassID:
+  case AMDIL::GPRV2I8RegClassID:
+  case AMDIL::GPRI16RegClassID:
+  case AMDIL::GPRV2I16RegClassID:
+  case AMDIL::GPRV4I8RegClassID:
+  case AMDIL::GPRI32RegClassID:
+  case AMDIL::GPRF32RegClassID:
+    break;
+  };
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,79 @@
+//===-- AMDILELFWriterInfo.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL.h"
+#include "AMDILELFWriterInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Implementation of the AMDILELFWriterInfo class
+//===----------------------------------------------------------------------===//
+AMDILELFWriterInfo::AMDILELFWriterInfo(bool is64bit, bool endian)
+  : TargetELFWriterInfo(is64bit, endian)
+{
+}
+
+AMDILELFWriterInfo::~AMDILELFWriterInfo()
+{
+}
+
+unsigned AMDILELFWriterInfo::getRelocationType(unsigned MachineRelTy) const
+{
+  assert(0 && "What do we do here? Let's assert and analyze");
+  return 0;
+}
+
+bool AMDILELFWriterInfo::hasRelocationAddend() const
+{
+  assert(0 && "What do we do here? Let's assert and analyze");
+  return false;
+}
+
+long int AMDILELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+    long int Modifier) const
+{
+  assert(0 && "What do we do here? Let's assert and analyze");
+  return 0;
+}
+
+unsigned AMDILELFWriterInfo::getRelocationTySize(unsigned RelTy) const
+{
+  assert(0 && "What do we do here? Let's assert and analyze");
+  return 0;
+}
+
+bool AMDILELFWriterInfo::isPCRelativeRel(unsigned RelTy) const
+{
+  assert(0 && "What do we do here? Let's assert and analyze");
+  return false;
+}
+
+unsigned AMDILELFWriterInfo::getAbsoluteLabelMachineRelTy() const
+{
+  assert(0 && "What do we do here? Let's assert and analyze");
+  return 0;
+}
+
+long int AMDILELFWriterInfo::computeRelocation(unsigned SymOffset,
+    unsigned RelOffset,
+    unsigned RelTy) const
+{
+  assert(0 && "What do we do here? Let's assert and analyze");
+  return 0;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,57 @@
+//===-- AMDILELFWriterInfo.h ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ELF writer information for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_ELF_WRITER_INFO_H_
+#define _AMDIL_ELF_WRITER_INFO_H_
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm
+{
+class AMDILELFWriterInfo : public TargetELFWriterInfo
+{
+public:
+  AMDILELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+  virtual ~AMDILELFWriterInfo();
+
+  /// getRelocationType - Returns the target-specific ELF relocation type.
+  /// 'MachineRelTy' contains the object-code-independent relocation type.
+  virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+  /// hasRelocationAddend - True if the target uses an addend in the
+  /// ELF relocation entry.
+  virtual bool hasRelocationAddend() const;
+
+  /// getDefaultAddendForRelTy - Gets the default addend value for a
+  /// relocation entry based on the target ELF relocation type.
+  virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+      long int Modifier = 0) const;
+
+  /// getRelocationTySize - Returns the size of the relocatable field in bits.
+  virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+  /// isPCRelativeRel - True if the relocation type is PC-relative.
+  virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+  /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used
+  /// to reference an absolute label.
+  virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+  /// computeRelocation - Some relocatable fields can be relocated directly,
+  /// avoiding relocation symbol emission; this computes the final relocation
+  /// value for such a symbol.
+  virtual long int computeRelocation(unsigned SymOffset,
+                                     unsigned RelOffset,
+                                     unsigned RelTy) const;
+};
+} // namespace llvm
+#endif // _AMDIL_ELF_WRITER_INFO_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,591 @@
+//===-- AMDILEnumeratedTypes.td -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ILEnumeratedTypes.td - The IL Enumerated Types - Micah Villmow - 9-3-2008.
+//
+//===----------------------------------------------------------------------===//
+
+class ILShader<bits<8> val> {
+    bits<8> Value = val;
+}
+// Table 5-1
+def IL_SHADER_PIXEL : ILShader<0>;
+def IL_SHADER_COMPUTE : ILShader<1>;
+
+// Section 5.2 IL RegType
+class ILRegType<bits<6> val> {
+    bits<6> Value = val;
+}
+// Table 5-2
+def IL_REGTYPE_TEMP      : ILRegType<0>;
+def IL_REGTYPE_WINCOORD  : ILRegType<1>;
+def IL_REGTYPE_CONST_BUF : ILRegType<2>;
+def IL_REGTYPE_LITERAL   : ILRegType<3>;
+def IL_REGTYPE_ITEMP     : ILRegType<4>;
+def IL_REGTYPE_GLOBAL    : ILRegType<5>;
+
+// Section 5.3 IL Component Select
+class ILComponentSelect<bits<3> val, string text> {
+     bits<3> Value = val;
+     string Text = text;
+}
+// Table 5-3
+def IL_COMPSEL_X : ILComponentSelect<0, "x">;
+def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
+def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
+def IL_COMPSEL_W : ILComponentSelect<3, "w">;
+def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
+def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
+
+// Section 5.4 IL Mod Dst Comp
+class ILModDstComp<bits<2> val, string text> {
+    bits<2> Value = val;
+    string Text = text;
+}
+// Table 5-4
+def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
+def IL_MODCOMP_WRITE_X : ILModDstComp<1, "x">;
+def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
+def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
+def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
+def IL_MODCOMP_0       : ILModDstComp<2, "0">;
+def IL_MODCOMP_1       : ILModDstComp<3, "1">;
+
+// Section 5.5 IL Import Usage
+class ILImportUsage<bits<1> val, string usage> {
+    bits<1> Value = val;
+    string Text = usage;
+}
+// Table 5-5
+def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
+
+// Section 5.6 Il Shift Scale
+class ILShiftScale<bits<4> val, string scale> {
+    bits<4> Value = val;
+    string Text = scale;
+}
+
+// Table 5-6
+def IL_SHIFT_NONE   : ILShiftScale<0, "">;
+def IL_SHIFT_X2     : ILShiftScale<1, "_x2">;
+def IL_SHIFT_X4     : ILShiftScale<2, "_x4">;
+def IL_SHIFT_X8     : ILShiftScale<3, "_x8">;
+def IL_SHIFT_D2     : ILShiftScale<4, "_d2">;
+def IL_SHIFT_D4     : ILShiftScale<5, "_d4">;
+def IL_SHIFT_D8     : ILShiftScale<6, "_d8">;
+
+// Section 5.7 IL Divide Component
+class ILDivComp<bits<3> val, string divcomp> {
+    bits<3> Value = val;
+    string Text = divcomp;
+}
+
+// Table 5-7
+def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
+def IL_DIVCOMP_Y    : ILDivComp<1, "_divcomp(y)">;
+def IL_DIVCOMP_Z    : ILDivComp<2, "_divcomp(z)">;
+def IL_DIVCOMP_W    : ILDivComp<3, "_divcomp(w)">;
+//def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
+
+// Section 5.8 IL Relational Op
+class ILRelOp<bits<3> val, string op> {
+    bits<3> Value = val;
+    string Text = op;
+}
+
+// Table 5-8
+def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
+def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
+def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
+def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
+def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
+def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
+
+// Section 5.9 IL Zero Op
+class ILZeroOp<bits<3> val, string behavior> {
+    bits<3> Value = val;
+    string Text = behavior;
+}
+
+// Table 5-9
+def IL_ZEROOP_FLTMAX    : ILZeroOp<0, "_zeroop(fltmax)">;
+def IL_ZEROOP_0         : ILZeroOp<1, "_zeroop(zero)">;
+def IL_ZEROOP_INFINITY  : ILZeroOp<2, "_zeroop(infinity)">;
+def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
+
+// Section 5.10 IL Cmp Value
+class ILCmpValue<bits<3> val, string num> {
+    bits<3> Value = val;
+    string Text = num;
+}
+
+// Table 5-10
+def IL_CMPVAL_0_0     : ILCmpValue<0, "0.0">;
+def IL_CMPVAL_0_5     : ILCmpValue<1, "0.5">;
+def IL_CMPVAL_1_0     : ILCmpValue<2, "1.0">;
+def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
+def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
+
+// Section 5.11 IL Addressing
+class ILAddressing<bits<3> val> {
+    bits<3> Value = val;
+}
+
+// Table 5-11
+def IL_ADDR_ABSOLUTE     : ILAddressing<0>;
+def IL_ADDR_RELATIVE     : ILAddressing<1>;
+def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
+
+// Section 5.11 IL Element Format
+class ILElementFormat<bits<5> val> {
+    bits<5> Value = val;
+}
+
+// Table 5-11
+def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
+def IL_ELEMENTFORMAT_SNORM   : ILElementFormat<1>;
+def IL_ELEMENTFORMAT_UNORM   : ILElementFormat<2>;
+def IL_ELEMENTFORMAT_SINT    : ILElementFormat<3>;
+def IL_ELEMENTFORMAT_UINT    : ILElementFormat<4>;
+def IL_ELEMENTFORMAT_FLOAT   : ILElementFormat<5>;
+def IL_ELEMENTFORMAT_SRGB    : ILElementFormat<6>;
+def IL_ELEMENTFORMAT_MIXED   : ILElementFormat<7>;
+def IL_ELEMENTFORMAT_Last    : ILElementFormat<8>;
+
+// Section 5.12 IL Op Code
+class ILOpCode<bits<16> val = -1, string cmd> {
+    bits<16> Value = val;
+    string Text = cmd;
+}
+
+// Table 5-12
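+// Each ILOpCode record pairs the numeric opcode value with the textual IL
+// mnemonic used when the instruction is printed.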
+def IL_DCL_CONST_BUFFER         : ILOpCode<0, "dcl_cb">;
+def IL_DCL_INDEXED_TEMP_ARRAY   : ILOpCode<1, "dcl_index_temp_array">;
+def IL_DCL_INPUT                : ILOpCode<2, "dcl_input">;
+def IL_DCL_LITERAL              : ILOpCode<3, "dcl_literal">;
+def IL_DCL_OUTPUT               : ILOpCode<4, "dcl_output">;
+def IL_DCL_RESOURCE             : ILOpCode<5, "dcl_resource">;
+def IL_OP_ABS                   : ILOpCode<6, "abs">;
+def IL_OP_ADD                   : ILOpCode<7, "add">;
+def IL_OP_AND                   : ILOpCode<8, "iand">;
+def IL_OP_BREAK                 : ILOpCode<9, "break">;
+def IL_OP_BREAK_LOGICALNZ       : ILOpCode<10, "break_logicalnz">;
+def IL_OP_BREAK_LOGICALZ        : ILOpCode<11, "break_logicalz">;
+def IL_OP_BREAKC                : ILOpCode<12, "breakc">;
+def IL_OP_CALL                  : ILOpCode<13, "call">;
+def IL_OP_CALL_LOGICALNZ        : ILOpCode<14, "call_logicalnz">;
+def IL_OP_CALL_LOGICALZ         : ILOpCode<15, "call_logicalz">;
+def IL_OP_CASE                  : ILOpCode<16, "case">;
+def IL_OP_CLG                   : ILOpCode<17, "clg">;
+def IL_OP_CMOV                  : ILOpCode<18, "cmov">;
+def IL_OP_CMOV_LOGICAL          : ILOpCode<19, "cmov_logical">;
+def IL_OP_CMP                   : ILOpCode<20, "cmp">;
+def IL_OP_CONTINUE              : ILOpCode<21, "continue">;
+def IL_OP_CONTINUE_LOGICALNZ    : ILOpCode<22, "continue_logicalnz">;
+def IL_OP_CONTINUE_LOGICALZ     : ILOpCode<23, "continue_logicalz">;
+def IL_OP_CONTINUEC             : ILOpCode<24, "continuec">;
+def IL_OP_COS                   : ILOpCode<25, "cos">;
+def IL_OP_COS_VEC               : ILOpCode<26, "cos_vec">;
+def IL_OP_D_2_F                 : ILOpCode<27, "d2f">;
+def IL_OP_D_ADD                 : ILOpCode<28, "dadd">;
+def IL_OP_D_EQ                  : ILOpCode<29, "deq">;
+def IL_OP_D_FRC                 : ILOpCode<30, "dfrac">;
+def IL_OP_D_FREXP               : ILOpCode<31, "dfrexp">;
+def IL_OP_D_GE                  : ILOpCode<32, "dge">;
+def IL_OP_D_LDEXP               : ILOpCode<33, "dldexp">;
+def IL_OP_D_LT                  : ILOpCode<34, "dlt">;
+def IL_OP_D_MAD                 : ILOpCode<35, "dmad">;
+def IL_OP_D_MUL                 : ILOpCode<36, "dmul">;
+def IL_OP_D_NE                  : ILOpCode<37, "dne">;
+def IL_OP_DEFAULT               : ILOpCode<38, "default">;
+def IL_OP_DISCARD_LOGICALNZ     : ILOpCode<39, "discard_logicalnz">;
+def IL_OP_DISCARD_LOGICALZ      : ILOpCode<40, "discard_logicalz">;
+def IL_OP_DIV                   : ILOpCode<41, "div_zeroop(infinity)">;
+def IL_OP_DP2                   : ILOpCode<42, "dp2">;
+def IL_OP_DP3                   : ILOpCode<43, "dp3">;
+def IL_OP_DP4                   : ILOpCode<44, "dp4">;
+def IL_OP_ELSE                  : ILOpCode<45, "else">;
+def IL_OP_END                   : ILOpCode<46, "end">;
+def IL_OP_ENDFUNC               : ILOpCode<47, "endfunc">;
+def IL_OP_ENDIF                 : ILOpCode<48, "endif">;
+def IL_OP_ENDLOOP               : ILOpCode<49, "endloop">;
+def IL_OP_ENDMAIN               : ILOpCode<50, "endmain">;
+def IL_OP_ENDSWITCH             : ILOpCode<51, "endswitch">;
+def IL_OP_EQ                    : ILOpCode<52, "eq">;
+def IL_OP_EXP                   : ILOpCode<53, "exp">;
+def IL_OP_EXP_VEC               : ILOpCode<54, "exp_vec">;
+def IL_OP_F_2_D                 : ILOpCode<55, "f2d">;
+def IL_OP_FLR                   : ILOpCode<56, "flr">;
+def IL_OP_FRC                   : ILOpCode<57, "frc">;
+def IL_OP_FTOI                  : ILOpCode<58, "ftoi">;
+def IL_OP_FTOU                  : ILOpCode<59, "ftou">;
+def IL_OP_FUNC                  : ILOpCode<60, "func">;
+def IL_OP_GE                    : ILOpCode<61, "ge">;
+def IL_OP_I_ADD                 : ILOpCode<62, "iadd">;
+def IL_OP_I_EQ                  : ILOpCode<63, "ieq">;
+def IL_OP_I_GE                  : ILOpCode<64, "ige">;
+def IL_OP_I_LT                  : ILOpCode<65, "ilt">;
+def IL_OP_I_MAD                 : ILOpCode<66, "imad">;
+def IL_OP_I_MAX                 : ILOpCode<67, "imax">;
+def IL_OP_I_MIN                 : ILOpCode<68, "imin">;
+def IL_OP_I_MUL                 : ILOpCode<69, "imul">;
+def IL_OP_I_MUL_HIGH            : ILOpCode<70, "imul_high">;
+def IL_OP_I_NE                  : ILOpCode<71, "ine">;
+def IL_OP_I_NEGATE              : ILOpCode<72, "inegate">;
+def IL_OP_I_NOT                 : ILOpCode<73, "inot">;
+def IL_OP_I_OR                  : ILOpCode<74, "ior">;
+def IL_OP_I_SHL                 : ILOpCode<75, "ishl">;
+def IL_OP_I_SHR                 : ILOpCode<76, "ishr">;
+def IL_OP_I_XOR                 : ILOpCode<77, "ixor">;
+def IL_OP_IF_LOGICALNZ          : ILOpCode<78, "if_logicalnz">;
+def IL_OP_IF_LOGICALZ           : ILOpCode<79, "if_logicalz">;
+def IL_OP_IFC                   : ILOpCode<80, "ifc">;
+def IL_OP_ITOF                  : ILOpCode<81, "itof">;
+def IL_OP_LN                    : ILOpCode<82, "ln">;
+def IL_OP_LOG                   : ILOpCode<83, "log">;
+def IL_OP_LOG_VEC               : ILOpCode<84, "log_vec">;
+def IL_OP_LOOP                  : ILOpCode<85, "loop">;
+def IL_OP_LT                    : ILOpCode<86, "lt">;
+def IL_OP_MAD                   : ILOpCode<87, "mad_ieee">;
+def IL_OP_MAX                   : ILOpCode<88, "max_ieee">;
+def IL_OP_MIN                   : ILOpCode<89, "min_ieee">;
+def IL_OP_MOD                   : ILOpCode<90, "mod_ieee">;
+def IL_OP_MOV                   : ILOpCode<91, "mov">;
+def IL_OP_MUL_IEEE              : ILOpCode<92, "mul_ieee">;
+def IL_OP_NE                    : ILOpCode<93, "ne">;
+def IL_OP_NRM                   : ILOpCode<94, "nrm_nrm4_zeroop(zero)">;
+def IL_OP_POW                   : ILOpCode<95, "pow">;
+def IL_OP_RCP                   : ILOpCode<96, "rcp">;
+def IL_OP_RET                   : ILOpCode<97, "ret">;
+def IL_OP_RET_DYN               : ILOpCode<98, "ret_dyn">;
+def IL_OP_RET_LOGICALNZ         : ILOpCode<99, "ret_logicalnz">;
+def IL_OP_RET_LOGICALZ          : ILOpCode<100, "ret_logicalz">;
+def IL_OP_RND                   : ILOpCode<101, "rnd">;
+def IL_OP_ROUND_NEAR            : ILOpCode<102, "round_nearest">;
+def IL_OP_ROUND_NEG_INF         : ILOpCode<103, "round_neginf">;
+def IL_OP_ROUND_POS_INF         : ILOpCode<104, "round_plusinf">;
+def IL_OP_ROUND_ZERO            : ILOpCode<105, "round_z">;
+def IL_OP_RSQ                   : ILOpCode<106, "rsq">;
+def IL_OP_RSQ_VEC               : ILOpCode<107, "rsq_vec">;
+def IL_OP_SAMPLE                : ILOpCode<108, "sample">;
+def IL_OP_SAMPLE_L              : ILOpCode<109, "sample_l">;
+def IL_OP_SET                   : ILOpCode<110, "set">;
+def IL_OP_SGN                   : ILOpCode<111, "sgn">;
+def IL_OP_SIN                   : ILOpCode<112, "sin">;
+def IL_OP_SIN_VEC               : ILOpCode<113, "sin_vec">;
+def IL_OP_SUB                   : ILOpCode<114, "sub">;
+def IL_OP_SWITCH                : ILOpCode<115, "switch">;
+def IL_OP_TRC                   : ILOpCode<116, "trc">;
+def IL_OP_U_DIV                 : ILOpCode<117, "udiv">;
+def IL_OP_U_GE                  : ILOpCode<118, "uge">;
+def IL_OP_U_LT                  : ILOpCode<119, "ult">;
+def IL_OP_U_MAD                 : ILOpCode<120, "umad">;
+def IL_OP_U_MAX                 : ILOpCode<121, "umax">;
+def IL_OP_U_MIN                 : ILOpCode<122, "umin">;
+def IL_OP_U_MOD                 : ILOpCode<123, "umod">;
+def IL_OP_U_MUL                 : ILOpCode<124, "umul">;
+def IL_OP_U_MUL_HIGH            : ILOpCode<125, "umul_high">;
+def IL_OP_U_SHR                 : ILOpCode<126, "ushr">;
+def IL_OP_UTOF                  : ILOpCode<127, "utof">;
+def IL_OP_WHILE                 : ILOpCode<128, "whileloop">;
+// SC IL instructions that are not in CAL IL
+def IL_OP_ACOS                  : ILOpCode<129, "acos">;
+def IL_OP_ASIN                  : ILOpCode<130, "asin">;
+def IL_OP_EXN                   : ILOpCode<131, "exn">;
+def IL_OP_UBIT_REVERSE          : ILOpCode<132, "ubit_reverse">;
+def IL_OP_UBIT_EXTRACT          : ILOpCode<133, "ubit_extract">;
+def IL_OP_IBIT_EXTRACT          : ILOpCode<134, "ibit_extract">;
+def IL_OP_SQRT                  : ILOpCode<135, "sqrt">;
+def IL_OP_SQRT_VEC              : ILOpCode<136, "sqrt_vec">;
+def IL_OP_ATAN                  : ILOpCode<137, "atan">;
+def IL_OP_TAN                   : ILOpCode<137, "tan">;
+def IL_OP_D_DIV                 : ILOpCode<138, "ddiv">;
+def IL_OP_F_NEG                 : ILOpCode<139, "mov">;
+def IL_OP_GT                    : ILOpCode<140, "gt">;
+def IL_OP_LE                    : ILOpCode<141, "lt">;
+def IL_OP_DIST                  : ILOpCode<142, "dist">;
+def IL_OP_LEN                   : ILOpCode<143, "len">;
+def IL_OP_MACRO                 : ILOpCode<144, "mcall">;
+def IL_OP_INTR                  : ILOpCode<145, "call">;
+def IL_OP_I_FFB_HI              : ILOpCode<146, "ffb_hi">;
+def IL_OP_I_FFB_LO              : ILOpCode<147, "ffb_lo">;
+def IL_OP_BARRIER               : ILOpCode<148, "fence_threads_memory_lds">;
+def IL_OP_BARRIER_LOCAL         : ILOpCode<149, "fence_threads_lds">;
+def IL_OP_BARRIER_GLOBAL        : ILOpCode<150, "fence_threads_memory">;
+def IL_OP_FENCE                 : ILOpCode<151, "fence_lds_memory">;
+def IL_OP_FENCE_READ_ONLY       : ILOpCode<152, "fence_lds_mem_read_only">;
+def IL_OP_FENCE_WRITE_ONLY      : ILOpCode<153, "fence_lds_mem_write_only">;
+def IL_PSEUDO_INST              : ILOpCode<154, ";Pseudo Op">;
+def IL_OP_UNPACK_0              : ILOpCode<155, "unpack0">;
+def IL_OP_UNPACK_1              : ILOpCode<156, "unpack1">;
+def IL_OP_UNPACK_2              : ILOpCode<157, "unpack2">;
+def IL_OP_UNPACK_3              : ILOpCode<158, "unpack3">;
+def IL_OP_PI_REDUCE             : ILOpCode<159, "pireduce">;
+def IL_OP_IBIT_COUNT            : ILOpCode<160, "icbits">;
+def IL_OP_I_FFB_SGN             : ILOpCode<161, "ffb_shi">;
+def IL_OP_F2U4                  : ILOpCode<162, "f_2_u4">;
+def IL_OP_BIT_ALIGN             : ILOpCode<163, "bitalign">;
+def IL_OP_BYTE_ALIGN            : ILOpCode<164, "bytealign">;
+def IL_OP_U4_LERP               : ILOpCode<165, "u4lerp">;
+def IL_OP_SAD                   : ILOpCode<166, "sad">;
+def IL_OP_SAD_HI                : ILOpCode<167, "sadhi">;
+def IL_OP_SAD4                  : ILOpCode<168, "sad4">;
+def IL_OP_UBIT_INSERT           : ILOpCode<169, "ubit_insert">;
+def IL_OP_I_CARRY               : ILOpCode<170, "icarry">;
+def IL_OP_I_BORROW              : ILOpCode<171, "iborrow">;
+def IL_OP_U_MAD24               : ILOpCode<172, "umad24">;
+def IL_OP_U_MUL24               : ILOpCode<173, "umul24">;
+def IL_OP_I_MAD24               : ILOpCode<174, "imad24">;
+def IL_OP_I_MUL24               : ILOpCode<175, "imul24">;
+def IL_OP_CLAMP                 : ILOpCode<176, "clamp">;
+def IL_OP_LERP                  : ILOpCode<177, "lrp">;
+def IL_OP_FMA                   : ILOpCode<178, "fma">;
+def IL_OP_D_MIN                 : ILOpCode<179, "dmin">;
+def IL_OP_D_MAX                 : ILOpCode<180, "dmax">;
+def IL_OP_D_SQRT                : ILOpCode<181, "dsqrt">;
+def IL_OP_DP2_ADD               : ILOpCode<182, "dp2add">;
+def IL_OP_F16_TO_F32            : ILOpCode<183, "f162f">;
+def IL_OP_F32_TO_F16            : ILOpCode<184, "f2f16">;
+def IL_REG_LOCAL_ID_FLAT        : ILOpCode<185, "vTidInGrpFlat">;
+def IL_REG_LOCAL_ID             : ILOpCode<186, "vTidInGrp">;
+def IL_REG_GLOBAL_ID_FLAT       : ILOpCode<187, "vAbsTidFlag">;
+def IL_REG_GLOBAL_ID            : ILOpCode<188, "vAbsTid">;
+def IL_REG_GROUP_ID_FLAT        : ILOpCode<189, "vThreadGrpIDFlat">;
+def IL_REG_GROUP_ID             : ILOpCode<190, "vThreadGrpID">;
+def IL_OP_D_RCP                 : ILOpCode<191, "drcp_zeroop(infinity)">;
+def IL_OP_D_RSQ                 : ILOpCode<192, "drsq_zeroop(infinity)">;
+def IL_OP_D_MOV                 : ILOpCode<193, "dmov">;
+def IL_OP_D_MOVC                : ILOpCode<194, "dmovc">;
+def IL_OP_NOP                   : ILOpCode<195, "nop">;
+def IL_OP_UAV_ADD               : ILOpCode<196, "uav_add">;
+def IL_OP_UAV_AND               : ILOpCode<197, "uav_and">;
+def IL_OP_UAV_MAX               : ILOpCode<198, "uav_max">;
+def IL_OP_UAV_MIN               : ILOpCode<199, "uav_min">;
+def IL_OP_UAV_OR                : ILOpCode<200, "uav_or">;
+def IL_OP_UAV_RSUB              : ILOpCode<201, "uav_rsub">;
+def IL_OP_UAV_SUB               : ILOpCode<202, "uav_sub">;
+def IL_OP_UAV_UMAX              : ILOpCode<203, "uav_umax">;
+def IL_OP_UAV_UMIN              : ILOpCode<204, "uav_umin">;
+def IL_OP_UAV_XOR               : ILOpCode<205, "uav_xor">;
+def IL_OP_UAV_INC               : ILOpCode<206, "uav_uinc">;
+def IL_OP_UAV_DEC               : ILOpCode<207, "uav_udec">;
+def IL_OP_UAV_CMP               : ILOpCode<208, "uav_cmp">;
+def IL_OP_UAV_READ_ADD          : ILOpCode<209, "uav_read_add">;
+def IL_OP_UAV_READ_AND          : ILOpCode<210, "uav_read_and">;
+def IL_OP_UAV_READ_MAX          : ILOpCode<211, "uav_read_max">;
+def IL_OP_UAV_READ_MIN          : ILOpCode<212, "uav_read_min">;
+def IL_OP_UAV_READ_OR           : ILOpCode<213, "uav_read_or">;
+def IL_OP_UAV_READ_RSUB         : ILOpCode<214, "uav_read_rsub">;
+def IL_OP_UAV_READ_SUB          : ILOpCode<215, "uav_read_sub">;
+def IL_OP_UAV_READ_UMAX         : ILOpCode<216, "uav_read_umax">;
+def IL_OP_UAV_READ_UMIN         : ILOpCode<217, "uav_read_umin">;
+def IL_OP_UAV_READ_XOR          : ILOpCode<218, "uav_read_xor">;
+def IL_OP_UAV_READ_INC          : ILOpCode<219, "uav_read_uinc">;
+def IL_OP_UAV_READ_DEC          : ILOpCode<220, "uav_read_udec">;
+def IL_OP_UAV_READ_XCHG         : ILOpCode<221, "uav_read_xchg">;
+def IL_OP_UAV_READ_CMPXCHG      : ILOpCode<222, "uav_read_cmp_xchg">;
+def IL_OP_LDS_ADD               : ILOpCode<223, "lds_add">;
+def IL_OP_LDS_AND               : ILOpCode<224, "lds_and">;
+def IL_OP_LDS_MAX               : ILOpCode<225, "lds_max">;
+def IL_OP_LDS_MIN               : ILOpCode<226, "lds_min">;
+def IL_OP_LDS_OR                : ILOpCode<227, "lds_or">;
+def IL_OP_LDS_RSUB              : ILOpCode<228, "lds_rsub">;
+def IL_OP_LDS_SUB               : ILOpCode<229, "lds_sub">;
+def IL_OP_LDS_UMAX              : ILOpCode<230, "lds_umax">;
+def IL_OP_LDS_UMIN              : ILOpCode<231, "lds_umin">;
+def IL_OP_LDS_XOR               : ILOpCode<232, "lds_xor">;
+def IL_OP_LDS_INC               : ILOpCode<233, "lds_inc">;
+def IL_OP_LDS_DEC               : ILOpCode<234, "lds_dec">;
+def IL_OP_LDS_CMP               : ILOpCode<235, "lds_cmp">;
+def IL_OP_LDS_READ_ADD          : ILOpCode<236, "lds_read_add">;
+def IL_OP_LDS_READ_AND          : ILOpCode<237, "lds_read_and">;
+def IL_OP_LDS_READ_MAX          : ILOpCode<238, "lds_read_max">;
+def IL_OP_LDS_READ_MIN          : ILOpCode<239, "lds_read_min">;
+def IL_OP_LDS_READ_OR           : ILOpCode<240, "lds_read_or">;
+def IL_OP_LDS_READ_RSUB         : ILOpCode<241, "lds_read_rsub">;
+def IL_OP_LDS_READ_SUB          : ILOpCode<242, "lds_read_sub">;
+def IL_OP_LDS_READ_UMAX         : ILOpCode<243, "lds_read_umax">;
+def IL_OP_LDS_READ_UMIN         : ILOpCode<244, "lds_read_umin">;
+def IL_OP_LDS_READ_XOR          : ILOpCode<245, "lds_read_xor">;
+def IL_OP_LDS_READ_INC          : ILOpCode<246, "lds_read_inc">;
+def IL_OP_LDS_READ_DEC          : ILOpCode<247, "lds_read_dec">;
+def IL_OP_LDS_READ_XCHG         : ILOpCode<248, "lds_read_xchg">;
+def IL_OP_LDS_READ_CMPXCHG      : ILOpCode<249, "lds_read_cmp_xchg">;
+def IL_OP_GDS_ADD               : ILOpCode<250, "gds_add">;
+def IL_OP_GDS_AND               : ILOpCode<251, "gds_and">;
+def IL_OP_GDS_MAX               : ILOpCode<252, "gds_max">;
+def IL_OP_GDS_MIN               : ILOpCode<253, "gds_min">;
+def IL_OP_GDS_OR                : ILOpCode<254, "gds_or">;
+def IL_OP_GDS_RSUB              : ILOpCode<255, "gds_rsub">;
+def IL_OP_GDS_SUB               : ILOpCode<256, "gds_sub">;
+def IL_OP_GDS_UMAX              : ILOpCode<257, "gds_umax">;
+def IL_OP_GDS_UMIN              : ILOpCode<258, "gds_umin">;
+def IL_OP_GDS_MSKOR             : ILOpCode<259, "gds_mskor">;
+def IL_OP_GDS_XOR               : ILOpCode<260, "gds_xor">;
+def IL_OP_GDS_INC               : ILOpCode<261, "gds_inc">;
+def IL_OP_GDS_DEC               : ILOpCode<262, "gds_dec">;
+def IL_OP_GDS_CMP               : ILOpCode<263, "gds_cmp">;
+def IL_OP_GDS_READ_ADD          : ILOpCode<264, "gds_read_add">;
+def IL_OP_GDS_READ_AND          : ILOpCode<265, "gds_read_and">;
+def IL_OP_GDS_READ_MAX          : ILOpCode<266, "gds_read_max">;
+def IL_OP_GDS_READ_MIN          : ILOpCode<267, "gds_read_min">;
+def IL_OP_GDS_READ_OR           : ILOpCode<268, "gds_read_or">;
+def IL_OP_GDS_READ_RSUB         : ILOpCode<269, "gds_read_rsub">;
+def IL_OP_GDS_READ_SUB          : ILOpCode<270, "gds_read_sub">;
+def IL_OP_GDS_READ_UMAX         : ILOpCode<271, "gds_read_umax">;
+def IL_OP_GDS_READ_UMIN         : ILOpCode<272, "gds_read_umin">;
+def IL_OP_GDS_READ_MSKOR        : ILOpCode<273, "gds_read_mskor">;
+def IL_OP_GDS_READ_XOR          : ILOpCode<274, "gds_read_xor">;
+def IL_OP_GDS_READ_INC          : ILOpCode<275, "gds_read_inc">;
+def IL_OP_GDS_READ_DEC          : ILOpCode<276, "gds_read_dec">;
+def IL_OP_GDS_READ_XCHG         : ILOpCode<277, "gds_read_xchg">;
+def IL_OP_GDS_READ_CMPXCHG      : ILOpCode<278, "gds_read_cmp_xchg">;
+def IL_OP_APPEND_BUF_ALLOC      : ILOpCode<279, "append_buf_alloc">;
+def IL_OP_APPEND_BUF_CONSUME    : ILOpCode<280, "append_buf_consume">;
+def IL_OP_I64_ADD               : ILOpCode<281, "i64add">;
+def IL_OP_I64_MAX               : ILOpCode<282, "i64max">;
+def IL_OP_U64_MAX               : ILOpCode<283, "u64max">;
+def IL_OP_I64_MIN               : ILOpCode<284, "i64min">;
+def IL_OP_U64_MIN               : ILOpCode<285, "u64min">;
+def IL_OP_I64_NEGATE            : ILOpCode<286, "i64negate">;
+def IL_OP_I64_SHL               : ILOpCode<287, "i64shl">;
+def IL_OP_I64_SHR               : ILOpCode<288, "i64shr">;
+def IL_OP_U64_SHR               : ILOpCode<289, "u64shr">;
+def IL_OP_I64_EQ                : ILOpCode<290, "i64eq">;
+def IL_OP_I64_GE                : ILOpCode<291, "i64ge">;
+def IL_OP_U64_GE                : ILOpCode<292, "u64ge">;
+def IL_OP_I64_LT                : ILOpCode<293, "i64lt">;
+def IL_OP_U64_LT                : ILOpCode<294, "u64lt">;
+def IL_OP_I64_NE                : ILOpCode<295, "i64ne">;
+def IL_OP_U_MULHI24             : ILOpCode<296, "umul24_high">;
+def IL_OP_I_MULHI24             : ILOpCode<297, "imul24_high">;
+def IL_OP_GDS_LOAD              : ILOpCode<298, "gds_load">;
+def IL_OP_GDS_STORE             : ILOpCode<299, "gds_store">;
+def IL_OP_LDS_LOAD              : ILOpCode<300, "lds_load">;
+def IL_OP_LDS_LOAD_VEC          : ILOpCode<301, "lds_load_vec">;
+def IL_OP_LDS_LOAD_BYTE         : ILOpCode<302, "lds_load_byte">;
+def IL_OP_LDS_LOAD_UBYTE        : ILOpCode<303, "lds_load_ubyte">;
+def IL_OP_LDS_LOAD_SHORT        : ILOpCode<304, "lds_load_short">;
+def IL_OP_LDS_LOAD_USHORT       : ILOpCode<305, "lds_load_ushort">;
+def IL_OP_LDS_STORE             : ILOpCode<306, "lds_store">;
+def IL_OP_LDS_STORE_VEC         : ILOpCode<307, "lds_store_vec">;
+def IL_OP_LDS_STORE_BYTE        : ILOpCode<308, "lds_store_byte">;
+def IL_OP_LDS_STORE_SHORT       : ILOpCode<309, "lds_store_short">;
+def IL_OP_RAW_UAV_LOAD          : ILOpCode<310, "uav_raw_load">;
+def IL_OP_RAW_UAV_STORE         : ILOpCode<311, "uav_raw_store">;
+def IL_OP_ARENA_UAV_LOAD        : ILOpCode<312, "uav_arena_load">;
+def IL_OP_ARENA_UAV_STORE       : ILOpCode<313, "uav_arena_store">;
+def IL_OP_LDS_MSKOR             : ILOpCode<314, "lds_mskor">;
+def IL_OP_LDS_READ_MSKOR        : ILOpCode<315, "lds_read_mskor">;
+def IL_OP_UAV_BYTE_LOAD         : ILOpCode<316, "uav_byte_load">;
+def IL_OP_UAV_UBYTE_LOAD        : ILOpCode<317, "uav_ubyte_load">;
+def IL_OP_UAV_SHORT_LOAD        : ILOpCode<318, "uav_short_load">;
+def IL_OP_UAV_USHORT_LOAD       : ILOpCode<319, "uav_ushort_load">;
+def IL_OP_UAV_BYTE_STORE        : ILOpCode<320, "uav_byte_store">;
+def IL_OP_UAV_SHORT_STORE       : ILOpCode<320, "uav_short_store">;
+def IL_OP_UAV_STORE             : ILOpCode<321, "uav_store">;
+def IL_OP_UAV_LOAD              : ILOpCode<322, "uav_load">;
+def IL_OP_MUL                   : ILOpCode<323, "mul">;
+def IL_OP_DIV_INF               : ILOpCode<324, "div_zeroop(infinity)">;
+def IL_OP_DIV_FLTMAX            : ILOpCode<325, "div_zeroop(fltmax)">;
+def IL_OP_DIV_ZERO              : ILOpCode<326, "div_zeroop(zero)">;
+def IL_OP_DIV_INFELSEMAX        : ILOpCode<327, "div_zeroop(inf_else_max)">;
+def IL_OP_FTOI_FLR              : ILOpCode<328, "ftoi_flr">;
+def IL_OP_FTOI_RPI              : ILOpCode<329, "ftoi_rpi">;
+def IL_OP_F32_TO_F16_NEAR       : ILOpCode<330, "f2f16_near">;
+def IL_OP_F32_TO_F16_NEG_INF    : ILOpCode<331, "f2f16_neg_inf">;
+def IL_OP_F32_TO_F16_PLUS_INF   : ILOpCode<332, "f2f16_plus_inf">;
+def IL_OP_I64_MUL               : ILOpCode<333, "i64mul">;
+def IL_OP_U64_MUL               : ILOpCode<334, "u64mul">;
+def IL_OP_LDEXP                 : ILOpCode<335, "fldexp">;
+def IL_OP_FREXP_EXP             : ILOpCode<336, "frexp_exp">;
+def IL_OP_FREXP_MANT            : ILOpCode<337, "frexp_mant">;
+def IL_OP_D_FREXP_EXP           : ILOpCode<338, "dfrexp_exp">;
+def IL_OP_D_FREXP_MANT          : ILOpCode<339, "dfrexp_mant">;
+def IL_OP_DTOI                  : ILOpCode<340, "dtoi">;
+def IL_OP_DTOU                  : ILOpCode<341, "dtou">;
+def IL_OP_ITOD                  : ILOpCode<342, "itod">;
+def IL_OP_UTOD                  : ILOpCode<343, "utod">;
+def IL_OP_MIN3                  : ILOpCode<344, "min3">;
+def IL_OP_MAX3                  : ILOpCode<345, "max3">;
+def IL_OP_MED3                  : ILOpCode<346, "med3">;
+def IL_OP_I_MIN3                : ILOpCode<347, "imin3">;
+def IL_OP_I_MAX3                : ILOpCode<348, "imax3">;
+def IL_OP_I_MED3                : ILOpCode<349, "imed3">;
+def IL_OP_U_MIN3                : ILOpCode<350, "umin3">;
+def IL_OP_U_MAX3                : ILOpCode<351, "umax3">;
+def IL_OP_U_MED3                : ILOpCode<352, "umed3">;
+def IL_OP_CLASS                 : ILOpCode<353, "class">;
+def IL_OP_D_CLASS               : ILOpCode<354, "dclass">;
+def IL_OP_CU_ID                 : ILOpCode<355, "cu_id">;
+def IL_OP_WAVE_ID               : ILOpCode<356, "wave_id">;
+def IL_OP_I64_SUB               : ILOpCode<357, "i64sub">;
+def IL_OP_I64_DIV               : ILOpCode<358, "i64div">;
+def IL_OP_U64_DIV               : ILOpCode<359, "u64div">;
+def IL_OP_I64_MOD               : ILOpCode<360, "i64mod">;
+def IL_OP_U64_MOD               : ILOpCode<361, "u64mod">;
+def IL_DCL_GWS_THREAD_COUNT     : ILOpCode<362, "dcl_gws_thread_count">;
+def IL_DCL_SEMAPHORE            : ILOpCode<363, "dcl_semaphore">;
+def IL_OP_SEMAPHORE_INIT        : ILOpCode<364, "init_semaphore">;
+def IL_OP_SEMAPHORE_WAIT        : ILOpCode<365, "semaphore_wait">;
+def IL_OP_SEMAPHORE_SIGNAL      : ILOpCode<366, "semaphore_signal">;
+def IL_OP_BARRIER_REGION        : ILOpCode<367, "fence_threads_gds">;
+def IL_OP_MSAD                  : ILOpCode<368, "msad">;
+def IL_OP_QSAD                  : ILOpCode<369, "qsad">;
+def IL_OP_MQSAD                 : ILOpCode<370, "mqsad">;
+def IL_OP_D_TRIG_PREOP          : ILOpCode<371, "dtrig_preop">;
+def IL_OP_ADD_RTE               : ILOpCode<372, "add_rte">;
+def IL_OP_ADD_RTP               : ILOpCode<373, "add_rtp">;
+def IL_OP_ADD_RTN               : ILOpCode<374, "add_rtn">;
+def IL_OP_ADD_RTZ               : ILOpCode<375, "add_rtz">;
+def IL_OP_SUB_RTE               : ILOpCode<376, "sub_rte">;
+def IL_OP_SUB_RTP               : ILOpCode<377, "sub_rtp">;
+def IL_OP_SUB_RTN               : ILOpCode<378, "sub_rtn">;
+def IL_OP_SUB_RTZ               : ILOpCode<379, "sub_rtz">;
+def IL_OP_MUL_RTE               : ILOpCode<380, "mul_rte">;
+def IL_OP_MUL_RTP               : ILOpCode<381, "mul_rtp">;
+def IL_OP_MUL_RTN               : ILOpCode<382, "mul_rtn">;
+def IL_OP_MUL_RTZ               : ILOpCode<383, "mul_rtz">;
+def IL_OP_MAD_RTE               : ILOpCode<384, "mad_rte">;
+def IL_OP_MAD_RTP               : ILOpCode<385, "mad_rtp">;
+def IL_OP_MAD_RTN               : ILOpCode<386, "mad_rtn">;
+def IL_OP_MAD_RTZ               : ILOpCode<387, "mad_rtz">;
+def IL_OP_FMA_RTE               : ILOpCode<388, "fma_rte">;
+def IL_OP_FMA_RTP               : ILOpCode<389, "fma_rtp">;
+def IL_OP_FMA_RTN               : ILOpCode<390, "fma_rtn">;
+def IL_OP_FMA_RTZ               : ILOpCode<391, "fma_rtz">;
+def IL_OP_D_ADD_RTE             : ILOpCode<392, "dadd_rte">;
+def IL_OP_D_ADD_RTP             : ILOpCode<393, "dadd_rtp">;
+def IL_OP_D_ADD_RTN             : ILOpCode<394, "dadd_rtn">;
+def IL_OP_D_ADD_RTZ             : ILOpCode<395, "dadd_rtz">;
+def IL_OP_D_SUB_RTE             : ILOpCode<396, "dsub_rte">;
+def IL_OP_D_SUB_RTP             : ILOpCode<397, "dsub_rtp">;
+def IL_OP_D_SUB_RTN             : ILOpCode<398, "dsub_rtn">;
+def IL_OP_D_SUB_RTZ             : ILOpCode<399, "dsub_rtz">;
+def IL_OP_D_MUL_RTE             : ILOpCode<400, "dmul_rte">;
+def IL_OP_D_MUL_RTP             : ILOpCode<401, "dmul_rtp">;
+def IL_OP_D_MUL_RTN             : ILOpCode<402, "dmul_rtn">;
+def IL_OP_D_MUL_RTZ             : ILOpCode<403, "dmul_rtz">;
+def IL_OP_D_MAD_RTE             : ILOpCode<404, "dmad_rte">;
+def IL_OP_D_MAD_RTP             : ILOpCode<405, "dmad_rtp">;
+def IL_OP_D_MAD_RTN             : ILOpCode<406, "dmad_rtn">;
+def IL_OP_D_MAD_RTZ             : ILOpCode<407, "dmad_rtz">;
+def IL_OP_SAD_U16               : ILOpCode<408, "sad_u16">;
+def IL_OP_SAD_U32               : ILOpCode<409, "sad_u32">;
+def IL_OP_D_ABS                 : ILOpCode<410, "dabs">;
+def IL_OP_DIV_PRECISE           : ILOpCode<411, "div_precise">;
+def IL_OP_LOAD                  : ILOpCode<412, "load">;
+def IL_OP_BFI                   : ILOpCode<413, "bfi">;
+def IL_OP_BFM                   : ILOpCode<414, "bfm">;
+def IL_OP_GDS_LOAD_BYTE         : ILOpCode<415, "gds_load_byte">;
+def IL_OP_GDS_LOAD_UBYTE        : ILOpCode<416, "gds_load_ubyte">;
+def IL_OP_GDS_LOAD_SHORT        : ILOpCode<417, "gds_load_short">;
+def IL_OP_GDS_LOAD_USHORT       : ILOpCode<418, "gds_load_ushort">;
+def IL_OP_GDS_STORE_BYTE        : ILOpCode<419, "gds_store_byte">;
+def IL_OP_GDS_STORE_SHORT       : ILOpCode<420, "gds_store_short">;

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,236 @@
+//===-- AMDILEvergreenDevice.cpp ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Evergreen-family device classes: Evergreen, Cypress,
+// Cedar and Redwood.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILEvergreenDevice.h"
+#include "AMDILEGAsmPrinter.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILPointerManager.h"
+using namespace llvm;
+
+AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
+  : AMDILDevice(ST)
+{
+  setCaps();
+  std::string name = ST->getDeviceName();
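+  // Any Evergreen device name not matched below is treated as a
+  // Juniper-class part.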
+  if (name == "cedar") {
+    mDeviceFlag = OCL_DEVICE_CEDAR;
+  } else if (name == "redwood") {
+    mDeviceFlag = OCL_DEVICE_REDWOOD;
+  } else if (name == "cypress") {
+    mDeviceFlag = OCL_DEVICE_CYPRESS;
+  } else {
+    mDeviceFlag = OCL_DEVICE_JUNIPER;
+  }
+}
+
+AMDILEvergreenDevice::~AMDILEvergreenDevice()
+{
+}
+
+size_t AMDILEvergreenDevice::getMaxLDSSize() const
+{
+  if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+    return MAX_LDS_SIZE_800;
+  } else {
+    return 0;
+  }
+}
+size_t AMDILEvergreenDevice::getMaxGDSSize() const
+{
+  if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+    return MAX_GDS_SIZE_800;
+  } else {
+    return 0;
+  }
+}
+uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const
+{
+  return 12;
+}
+
+uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const
+{
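+  // Map the abstract resource class to the concrete IL resource ID used on
+  // this device.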
+  switch(id) {
+  default:
+    assert(0 && "ID type passed in is unknown!");
+    break;
+  case CONSTANT_ID:
+  case RAW_UAV_ID:
+    if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+      return GLOBAL_RETURN_RAW_UAV_ID;
+    } else {
+      return DEFAULT_RAW_UAV_ID;
+    }
+  case GLOBAL_ID:
+  case ARENA_UAV_ID:
+    return DEFAULT_ARENA_UAV_ID;
+  case LDS_ID:
+    if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+      return DEFAULT_LDS_ID;
+    } else {
+      return DEFAULT_ARENA_UAV_ID;
+    }
+  case GDS_ID:
+    if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+      return DEFAULT_GDS_ID;
+    } else {
+      return DEFAULT_ARENA_UAV_ID;
+    }
+  case SCRATCH_ID:
+    if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+      return DEFAULT_SCRATCH_ID;
+    } else {
+      return DEFAULT_ARENA_UAV_ID;
+    }
+  };
+  return 0;
+}
+
+size_t AMDILEvergreenDevice::getWavefrontSize() const
+{
+  return AMDILDevice::WavefrontSize;
+}
+
+uint32_t AMDILEvergreenDevice::getGeneration() const
+{
+  return AMDILDeviceInfo::HD5XXX;
+}
+
+void AMDILEvergreenDevice::setCaps()
+{
+  mHWBits.set(AMDILDeviceInfo::ByteGDSOps);
+  mSWBits.reset(AMDILDeviceInfo::ByteGDSOps);
+
+  mSWBits.set(AMDILDeviceInfo::ArenaSegment);
+  mHWBits.set(AMDILDeviceInfo::ArenaUAV);
+  mHWBits.set(AMDILDeviceInfo::Semaphore);
+  if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
+    mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+    mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
+  }
+  mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
+  if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) {
+    mHWBits.set(AMDILDeviceInfo::ByteStores);
+  }
+  if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+    mSWBits.set(AMDILDeviceInfo::LocalMem);
+    mSWBits.set(AMDILDeviceInfo::RegionMem);
+  } else {
+    mHWBits.set(AMDILDeviceInfo::LocalMem);
+    mHWBits.set(AMDILDeviceInfo::RegionMem);
+  }
+  if (!mSTM->isApple()) {
+    if (mSTM->isOverride(AMDILDeviceInfo::Images)) {
+      mHWBits.set(AMDILDeviceInfo::Images);
+    }
+  } else {
+    mHWBits.set(AMDILDeviceInfo::Images);
+  }
+  if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+    mHWBits.set(AMDILDeviceInfo::CachedMem);
+  }
+  if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) {
+    mHWBits.set(AMDILDeviceInfo::MultiUAV);
+  }
+  if (mSTM->calVersion() > CAL_VERSION_SC_136) {
+    mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
+    mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
+    mHWBits.set(AMDILDeviceInfo::ArenaVectors);
+  } else {
+    mSWBits.set(AMDILDeviceInfo::ArenaVectors);
+  }
+  if (mSTM->calVersion() > CAL_VERSION_SC_137) {
+    mHWBits.set(AMDILDeviceInfo::LongOps);
+    mSWBits.reset(AMDILDeviceInfo::LongOps);
+  }
+  mHWBits.set(AMDILDeviceInfo::TmrReg);
+}
+
+FunctionPass*
+AMDILEvergreenDevice::getIOExpansion(
+  TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+  return new AMDILEGIOExpansion(TM, OptLevel);
+}
+
+AsmPrinter*
+AMDILEvergreenDevice::getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const
+{
+  return new AMDILEGAsmPrinter(ASM_PRINTER_ARGUMENTS);
+}
+
+FunctionPass*
+AMDILEvergreenDevice::getPointerManager(
+  TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+  return new AMDILEGPointerManager(TM, OptLevel);
+}
+
+AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
+  : AMDILEvergreenDevice(ST)
+{
+  setCaps();
+}
+
+AMDILCypressDevice::~AMDILCypressDevice()
+{
+}
+
+void AMDILCypressDevice::setCaps()
+{
+  if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+    mHWBits.set(AMDILDeviceInfo::DoubleOps);
+    mHWBits.set(AMDILDeviceInfo::FMA);
+  }
+}
+
+
+AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
+  : AMDILEvergreenDevice(ST)
+{
+  setCaps();
+}
+
+AMDILCedarDevice::~AMDILCedarDevice()
+{
+}
+
+void AMDILCedarDevice::setCaps()
+{
+  mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILCedarDevice::getWavefrontSize() const
+{
+  return AMDILDevice::QuarterWavefrontSize;
+}
+
+AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
+  : AMDILEvergreenDevice(ST)
+{
+  setCaps();
+}
+
+AMDILRedwoodDevice::~AMDILRedwoodDevice()
+{
+}
+
+void AMDILRedwoodDevice::setCaps()
+{
+  mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILRedwoodDevice::getWavefrontSize() const
+{
+  return AMDILDevice::HalfWavefrontSize;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,94 @@
+//===-- AMDILEvergreenDevice.h --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILEVERGREENDEVICE_H_
+#define _AMDILEVERGREENDEVICE_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+namespace llvm
+{
+class AMDILSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+// The AMDILEvergreenDevice is the base device class for all of the Evergreen
+// series of cards. This class contains information required to differentiate
+// the Evergreen device from the generic AMDILDevice. This device represents
+// the capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDILEvergreenDevice : public AMDILDevice
+{
+public:
+  AMDILEvergreenDevice(AMDILSubtarget *ST);
+  virtual ~AMDILEvergreenDevice();
+  virtual size_t getMaxLDSSize() const;
+  virtual size_t getMaxGDSSize() const;
+  virtual size_t getWavefrontSize() const;
+  virtual uint32_t getGeneration() const;
+  virtual uint32_t getMaxNumUAVs() const;
+  virtual uint32_t getResourceID(uint32_t) const;
+  virtual FunctionPass*
+  getIOExpansion(TargetMachine&, CodeGenOpt::Level) const;
+  virtual AsmPrinter*
+  getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const;
+  virtual FunctionPass*
+  getPointerManager(TargetMachine&, CodeGenOpt::Level) const;
+protected:
+  virtual void setCaps();
+}; // AMDILEvergreenDevice
+
+// The AMDILCypressDevice is similar to the AMDILEvergreenDevice, except it has
+// support for double precision operations. This device is used to represent
+// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+// and HD59XX cards.
+class AMDILCypressDevice : public AMDILEvergreenDevice
+{
+public:
+  AMDILCypressDevice(AMDILSubtarget *ST);
+  virtual ~AMDILCypressDevice();
+private:
+  virtual void setCaps();
+}; // AMDILCypressDevice
+
+
+// The AMDILCedarDevice is the class that represents all of the 'Cedar'-based
+// devices. This class differs from the base AMDILEvergreenDevice in that the
+// device is roughly a quarter of the 'Juniper'. These are commercially known as the
+// HD54XX and HD53XX series of cards.
+class AMDILCedarDevice : public AMDILEvergreenDevice
+{
+public:
+  AMDILCedarDevice(AMDILSubtarget *ST);
+  virtual ~AMDILCedarDevice();
+  virtual size_t getWavefrontSize() const;
+private:
+  virtual void setCaps();
+}; // AMDILCedarDevice
+
+// The AMDILRedwoodDevice is the class that represents all of the 'Redwood'-based
+// devices. This class differs from the base class, in that these devices are
+// considered about half of a 'Juniper' device. These are commercially known as
+// the HD55XX and HD56XX series of cards.
+class AMDILRedwoodDevice : public AMDILEvergreenDevice
+{
+public:
+  AMDILRedwoodDevice(AMDILSubtarget *ST);
+  virtual ~AMDILRedwoodDevice();
+  virtual size_t getWavefrontSize() const;
+private:
+  virtual void setCaps();
+}; // AMDILRedwoodDevice
+
+} // namespace llvm
+#endif // _AMDILEVERGREENDEVICE_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,32 @@
+//===-- AMDILFixupKinds.h -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AMDIL target-specific MCFixup kinds.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AMDIL_AMDILFIXUPKINDS_H
+#define LLVM_AMDIL_AMDILFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm
+{
+namespace AMDIL
+{
+enum Fixups {
+  reloc_pcrel_4byte = FirstTargetFixupKind,  // 32-bit pcrel, e.g. a branch.
+  reloc_riprel_4byte,                        // 32-bit rip-relative
+};
+}
+}
+
+#endif
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,543 @@
+//===-- AMDILFormats.td ---------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Instruction format classes and intrinsic helper classes for the AMDIL
+// backend.
+//
+//===----------------------------------------------------------------------===//
+
+include "AMDILTokenDesc.td"
+
+//===--------------------------------------------------------------------===//
+// The parent IL instruction class that inherits from the Instruction class. This
+// class sets the corresponding namespace, the output and input dag lists, the
+// pattern to match, and the string to print out for the assembly printer.
+//===--------------------------------------------------------------------===//
+class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+     let Namespace = "AMDIL";
+     dag OutOperandList = outs;
+     dag InOperandList = ins;
+     ILOpCode operation = op;
+     let Pattern = pattern;
+     let AsmString = !strconcat(asmstr, "\n");
+     bit hasIEEEFlag = 0;
+     bit hasZeroOpFlag = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// The base class for vector insert instructions. It is a single dest, quad
+// source instruction where the last two source operands must be 32bit
+// immediate values that encode the swizzle of the source register.
+// The src2 and src3 operands must also be inversions of each other such
+// that if src2 is 0x1000300 (x0z0), src3 must be 0x20004 (0y0w). The values
+// are encoded as a 32bit integer with each 8 bits representing a swizzle value.
+// The encoding is as follows for 32bit register types:
+// 0x00 -> '_'
+// 0x01 -> 'x'
+// 0x02 -> 'y'
+// 0x03 -> 'z'
+// 0x04 -> 'w'
+// 0x05 -> 'x'
+// 0x06 -> 'y'
+// 0x07 -> 'z'
+// 0x08 -> 'w'
+// 0x09 -> '0'
+// The encoding is as follows for 64bit register types:
+// 0x00 -> "__"
+// 0x01 -> "xy"
+// 0x02 -> "zw"
+// 0x03 -> "xy"
+// 0x04 -> "zw"
+// 0x05 -> "00"
+//===--------------------------------------------------------------------===//
+class InsertVectorClass<ILOpCode op, RegisterClass DReg, RegisterClass SReg,
+      SDNode OpNode, string asmstr> :
+      ILFormat<op, (outs DReg:$dst),
+      (ins DReg:$src0, SReg:$src1, i32imm:$src2, i32imm:$src3),
+      !strconcat(asmstr, " $dst, $src0, $src1"),
+      [(set DReg:$dst, (OpNode DReg:$src0, SReg:$src1,
+                     timm:$src2, timm:$src3))]>;
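+
+// Illustrative sketch only: a hypothetical instantiation of InsertVectorClass.
+// The register classes (GPRV4F32, GPRF32) and the target node (il_vinsert) are
+// placeholder names, not ones defined here; the two immediates would carry the
+// complementary swizzle masks described above.
+//   def VINSERT_EXAMPLE : InsertVectorClass<IL_OP_MOV, GPRV4F32, GPRF32,
+//                                           il_vinsert, "mov">;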
+
+//===--------------------------------------------------------------------===//
+// Class that has one input parameter and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0" and
+// handles the unary math operators.
+// It sets the binary tokens ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input and output register 0.
+//===--------------------------------------------------------------------===//
+class OneInOneOut<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : ILFormat<op, outs, ins, asmstr, pattern>
+{
+     ILDst       dst_reg;
+     ILDstMod    dst_mod;
+     ILRelAddr   dst_rel;
+     ILSrc       dst_reg_rel;
+     ILSrcMod    dst_reg_rel_mod;
+     ILSrc       src0_reg;
+     ILSrcMod    src0_mod;
+     ILRelAddr   src0_rel;
+     ILSrc       src0_reg_rel;
+     ILSrcMod    src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// A simplified version of the OneInOneOut class where the pattern is standard
+// and does not need special cases. This requires that the pattern has
+// an SDNode and takes a source and destination register that is of type
+// RegisterClass. This is the standard unary op class.
+//===--------------------------------------------------------------------===//
+class UnaryOp<ILOpCode op, SDNode OpNode,
+      RegisterClass dRegs, RegisterClass sRegs>
+      : OneInOneOut<op, (outs dRegs:$dst), (ins sRegs:$src),
+      !strconcat(op.Text, " $dst, $src"),
+      [(set dRegs:$dst, (OpNode sRegs:$src))]>;
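+
+// Illustrative sketch only: a unary math instruction built from UnaryOp. The
+// def name is hypothetical and GPRF32 is assumed to be the backend's 32-bit
+// float register class; fsqrt is the generic square-root SelectionDAG node.
+//   def SQRT_EXAMPLE : UnaryOp<IL_OP_SQRT, fsqrt, GPRF32, GPRF32>;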
+
+//===--------------------------------------------------------------------===//
+// This class is similar to the UnaryOp class; however, there is no
+// result value to assign.
+//===--------------------------------------------------------------------===//
+class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : ILFormat<op, outs, ins, asmstr, pattern>
+{
+     ILSrc       src0_reg;
+     ILSrcMod    src0_mod;
+     ILRelAddr   src0_rel;
+     ILSrc       src0_reg_rel;
+     ILSrcMod    src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have two input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1" and
+// handles the binary math operators and comparison operations.
+// It sets the binary tokens ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input register 1.
+//===--------------------------------------------------------------------===//
+class TwoInOneOut<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : OneInOneOut<op, outs, ins, asmstr, pattern>
+{
+     ILSrc       src1_reg;
+     ILSrcMod    src1_mod;
+     ILRelAddr   src1_rel;
+     ILSrc       src1_reg_rel;
+     ILSrcMod    src1_reg_rel_mod;
+}
+//===--------------------------------------------------------------------===//
+// A simplification of the TwoInOneOut pattern for Binary Operations.
+// This class is a helper class that assumes the simple pattern of
+// $dst = op $src0 $src1.
+// Other types of matching patterns need to use the TwoInOneOut class.
+//===--------------------------------------------------------------------===//
+class BinaryOp<ILOpCode op, SDNode OpNode, RegisterClass dReg,
+      RegisterClass sReg0, RegisterClass sReg1>
+      : TwoInOneOut<op, (outs dReg:$dst), (ins sReg0:$src0, sReg1:$src1),
+      !strconcat(op.Text, " $dst, $src0, $src1"),
+      [(set dReg:$dst, (OpNode sReg0:$src0, sReg1:$src1))]>;
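+
+// Illustrative sketch only: a binary math instruction built from BinaryOp. The
+// def name is hypothetical; GPRI32 is the 32-bit integer register class used
+// elsewhere in this file and add is the generic integer-add SelectionDAG node.
+//   def IADD_EXAMPLE : BinaryOp<IL_OP_I_ADD, add, GPRI32, GPRI32, GPRI32>;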
+
+//===--------------------------------------------------------------------===//
+// The base class for vector extract instructions. The vector extract
+// instructions take as an input value a source register and a 32bit integer
+// with the same encoding as specified in InsertVectorClass and produces
+// a result with only the swizzled component in the destination register.
+//===--------------------------------------------------------------------===//
+class ExtractVectorClass<RegisterClass DReg, RegisterClass SReg, SDNode OpNode>
+: TwoInOneOut<IL_OP_MOV, (outs DReg:$dst), (ins SReg:$src0, i32imm:$src1),
+     "mov $dst, $src0",
+     [(set DReg:$dst, (OpNode SReg:$src0, timm:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// The base class for vector concatenation. This class creates either a vec2
+// or a vec4 of 32bit data types or a vec2 of 64bit data types. This is done
+// by swizzling either the 'x' or 'xy' components of the source operands
+// into the destination register.
+//===--------------------------------------------------------------------===//
+class VectorConcatClass<RegisterClass Dst, RegisterClass Src, SDNode OpNode>
+      : TwoInOneOut<IL_OP_I_ADD, (outs Dst:$dst), (ins Src:$src0, Src:$src1),
+      "iadd $dst, $src0, $src1",
+      [(set Dst:$dst, (OpNode Src:$src0, Src:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// Similar to the UnaryOpNoRet class, but takes as arguments two input
+// operands. Used mainly for barrier instructions on the PC platform.
+//===--------------------------------------------------------------------===//
+class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : UnaryOpNoRet<op, outs, ins, asmstr, pattern>
+{
+     ILSrc       src1_reg;
+     ILSrcMod    src1_mod;
+     ILRelAddr   src1_rel;
+     ILSrc       src1_reg_rel;
+     ILSrcMod    src1_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have three input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
+// handles the mad and conditional mov instructions.
+// It sets the binary tokens ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative.
+// This class is the parent class of TernaryOp.
+//===--------------------------------------------------------------------===//
+class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : TwoInOneOut<op, outs, ins, asmstr, pattern> {
+           ILSrc       src2_reg;
+           ILSrcMod    src2_mod;
+           ILRelAddr   src2_rel;
+           ILSrc       src2_reg_rel;
+           ILSrcMod    src2_reg_rel_mod;
+      }
+
+//===--------------------------------------------------------------------===//
+// The generic version of the three-input pattern uses a standard pattern but
+// allows specification of the register classes to further generalize the class.
+// This class is mainly used in the generic multiclasses in AMDILMultiClass.td.
+//===--------------------------------------------------------------------===//
+class TernaryOp<ILOpCode op, SDNode OpNode,
+      RegisterClass dReg,
+      RegisterClass sReg0,
+      RegisterClass sReg1,
+      RegisterClass sReg2>
+      : ThreeInOneOut<op, (outs dReg:$dst),
+      (ins sReg0:$src0, sReg1:$src1, sReg2:$src2),
+      !strconcat(op.Text, " $dst, $src0, $src1, $src2"),
+      [(set dReg:$dst,
+                (OpNode sReg0:$src0, sReg1:$src1, sReg2:$src2))]>;
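+
+// Illustrative sketch only: a ternary instruction built from TernaryOp. The
+// def name is hypothetical and il_mad stands in for whatever target-specific
+// multiply-add node the backend defines; it is not introduced in this file.
+//   def IMAD_EXAMPLE : TernaryOp<IL_OP_I_MAD, il_mad, GPRI32, GPRI32,
+//                                GPRI32, GPRI32>;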
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have four input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2, Src3" and
+// handles instructions that take four source operands.
+// It sets the binary tokens ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative.
+//===--------------------------------------------------------------------===//
+class FourInOneOut<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : ThreeInOneOut<op, outs, ins, asmstr, pattern> {
+           ILSrc       src3_reg;
+           ILSrcMod    src3_mod;
+           ILRelAddr   src3_rel;
+           ILSrc       src3_reg_rel;
+           ILSrcMod    src3_reg_rel_mod;
+      }
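+
+// Illustrative sketch only: the *InOneOut classes are instantiated with explicit
+// out/in dag lists and an assembly string. The def name and the empty pattern
+// below are hypothetical:
+//   def UBIT_INSERT_EXAMPLE : FourInOneOut<IL_OP_UBIT_INSERT,
+//        (outs GPRI32:$dst),
+//        (ins GPRI32:$src0, GPRI32:$src1, GPRI32:$src2, GPRI32:$src3),
+//        "ubit_insert $dst, $src0, $src1, $src2, $src3", []>;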
+
+
+//===--------------------------------------------------------------------===//
+// The macro class is an extension of OneInOneOut that is tailored for
+// macros only, where all the register types are the same.
+//===--------------------------------------------------------------------===//
+class UnaryMacro<RegisterClass Dst, RegisterClass Src0, SDNode OpNode>
+: OneInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+     (ins Src0:$src0),
+     "($dst),($src0)",
+     [(set Dst:$dst, (OpNode Src0:$src0))]>;
+
+//===--------------------------------------------------------------------===//
+// The macro class is an extension of TwoInOneOut but is tailored for
+// macros only, where all the register types are the same.
+//===--------------------------------------------------------------------===//
+class BinaryMacro<RegisterClass Dst,
+      RegisterClass Src0,
+      RegisterClass Src1,
+      SDNode OpNode>
+      : TwoInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+      (ins Src0: $src0, Src1:$src1),
+      "($dst),($src0, $src1)",
+      [(set Dst:$dst, (OpNode Src0:$src0, Src1:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// Classes for dealing with atomic instructions w/ 32bit pointers
+//===--------------------------------------------------------------------===//
+class Append<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM3232:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst")),
+      [(set GPRI32:$dst, (intr ADDR:$id))]>;
+
+
+class UniAtom<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM3232:$ptr, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+      [(set GPRI32:$dst, (intr ADDR:$ptr, timm:$id))]>;
+
+
+class UniAtomNoRet<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs), (ins MEM3232:$ptr, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr")),
+      [(intr ADDR:$ptr, timm:$id)]>;
+
+class BinAtom<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$src, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+      [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, timm:$id))]>;
+
+
+class BinAtomNoRet<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs), (ins MEM3232:$ptr, GPRI32:$src, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+      [(intr ADDR:$ptr, GPRI32:$src, timm:$id)]>;
+
+class TriAtom<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+      [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class CmpXChg<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")),
+      [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class TriAtomNoRet<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs),
+      (ins MEM3232:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+      [(intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id)]>;
+
+class CmpXChgNoRet<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs),
+      (ins MEM3232:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
+      [(intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id)]>;
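+
+// Illustrative sketch only: a returning binary atomic built from BinAtom. The
+// def name, the idType string and the atom_add_example node are placeholders
+// for whatever the real instruction and intrinsic definitions use.
+//   def ATOM_ADD_EXAMPLE : BinAtom<IL_OP_UAV_READ_ADD, "_id($id)",
+//                                  atom_add_example>;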
+
+
+class UniAtomI64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI64:$dst),
+      (ins MEM3232:$ptr, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, ${ptr}0")),
+      [(set GPRI64:$dst, (intr ADDR:$ptr, timm:$id))]>;
+
+
+class UniAtomNoRetI64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs), (ins MEM3232:$ptr, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," ${ptr}0")),
+      [(intr ADDR:$ptr, timm:$id)]>;
+
+class BinAtomI64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI64:$dst),
+      (ins MEM3232:$ptr, GPRI64:$src, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, ${ptr}0, $src")),
+      [(set GPRI64:$dst, (i64 (intr ADDR:$ptr, GPRI64:$src, timm:$id)))]>;
+
+
+class BinAtomNoRetI64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs), (ins MEM3232:$ptr, GPRI64:$src, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," ${ptr}0, $src")),
+      [(intr ADDR:$ptr, GPRI64:$src, timm:$id)]>;
+
+class TriAtomI64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI64:$dst),
+      (ins MEM3232:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, ${ptr}0, $src, $src1")),
+      [(set GPRI64:$dst, (intr ADDR:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id))]>;
+
+class CmpXChgI64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI64:$dst),
+      (ins MEM3232:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, ${ptr}0, $src1, $src")),
+      [(set GPRI64:$dst, (intr ADDR:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id))]>;
+
+class TriAtomNoRetI64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs),
+      (ins MEM3232:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," ${ptr}0, $src, $src1")),
+      [(intr ADDR:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id)]>;
+
+class CmpXChgNoRetI64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs),
+      (ins MEM3232:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," ${ptr}0, $src1, $src")),
+      [(intr ADDR:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id)]>;
+
+
+//===--------------------------------------------------------------------===//
+// Classes for dealing with atomic instructions w/ 64bit pointers
+//===--------------------------------------------------------------------===//
+class Append64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM6464:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst")),
+      [(set GPRI32:$dst, (intr ADDR64:$id))]>;
+
+
+class UniAtom64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM6464:$ptr, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+      [(set GPRI32:$dst, (intr ADDR64:$ptr, timm:$id))]>;
+
+
+class UniAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs), (ins MEM6464:$ptr, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr")),
+      [(intr ADDR64:$ptr, timm:$id)]>;
+
+class BinAtom64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$src, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+      [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, timm:$id))]>;
+
+
+class BinAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs), (ins MEM6464:$ptr, GPRI32:$src, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+      [(intr ADDR64:$ptr, GPRI32:$src, timm:$id)]>;
+
+class TriAtom64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+      [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class CmpXChg64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")),
+      [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class TriAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs),
+      (ins MEM6464:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+      [(intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id)]>;
+
+class CmpXChgNoRet64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs),
+      (ins MEM6464:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
+      [(intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id)]>;
+
+
+class UniAtom64I64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI64:$dst),
+      (ins MEM6464:$ptr, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+      [(set GPRI64:$dst, (intr ADDR64:$ptr, timm:$id))]>;
+
+
+class UniAtomNoRet64I64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs), (ins MEM6464:$ptr, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr")),
+      [(intr ADDR64:$ptr, timm:$id)]>;
+
+class BinAtom64I64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI64:$dst),
+      (ins MEM6464:$ptr, GPRI64:$src, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+      [(set GPRI64:$dst, (intr ADDR64:$ptr, GPRI64:$src, timm:$id))]>;
+
+
+class BinAtomNoRet64I64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs), (ins MEM6464:$ptr, GPRI64:$src, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+      [(intr ADDR64:$ptr, GPRI64:$src, timm:$id)]>;
+
+class TriAtom64I64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI64:$dst),
+      (ins MEM6464:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+      [(set GPRI64:$dst, (intr ADDR64:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id))]>;
+
+class CmpXChg64I64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs GPRI64:$dst),
+      (ins MEM6464:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")),
+      [(set GPRI64:$dst, (intr ADDR64:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id))]>;
+
+class TriAtomNoRet64I64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs),
+      (ins MEM6464:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+      [(intr ADDR64:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id)]>;
+
+class CmpXChgNoRet64I64<ILOpCode op, string idType, SDNode intr>
+      : ILFormat<op, (outs),
+      (ins MEM6464:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+      !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
+      [(intr ADDR64:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id)]>;
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic versions of the above classes but for Target specific intrinsics
+// instead of SDNode patterns.
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+     class VoidIntLong :
+          Intrinsic<[llvm_i64_ty], [], []>;
+     class VoidIntInt :
+          Intrinsic<[llvm_i32_ty], [], []>;
+     class VoidIntBool :
+          Intrinsic<[llvm_i32_ty], [], []>;
+     class UnaryIntInt :
+          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>;
+     class UnaryIntFloat :
+          Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], []>;
+     class ConvertIntFTOI :
+          Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], []>;
+     class ConvertIntITOF :
+          Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], []>;
+     class UnaryIntNoRetInt :
+          Intrinsic<[], [llvm_anyint_ty], []>;
+     class UnaryIntNoRetFloat :
+          Intrinsic<[], [llvm_anyfloat_ty], []>;
+     class BinaryIntInt :
+          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
+     class BinaryIntFloat :
+          Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
+     class BinaryIntNoRetInt :
+          Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+     class BinaryIntNoRetFloat :
+          Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+     class TernaryIntInt :
+          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+          LLVMMatchType<0>, LLVMMatchType<0>], []>;
+     class TernaryIntFloat :
+          Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+          LLVMMatchType<0>, LLVMMatchType<0>], []>;
+     class QuaternaryIntInt :
+          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+          LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], []>;
+     class UnaryAtomicInt :
+          Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+     class BinaryAtomicInt :
+          Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+     class TernaryAtomicInt :
+          Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+     class VoidAtomicIntNoRet :
+          Intrinsic<[], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+     class UnaryAtomicIntNoRet :
+          Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+     class BinaryAtomicIntNoRet :
+          Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+     class TernaryAtomicIntNoRet :
+          Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+     class UnaryAtomicLong :
+          Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+     class BinaryAtomicLong :
+          Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+     class TernaryAtomicLong :
+          Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty]>;
+     class VoidAtomicLongNoRet :
+          Intrinsic<[], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+     class UnaryAtomicLongNoRet :
+          Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+     class BinaryAtomicLongNoRet :
+          Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+     class TernaryAtomicLongNoRet :
+          Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,55 @@
+//===-- AMDILFrameLowering.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe the layout of a stack frame on an AMDIL target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
+                                       int LAO, unsigned TransAl)
+  : TargetFrameLowering(D, StackAl, LAO, TransAl)
+{
+}
+
+AMDILFrameLowering::~AMDILFrameLowering()
+{
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index.
+int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+    int FI) const
+{
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return MFI->getObjectOffset(FI);
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
+{
+  NumEntries = 0;
+  return 0;
+}
+void
+AMDILFrameLowering::emitPrologue(MachineFunction &MF) const
+{
+}
+void
+AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+}
+bool
+AMDILFrameLowering::hasFP(const MachineFunction &MF) const
+{
+  return false;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,42 @@
+//===-- AMDILFrameLowering.h ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe the layout of a stack frame on an AMDIL target
+// machine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILFRAME_LOWERING_H_
+#define _AMDILFRAME_LOWERING_H_
+#include "AMDIL.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+/// Information about the stack frame layout on the AMDIL targets. It holds
+/// the direction of the stack growth, the known stack alignment on entry to
+/// each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+
+namespace llvm
+{
+class AMDILFrameLowering : public TargetFrameLowering
+{
+public:
+  AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
+                     TransAl = 1);
+  virtual ~AMDILFrameLowering();
+  virtual int getFrameIndexOffset(const MachineFunction &MF,
+                                  int FI) const;
+  virtual const SpillSlot *
+  getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+  virtual void emitPrologue(MachineFunction &MF) const;
+  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+  virtual bool hasFP(const MachineFunction &MF) const;
+}; // class AMDILFrameLowering
+} // namespace llvm
+#endif // _AMDILFRAME_LOWERING_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1337 @@
+//===-- AMDILIOExpansion.cpp ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDIL IO Expansion pass expands pseudo IO instructions into sequences
+// of instructions that produce the correct results. These instructions are
+// not expanded earlier because every pass that runs before this one may
+// assume it can still generate load and store instructions, so only passes
+// that never generate loads or stores may be scheduled after this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDIL.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+char AMDILIOExpansion::ID = 0;
+namespace llvm
+{
+FunctionPass*
+createAMDILIOExpansion(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+{
+  return TM.getSubtarget<AMDILSubtarget>()
+         .device()->getIOExpansion(TM, OptLevel);
+}
+}
+
+AMDILIOExpansion::AMDILIOExpansion(TargetMachine &tm,
+                                   CodeGenOpt::Level OptLevel) :
+  MachineFunctionPass(ID), TM(tm)
+{
+  mSTM = &tm.getSubtarget<AMDILSubtarget>();
+  mDebug = DEBUGME;
+  mTII = tm.getInstrInfo();
+  mKM = NULL;
+}
+
+AMDILIOExpansion::~AMDILIOExpansion()
+{
+}
+
+bool
+AMDILIOExpansion::runOnMachineFunction(MachineFunction &MF)
+{
+  mKM = const_cast<AMDILKernelManager*>(mSTM->getKernelManager());
+  mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+  for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+       MFI != MFE; ++MFI) {
+    MachineBasicBlock *MBB = MFI;
+    for (MachineBasicBlock::iterator MBI = MBB->begin(), MBE = MBB->end();
+         MBI != MBE; ++MBI) {
+      MachineInstr *MI = MBI;
+      if (isIOInstruction(MI)) {
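+        // Record the current block for the expansion helpers; if saveInst is
+        // still false after expansion, the original pseudo instruction is erased.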
+        mBB = MBB;
+        saveInst = false;
+        expandIOInstruction(MI);
+        if (!saveInst) {
+          // erase() returns an iterator to the following instruction,
+          // so step back one to keep the loop increment from skipping it.
+          MBI = MBB->erase(MI);
+          --MBI;
+        }
+      }
+    }
+  }
+  return false;
+}
+const char *AMDILIOExpansion::getPassName() const
+{
+  return "AMDIL Generic IO Expansion Pass";
+}
+bool
+AMDILIOExpansion::isIOInstruction(MachineInstr *MI)
+{
+  if (!MI) {
+    return false;
+  }
+  if (isLoadInst(TM, MI) || isStoreInst(TM, MI)) {
+    return true;
+  }
+  return false;
+}
+void
+AMDILIOExpansion::expandIOInstruction(MachineInstr *MI)
+{
+  assert(isIOInstruction(MI) && "Must be an IO instruction to "
+         "be passed to this function!");
+  if (isLoadInst(TM, MI)) {
+    if (isGlobalInst(TM, MI)) {
+      expandGlobalLoad(MI);
+    } else if (isRegionInst(TM, MI)) {
+      expandRegionLoad(MI);
+    } else if (isPrivateInst(TM, MI)) {
+      expandPrivateLoad(MI);
+    } else if (isLocalInst(TM, MI)) {
+      expandLocalLoad(MI);
+    } else if (isConstantInst(TM, MI)) {
+      if (isConstantPoolInst(TM, MI)) {
+        expandConstantPoolLoad(MI);
+      } else {
+        expandConstantLoad(MI);
+      }
+    } else {
+      assert(!"Found an unsupported load instruction!");
+    }
+  } else if (isStoreInst(TM, MI)) {
+    if (isGlobalInst(TM, MI)) {
+      expandGlobalStore(MI);
+    } else if (isRegionInst(TM, MI)) {
+      expandRegionStore(MI);
+    } else if (isPrivateInst(TM, MI)) {
+      expandPrivateStore(MI);
+    } else if (isLocalInst(TM, MI)) {
+      expandLocalStore(MI);
+    } else {
+      assert(!"Found an unsupported store instruction!");
+    }
+  } else {
+    assert(!"Found an unsupported IO instruction!");
+  }
+}
+
+bool
+AMDILIOExpansion::isAddrCalcInstr(MachineInstr *MI)
+{
+  if (isPrivateInst(TM, MI) && isLoadInst(TM, MI)) {
+    // This section of code is a workaround for the problem of globally
+    // scoped constant-address variables. The problem is that although
+    // they are declared in the constant address space, all variables
+    // must be allocated in the private address space, so a load from
+    // the global address automatically goes into the private address
+    // space. However, the data section is placed in the constant
+    // address space, so we need to check whether the load's base
+    // address is a global variable or not. Only if it is not a global
+    // variable can we do the address calculation into the private
+    // memory ring.
+
+    MachineMemOperand& memOp = (**MI->memoperands_begin());
+    const Value *V = memOp.getValue();
+    if (V) {
+      const GlobalValue *GV = dyn_cast<GlobalVariable>(V);
+      return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)
+             && !(GV);
+    } else {
+      return false;
+    }
+  } else if (isConstantPoolInst(TM, MI) && isLoadInst(TM, MI)) {
+    return MI->getOperand(1).isReg();
+  } else if (isPrivateInst(TM, MI) && isStoreInst(TM, MI)) {
+    return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem);
+  } else if (isLocalInst(TM, MI) && (isStoreInst(TM, MI) || isLoadInst(TM, MI))) {
+    return mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem);
+  }
+  return false;
+}
+
+bool
+AMDILIOExpansion::isExtendLoad(MachineInstr *MI)
+{
+  return isSExtLoadInst(TM, MI) || isZExtLoadInst(TM, MI) || isAExtLoadInst(TM, MI);
+}
+
+bool
+AMDILIOExpansion::isHardwareRegion(MachineInstr *MI)
+{
+  return (isRegionInst(TM, MI) && (isLoadInst(TM, MI) || isStoreInst(TM, MI)) &&
+          mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem));
+}
+bool
+AMDILIOExpansion::isHardwareLocal(MachineInstr *MI)
+{
+  return (isLocalInst(TM, MI) && (isLoadInst(TM, MI) || isStoreInst(TM, MI)) &&
+          mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem));
+}
+bool
+AMDILIOExpansion::isPackedData(MachineInstr *MI)
+{
+  switch(MI->getOpcode()) {
+  default:
+    if (isTruncStoreInst(TM, MI)) {
+      switch (MI->getDesc().OpInfo[0].RegClass) {
+      default:
+        break;
+      case AMDIL::GPRV2I64RegClassID:
+      case AMDIL::GPRV2I32RegClassID:
+        switch (getMemorySize(MI)) {
+        case 2:
+        case 4:
+          return true;
+        default:
+          break;
+        }
+        break;
+      case AMDIL::GPRV4I32RegClassID:
+        switch (getMemorySize(MI)) {
+        case 4:
+        case 8:
+          return true;
+        default:
+          break;
+        }
+        break;
+      }
+    }
+    break;
+    ExpandCaseToPackedTypes(AMDIL::CPOOLLOAD);
+    ExpandCaseToPackedTypes(AMDIL::CPOOLSEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::CPOOLZEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::CPOOLAEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALLOAD);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALSEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALZEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALAEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::LOCALLOAD);
+    ExpandCaseToPackedTypes(AMDIL::LOCALSEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::LOCALZEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::LOCALAEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::REGIONLOAD);
+    ExpandCaseToPackedTypes(AMDIL::REGIONSEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::REGIONZEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::REGIONAEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATELOAD);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATESEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATEZEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATEAEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::CONSTANTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::CONSTANTSEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::CONSTANTAEXTLOAD);
+    ExpandCaseToPackedTypes(AMDIL::CONSTANTZEXTLOAD);
+    ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE)
+    ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
+    ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
+    ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALSTORE);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATESTORE);
+    ExpandCaseToPackedTypes(AMDIL::LOCALSTORE);
+    ExpandCaseToPackedTypes(AMDIL::REGIONSTORE);
+    ExpandCaseToPackedTypes(AMDIL::CPOOLLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::CPOOLSEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::CPOOLZEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::CPOOLAEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALSEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALZEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALAEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::LOCALLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::LOCALSEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::LOCALZEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::LOCALAEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::REGIONLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::REGIONSEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::REGIONZEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::REGIONAEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATELOAD64);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATESEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATEZEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATEAEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::CONSTANTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::CONSTANTSEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::CONSTANTAEXTLOAD64);
+    ExpandCaseToPackedTypes(AMDIL::CONSTANTZEXTLOAD64);
+    ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE64)
+    ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE64);
+    ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE64);
+    ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE64);
+    ExpandCaseToPackedTypes(AMDIL::GLOBALSTORE64);
+    ExpandCaseToPackedTypes(AMDIL::PRIVATESTORE64);
+    ExpandCaseToPackedTypes(AMDIL::LOCALSTORE64);
+    ExpandCaseToPackedTypes(AMDIL::REGIONSTORE64);
+    return true;
+  }
+  return false;
+}
+
+bool
+AMDILIOExpansion::isStaticCPLoad(MachineInstr *MI)
+{
+  if (isConstantPoolInst(TM, MI) && isLoadInst(TM, MI)) {
+    uint32_t x = 0;
+    uint32_t num = MI->getNumOperands();
+    for (x = 0; x < num; ++x) {
+      if (MI->getOperand(x).isCPI()) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool
+AMDILIOExpansion::isNbitType(Type *mType, uint32_t nBits, bool isScalar)
+{
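+  // Returns true when mType is exactly nBits wide. Pointers are checked by
+  // their pointee type; for vector and array types, isScalar compares the
+  // total size, otherwise the element size is compared.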
+  if (!mType) {
+    return false;
+  }
+  if (dyn_cast<PointerType>(mType)) {
+    PointerType *PT = dyn_cast<PointerType>(mType);
+    return isNbitType(PT->getElementType(), nBits);
+  } else if (dyn_cast<StructType>(mType)) {
+    return TM.getTargetData()->getTypeSizeInBits(mType) == nBits;
+  } else if (dyn_cast<VectorType>(mType)) {
+    VectorType *VT = dyn_cast<VectorType>(mType);
+    size_t size = VT->getScalarSizeInBits();
+    return (isScalar ?
+            VT->getNumElements() * size == nBits : size == nBits);
+  } else if (dyn_cast<ArrayType>(mType)) {
+    ArrayType *AT = dyn_cast<ArrayType>(mType);
+    size_t size = AT->getScalarSizeInBits();
+    return (isScalar ?
+            AT->getNumElements() * size == nBits : size == nBits);
+  } else if (mType->isSized()) {
+    return mType->getScalarSizeInBits() == nBits;
+  } else {
+    assert(0 && "Found a type that we don't know how to handle!");
+    return false;
+  }
+}
+
+bool
+AMDILIOExpansion::isHardwareInst(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc curInst;
+  getAsmPrinterFlags(MI, curInst);
+  return curInst.bits.HardwareInst;
+}
+
+uint32_t
+AMDILIOExpansion::getDataReg(MachineInstr *MI)
+{
+  REG_PACKED_TYPE id = getPackedID(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    return AMDIL::R1011;
+  case 4:
+    if (id == UNPACK_V4I8
+        || id == PACK_V4I8) {
+      return AMDIL::R1011;
+    } else if (id == UNPACK_V2I16
+               || id == PACK_V2I16) {
+      return AMDIL::Rxy1011;
+    }
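+    // No packed 4-byte type matched; fall through to the 2-byte and scalar cases.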
+  case 2:
+    if (id == UNPACK_V2I8
+        || id == PACK_V2I8) {
+      return AMDIL::Rxy1011;
+    }
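+    // Not a packed 2-byte access; fall through to the scalar case.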
+  case 1:
+    return AMDIL::Rx1011;
+  case 8:
+    if (id == UNPACK_V4I16
+        || id == PACK_V4I16) {
+      return AMDIL::R1011;
+    }
+    return AMDIL::Rxy1011;
+  }
+}
+
+REG_PACKED_TYPE
+AMDILIOExpansion::getPackedID(MachineInstr *MI)
+{
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case AMDIL::GLOBALTRUNCSTORE64_v2i64i8:
+  case AMDIL::REGIONTRUNCSTORE64_v2i64i8:
+  case AMDIL::LOCALTRUNCSTORE64_v2i64i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i64i8:
+  case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+  case AMDIL::REGIONTRUNCSTORE_v2i64i8:
+  case AMDIL::LOCALTRUNCSTORE_v2i64i8:
+  case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
+  case AMDIL::GLOBALTRUNCSTORE64_v2i32i8:
+  case AMDIL::REGIONTRUNCSTORE64_v2i32i8:
+  case AMDIL::LOCALTRUNCSTORE64_v2i32i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i32i8:
+  case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+  case AMDIL::REGIONTRUNCSTORE_v2i32i8:
+  case AMDIL::LOCALTRUNCSTORE_v2i32i8:
+  case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
+  case AMDIL::GLOBALTRUNCSTORE64_v2i16i8:
+  case AMDIL::REGIONTRUNCSTORE64_v2i16i8:
+  case AMDIL::LOCALTRUNCSTORE64_v2i16i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i16i8:
+  case AMDIL::GLOBALSTORE64_v2i8:
+  case AMDIL::LOCALSTORE64_v2i8:
+  case AMDIL::REGIONSTORE64_v2i8:
+  case AMDIL::PRIVATESTORE64_v2i8:
+  case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+  case AMDIL::REGIONTRUNCSTORE_v2i16i8:
+  case AMDIL::LOCALTRUNCSTORE_v2i16i8:
+  case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
+  case AMDIL::GLOBALSTORE_v2i8:
+  case AMDIL::LOCALSTORE_v2i8:
+  case AMDIL::REGIONSTORE_v2i8:
+  case AMDIL::PRIVATESTORE_v2i8:
+    return PACK_V2I8;
+  case AMDIL::GLOBALTRUNCSTORE64_v4i32i8:
+  case AMDIL::REGIONTRUNCSTORE64_v4i32i8:
+  case AMDIL::LOCALTRUNCSTORE64_v4i32i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v4i32i8:
+  case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
+  case AMDIL::REGIONTRUNCSTORE_v4i32i8:
+  case AMDIL::LOCALTRUNCSTORE_v4i32i8:
+  case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
+  case AMDIL::GLOBALTRUNCSTORE64_v4i16i8:
+  case AMDIL::REGIONTRUNCSTORE64_v4i16i8:
+  case AMDIL::LOCALTRUNCSTORE64_v4i16i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v4i16i8:
+  case AMDIL::GLOBALSTORE64_v4i8:
+  case AMDIL::LOCALSTORE64_v4i8:
+  case AMDIL::REGIONSTORE64_v4i8:
+  case AMDIL::PRIVATESTORE64_v4i8:
+  case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
+  case AMDIL::REGIONTRUNCSTORE_v4i16i8:
+  case AMDIL::LOCALTRUNCSTORE_v4i16i8:
+  case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
+  case AMDIL::GLOBALSTORE_v4i8:
+  case AMDIL::LOCALSTORE_v4i8:
+  case AMDIL::REGIONSTORE_v4i8:
+  case AMDIL::PRIVATESTORE_v4i8:
+    return PACK_V4I8;
+  case AMDIL::GLOBALTRUNCSTORE64_v2i64i16:
+  case AMDIL::REGIONTRUNCSTORE64_v2i64i16:
+  case AMDIL::LOCALTRUNCSTORE64_v2i64i16:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i64i16:
+  case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
+  case AMDIL::REGIONTRUNCSTORE_v2i64i16:
+  case AMDIL::LOCALTRUNCSTORE_v2i64i16:
+  case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
+  case AMDIL::GLOBALTRUNCSTORE64_v2i32i16:
+  case AMDIL::REGIONTRUNCSTORE64_v2i32i16:
+  case AMDIL::LOCALTRUNCSTORE64_v2i32i16:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i32i16:
+  case AMDIL::GLOBALSTORE64_v2i16:
+  case AMDIL::LOCALSTORE64_v2i16:
+  case AMDIL::REGIONSTORE64_v2i16:
+  case AMDIL::PRIVATESTORE64_v2i16:
+  case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
+  case AMDIL::REGIONTRUNCSTORE_v2i32i16:
+  case AMDIL::LOCALTRUNCSTORE_v2i32i16:
+  case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
+  case AMDIL::GLOBALSTORE_v2i16:
+  case AMDIL::LOCALSTORE_v2i16:
+  case AMDIL::REGIONSTORE_v2i16:
+  case AMDIL::PRIVATESTORE_v2i16:
+    return PACK_V2I16;
+  case AMDIL::GLOBALTRUNCSTORE64_v4i32i16:
+  case AMDIL::REGIONTRUNCSTORE64_v4i32i16:
+  case AMDIL::LOCALTRUNCSTORE64_v4i32i16:
+  case AMDIL::PRIVATETRUNCSTORE64_v4i32i16:
+  case AMDIL::GLOBALSTORE64_v4i16:
+  case AMDIL::LOCALSTORE64_v4i16:
+  case AMDIL::REGIONSTORE64_v4i16:
+  case AMDIL::PRIVATESTORE64_v4i16:
+  case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
+  case AMDIL::REGIONTRUNCSTORE_v4i32i16:
+  case AMDIL::LOCALTRUNCSTORE_v4i32i16:
+  case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
+  case AMDIL::GLOBALSTORE_v4i16:
+  case AMDIL::LOCALSTORE_v4i16:
+  case AMDIL::REGIONSTORE_v4i16:
+  case AMDIL::PRIVATESTORE_v4i16:
+    return PACK_V4I16;
+
+  case AMDIL::GLOBALLOAD64_v2i8:
+  case AMDIL::GLOBALSEXTLOAD64_v2i8:
+  case AMDIL::GLOBALAEXTLOAD64_v2i8:
+  case AMDIL::GLOBALZEXTLOAD64_v2i8:
+  case AMDIL::LOCALLOAD64_v2i8:
+  case AMDIL::LOCALSEXTLOAD64_v2i8:
+  case AMDIL::LOCALAEXTLOAD64_v2i8:
+  case AMDIL::LOCALZEXTLOAD64_v2i8:
+  case AMDIL::REGIONLOAD64_v2i8:
+  case AMDIL::REGIONSEXTLOAD64_v2i8:
+  case AMDIL::REGIONAEXTLOAD64_v2i8:
+  case AMDIL::REGIONZEXTLOAD64_v2i8:
+  case AMDIL::PRIVATELOAD64_v2i8:
+  case AMDIL::PRIVATESEXTLOAD64_v2i8:
+  case AMDIL::PRIVATEAEXTLOAD64_v2i8:
+  case AMDIL::PRIVATEZEXTLOAD64_v2i8:
+  case AMDIL::CONSTANTLOAD64_v2i8:
+  case AMDIL::CONSTANTSEXTLOAD64_v2i8:
+  case AMDIL::CONSTANTAEXTLOAD64_v2i8:
+  case AMDIL::CONSTANTZEXTLOAD64_v2i8:
+  case AMDIL::GLOBALLOAD_v2i8:
+  case AMDIL::GLOBALSEXTLOAD_v2i8:
+  case AMDIL::GLOBALAEXTLOAD_v2i8:
+  case AMDIL::GLOBALZEXTLOAD_v2i8:
+  case AMDIL::LOCALLOAD_v2i8:
+  case AMDIL::LOCALSEXTLOAD_v2i8:
+  case AMDIL::LOCALAEXTLOAD_v2i8:
+  case AMDIL::LOCALZEXTLOAD_v2i8:
+  case AMDIL::REGIONLOAD_v2i8:
+  case AMDIL::REGIONSEXTLOAD_v2i8:
+  case AMDIL::REGIONAEXTLOAD_v2i8:
+  case AMDIL::REGIONZEXTLOAD_v2i8:
+  case AMDIL::PRIVATELOAD_v2i8:
+  case AMDIL::PRIVATESEXTLOAD_v2i8:
+  case AMDIL::PRIVATEAEXTLOAD_v2i8:
+  case AMDIL::PRIVATEZEXTLOAD_v2i8:
+  case AMDIL::CONSTANTLOAD_v2i8:
+  case AMDIL::CONSTANTSEXTLOAD_v2i8:
+  case AMDIL::CONSTANTAEXTLOAD_v2i8:
+  case AMDIL::CONSTANTZEXTLOAD_v2i8:
+    return UNPACK_V2I8;
+
+  case AMDIL::GLOBALLOAD64_v4i8:
+  case AMDIL::GLOBALSEXTLOAD64_v4i8:
+  case AMDIL::GLOBALAEXTLOAD64_v4i8:
+  case AMDIL::GLOBALZEXTLOAD64_v4i8:
+  case AMDIL::LOCALLOAD64_v4i8:
+  case AMDIL::LOCALSEXTLOAD64_v4i8:
+  case AMDIL::LOCALAEXTLOAD64_v4i8:
+  case AMDIL::LOCALZEXTLOAD64_v4i8:
+  case AMDIL::REGIONLOAD64_v4i8:
+  case AMDIL::REGIONSEXTLOAD64_v4i8:
+  case AMDIL::REGIONAEXTLOAD64_v4i8:
+  case AMDIL::REGIONZEXTLOAD64_v4i8:
+  case AMDIL::PRIVATELOAD64_v4i8:
+  case AMDIL::PRIVATESEXTLOAD64_v4i8:
+  case AMDIL::PRIVATEAEXTLOAD64_v4i8:
+  case AMDIL::PRIVATEZEXTLOAD64_v4i8:
+  case AMDIL::CONSTANTLOAD64_v4i8:
+  case AMDIL::CONSTANTSEXTLOAD64_v4i8:
+  case AMDIL::CONSTANTAEXTLOAD64_v4i8:
+  case AMDIL::CONSTANTZEXTLOAD64_v4i8:
+  case AMDIL::GLOBALLOAD_v4i8:
+  case AMDIL::GLOBALSEXTLOAD_v4i8:
+  case AMDIL::GLOBALAEXTLOAD_v4i8:
+  case AMDIL::GLOBALZEXTLOAD_v4i8:
+  case AMDIL::LOCALLOAD_v4i8:
+  case AMDIL::LOCALSEXTLOAD_v4i8:
+  case AMDIL::LOCALAEXTLOAD_v4i8:
+  case AMDIL::LOCALZEXTLOAD_v4i8:
+  case AMDIL::REGIONLOAD_v4i8:
+  case AMDIL::REGIONSEXTLOAD_v4i8:
+  case AMDIL::REGIONAEXTLOAD_v4i8:
+  case AMDIL::REGIONZEXTLOAD_v4i8:
+  case AMDIL::PRIVATELOAD_v4i8:
+  case AMDIL::PRIVATESEXTLOAD_v4i8:
+  case AMDIL::PRIVATEAEXTLOAD_v4i8:
+  case AMDIL::PRIVATEZEXTLOAD_v4i8:
+  case AMDIL::CONSTANTLOAD_v4i8:
+  case AMDIL::CONSTANTSEXTLOAD_v4i8:
+  case AMDIL::CONSTANTAEXTLOAD_v4i8:
+  case AMDIL::CONSTANTZEXTLOAD_v4i8:
+    return UNPACK_V4I8;
+
+  case AMDIL::GLOBALLOAD64_v2i16:
+  case AMDIL::GLOBALSEXTLOAD64_v2i16:
+  case AMDIL::GLOBALAEXTLOAD64_v2i16:
+  case AMDIL::GLOBALZEXTLOAD64_v2i16:
+  case AMDIL::LOCALLOAD64_v2i16:
+  case AMDIL::LOCALSEXTLOAD64_v2i16:
+  case AMDIL::LOCALAEXTLOAD64_v2i16:
+  case AMDIL::LOCALZEXTLOAD64_v2i16:
+  case AMDIL::REGIONLOAD64_v2i16:
+  case AMDIL::REGIONSEXTLOAD64_v2i16:
+  case AMDIL::REGIONAEXTLOAD64_v2i16:
+  case AMDIL::REGIONZEXTLOAD64_v2i16:
+  case AMDIL::PRIVATELOAD64_v2i16:
+  case AMDIL::PRIVATESEXTLOAD64_v2i16:
+  case AMDIL::PRIVATEAEXTLOAD64_v2i16:
+  case AMDIL::PRIVATEZEXTLOAD64_v2i16:
+  case AMDIL::CONSTANTLOAD64_v2i16:
+  case AMDIL::CONSTANTSEXTLOAD64_v2i16:
+  case AMDIL::CONSTANTAEXTLOAD64_v2i16:
+  case AMDIL::CONSTANTZEXTLOAD64_v2i16:
+  case AMDIL::GLOBALLOAD_v2i16:
+  case AMDIL::GLOBALSEXTLOAD_v2i16:
+  case AMDIL::GLOBALAEXTLOAD_v2i16:
+  case AMDIL::GLOBALZEXTLOAD_v2i16:
+  case AMDIL::LOCALLOAD_v2i16:
+  case AMDIL::LOCALSEXTLOAD_v2i16:
+  case AMDIL::LOCALAEXTLOAD_v2i16:
+  case AMDIL::LOCALZEXTLOAD_v2i16:
+  case AMDIL::REGIONLOAD_v2i16:
+  case AMDIL::REGIONSEXTLOAD_v2i16:
+  case AMDIL::REGIONAEXTLOAD_v2i16:
+  case AMDIL::REGIONZEXTLOAD_v2i16:
+  case AMDIL::PRIVATELOAD_v2i16:
+  case AMDIL::PRIVATESEXTLOAD_v2i16:
+  case AMDIL::PRIVATEAEXTLOAD_v2i16:
+  case AMDIL::PRIVATEZEXTLOAD_v2i16:
+  case AMDIL::CONSTANTLOAD_v2i16:
+  case AMDIL::CONSTANTSEXTLOAD_v2i16:
+  case AMDIL::CONSTANTAEXTLOAD_v2i16:
+  case AMDIL::CONSTANTZEXTLOAD_v2i16:
+    return UNPACK_V2I16;
+
+  case AMDIL::GLOBALLOAD64_v4i16:
+  case AMDIL::GLOBALSEXTLOAD64_v4i16:
+  case AMDIL::GLOBALAEXTLOAD64_v4i16:
+  case AMDIL::GLOBALZEXTLOAD64_v4i16:
+  case AMDIL::LOCALLOAD64_v4i16:
+  case AMDIL::LOCALSEXTLOAD64_v4i16:
+  case AMDIL::LOCALAEXTLOAD64_v4i16:
+  case AMDIL::LOCALZEXTLOAD64_v4i16:
+  case AMDIL::REGIONLOAD64_v4i16:
+  case AMDIL::REGIONSEXTLOAD64_v4i16:
+  case AMDIL::REGIONAEXTLOAD64_v4i16:
+  case AMDIL::REGIONZEXTLOAD64_v4i16:
+  case AMDIL::PRIVATELOAD64_v4i16:
+  case AMDIL::PRIVATESEXTLOAD64_v4i16:
+  case AMDIL::PRIVATEAEXTLOAD64_v4i16:
+  case AMDIL::PRIVATEZEXTLOAD64_v4i16:
+  case AMDIL::CONSTANTLOAD64_v4i16:
+  case AMDIL::CONSTANTSEXTLOAD64_v4i16:
+  case AMDIL::CONSTANTAEXTLOAD64_v4i16:
+  case AMDIL::CONSTANTZEXTLOAD64_v4i16:
+  case AMDIL::GLOBALLOAD_v4i16:
+  case AMDIL::GLOBALSEXTLOAD_v4i16:
+  case AMDIL::GLOBALAEXTLOAD_v4i16:
+  case AMDIL::GLOBALZEXTLOAD_v4i16:
+  case AMDIL::LOCALLOAD_v4i16:
+  case AMDIL::LOCALSEXTLOAD_v4i16:
+  case AMDIL::LOCALAEXTLOAD_v4i16:
+  case AMDIL::LOCALZEXTLOAD_v4i16:
+  case AMDIL::REGIONLOAD_v4i16:
+  case AMDIL::REGIONSEXTLOAD_v4i16:
+  case AMDIL::REGIONAEXTLOAD_v4i16:
+  case AMDIL::REGIONZEXTLOAD_v4i16:
+  case AMDIL::PRIVATELOAD_v4i16:
+  case AMDIL::PRIVATESEXTLOAD_v4i16:
+  case AMDIL::PRIVATEAEXTLOAD_v4i16:
+  case AMDIL::PRIVATEZEXTLOAD_v4i16:
+  case AMDIL::CONSTANTLOAD_v4i16:
+  case AMDIL::CONSTANTSEXTLOAD_v4i16:
+  case AMDIL::CONSTANTAEXTLOAD_v4i16:
+  case AMDIL::CONSTANTZEXTLOAD_v4i16:
+    return UNPACK_V4I16;
+  };
+  return NO_PACKING;
+}
+
+uint32_t
+AMDILIOExpansion::getPointerID(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc curInst;
+  getAsmPrinterFlags(MI, curInst);
+  return curInst.bits.ResourceID;
+}
+
+uint32_t
+AMDILIOExpansion::getShiftSize(MachineInstr *MI)
+{
+  switch(getPackedID(MI)) {
+  default:
+    return 0;
+  case PACK_V2I8:
+  case PACK_V4I8:
+  case UNPACK_V2I8:
+  case UNPACK_V4I8:
+    return 1;
+  case PACK_V2I16:
+  case PACK_V4I16:
+  case UNPACK_V2I16:
+  case UNPACK_V4I16:
+    return 2;
+  }
+  return 0;
+}
+uint32_t
+AMDILIOExpansion::getMemorySize(MachineInstr *MI)
+{
+  if (MI->memoperands_empty()) {
+    return 4;
+  }
+  return (uint32_t)((*MI->memoperands_begin())->getSize());
+}
+
+unsigned
+AMDILIOExpansion::expandLongExtend(MachineInstr *MI,
+                                   uint32_t numComps, uint32_t size, bool signedShift)
+{
+  DebugLoc DL = MI->getDebugLoc();
+  switch(size) {
+  default:
+    assert(0 && "Found a case we don't handle!");
+    break;
+  case 8:
+    if (numComps == 1) {
+      return expandLongExtendSub32(MI, AMDIL::SHL_i8, AMDIL::SHRVEC_v2i32,
+                                   AMDIL::USHRVEC_i8,
+                                   24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE, signedShift,
+                                   false);
+    } else if (numComps == 2) {
+      return expandLongExtendSub32(MI, AMDIL::SHL_v2i8, AMDIL::SHRVEC_v4i32,
+                                   AMDIL::USHRVEC_v2i8,
+                                   24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE_v2i64, signedShift,
+                                   true);
+    } else {
+      assert(0 && "Found a case we don't handle!");
+    }
+    break;
+  case 16:
+    if (numComps == 1) {
+      return expandLongExtendSub32(MI, AMDIL::SHL_i16, AMDIL::SHRVEC_v2i32,
+                                   AMDIL::USHRVEC_i16,
+                                   16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE, signedShift,
+                                   false);
+    } else if (numComps == 2) {
+      return expandLongExtendSub32(MI, AMDIL::SHL_v2i16, AMDIL::SHRVEC_v4i32,
+                                   AMDIL::USHRVEC_v2i16,
+                                   16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE_v2i64, signedShift,
+                                   true);
+    } else {
+      assert(0 && "Found a case we don't handle!");
+    }
+    break;
+  case 32:
+    if (numComps == 1) {
+      MachineInstr *nMI = NULL;
+      if (signedShift) {
+        nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_i32), AMDIL::Rxy1011)
+              .addReg(AMDIL::Rx1011)
+              .addImm(mMFI->addi64Literal((0ULL | (31ULL << 32))));
+      } else {
+        nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rxy1011)
+              .addReg(AMDIL::Rx1011)
+              .addImm(mMFI->addi32Literal(0));
+      }
+      return nMI->getOperand(0).getReg();
+    } else if (numComps == 2) {
+      MachineInstr *nMI = NULL;
+      if (signedShift) {
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v2i32), AMDIL::Rxy1012)
+        .addReg(AMDIL::Rxy1011)
+        .addImm(mMFI->addi64Literal(31));
+        nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+              .addReg(AMDIL::Rxy1011)
+              .addReg(AMDIL::Rxy1012);
+      } else {
+        nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+              .addReg(AMDIL::Rxy1011)
+              .addImm(mMFI->addi32Literal(0));
+      }
+      return nMI->getOperand(0).getReg();
+    } else {
+      assert(0 && "Found a case we don't handle!");
+    }
+  };
+  return 0;
+}
+unsigned
+AMDILIOExpansion::expandLongExtendSub32(MachineInstr *MI,
+                                        unsigned SHLop, unsigned SHRop, unsigned USHRop,
+                                        unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm,
+                                        unsigned LCRop, bool signedShift, bool vec2)
+{
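+  // Shift the narrow value up to the top of the dword and back down again:
+  // a 64-bit arithmetic shift for sign extension, or a logical shift paired
+  // with a zero high dword for zero extension.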
+  MachineInstr *nMI = NULL;
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*mBB, MI, DL, mTII->get(SHLop),
+          (vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
+  .addReg((vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
+  .addImm(mMFI->addi32Literal(SHLimm));
+  if (signedShift) {
+    BuildMI(*mBB, MI, DL, mTII->get(LCRop),
+            (vec2) ? AMDIL::R1011 : AMDIL::Rxy1011)
+    .addReg((vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
+    .addReg(AMDIL::Rxy1011);
+    nMI = BuildMI(*mBB, MI, DL, mTII->get(SHRop),
+                  (vec2) ? AMDIL::R1011 : AMDIL::Rxy1011)
+          .addReg((vec2) ? AMDIL::R1011 : AMDIL::Rxy1011)
+          .addImm(mMFI->addi64Literal(SHRimm));
+  } else {
+    BuildMI(*mBB, MI, DL, mTII->get(USHRop),
+            (vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
+    .addReg((vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
+    .addImm(mMFI->addi32Literal(USHRimm));
+    nMI = BuildMI(*mBB, MI, MI->getDebugLoc(), mTII->get(LCRop),
+                  (vec2) ? AMDIL::R1011 : AMDIL::Rxy1011)
+          .addReg((vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
+          .addImm(mMFI->addi32Literal(0));
+  }
+  return nMI->getOperand(0).getReg();
+}
+
+unsigned
+AMDILIOExpansion::expandIntegerExtend(MachineInstr *MI, unsigned SHLop,
+                                      unsigned SHRop, unsigned offset, unsigned reg)
+{
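+  // Extend by shifting the value left and then right by the same bit count;
+  // SHRop selects an arithmetic (sign-extend) or logical (zero-extend) shift.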
+  DebugLoc DL = MI->getDebugLoc();
+  offset = mMFI->addi32Literal(offset);
+  BuildMI(*mBB, MI, DL,
+          mTII->get(SHLop), reg)
+  .addReg(reg).addImm(offset);
+  BuildMI(*mBB, MI, DL,
+          mTII->get(SHRop), reg)
+  .addReg(reg).addImm(offset);
+  return reg;
+}
+unsigned
+AMDILIOExpansion::expandExtendLoad(MachineInstr *MI)
+{
+  if (!isExtendLoad(MI)) {
+    return 0;
+  }
+  Type *mType = NULL;
+  if (!MI->memoperands_empty()) {
+    MachineMemOperand *memOp = (*MI->memoperands_begin());
+    const Value *moVal = (memOp) ? memOp->getValue() : NULL;
+    mType = (moVal) ? moVal->getType() : NULL;
+  }
+  unsigned opcode = 0;
+  DebugLoc DL = MI->getDebugLoc();
+  if (isZExtLoadInst(TM, MI) || isAExtLoadInst(TM, MI) || isSExtLoadInst(TM, MI)) {
+    switch(MI->getDesc().OpInfo[0].RegClass) {
+    default:
+      assert(0 && "Found an extending load that we don't handle!");
+      break;
+    case AMDIL::GPRI16RegClassID:
+      if (!isHardwareLocal(MI)
+          || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) {
+        opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_i16 : AMDIL::USHRVEC_i16;
+        return expandIntegerExtend(MI, AMDIL::SHL_i16, opcode, 24, AMDIL::Rx1011);
+      }
+      break;
+    case AMDIL::GPRV2I16RegClassID:
+      opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v2i16 : AMDIL::USHRVEC_v2i16;
+      return expandIntegerExtend(MI, AMDIL::SHL_v2i16, opcode, 24, AMDIL::Rxy1011);
+      break;
+    case AMDIL::GPRV4I8RegClassID:
+      opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i8 : AMDIL::USHRVEC_v4i8;
+      return expandIntegerExtend(MI, AMDIL::SHL_v4i8, opcode, 24, AMDIL::R1011);
+      break;
+    case AMDIL::GPRV4I16RegClassID:
+      opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i16 : AMDIL::USHRVEC_v4i16;
+      return expandIntegerExtend(MI, AMDIL::SHL_v4i16, opcode, 24, AMDIL::R1011);
+      break;
+    case AMDIL::GPRI32RegClassID:
+      // The value may be an i8 or an i16 being extended to i32
+      if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
+        opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
+        expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 24, AMDIL::Rx1011);
+      } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
+        opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
+        expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 16, AMDIL::Rx1011);
+      } else {
+        assert(0 && "Found an extending load that we don't handle!");
+      }
+      return AMDIL::Rx1011;
+      break;
+    case AMDIL::GPRV2I32RegClassID:
+      // The value may be a v2i8 or v2i16 being extended to v2i32
+      if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
+        opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
+        expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 24, AMDIL::Rxy1011);
+      } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
+        opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
+        expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 16, AMDIL::Rxy1011);
+      } else {
+        assert(0 && "Found an extending load that we don't handle!");
+      }
+      return AMDIL::Rxy1011;
+      break;
+    case AMDIL::GPRV4I32RegClassID:
+      // The value may be a v4i8 or v4i16 being extended to v4i32
+      if (isNbitType(mType, 8, false) || getMemorySize(MI) == 4) {
+        opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
+        expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 24, AMDIL::R1011);
+      } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 8) {
+        opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
+        expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 16, AMDIL::R1011);
+      } else {
+        assert(0 && "Found an extending load that we don't handle!");
+      }
+      return AMDIL::R1011;
+      break;
+    case AMDIL::GPRI64RegClassID:
+      // The value may be an i8, i16, or i32 being extended to i64
+      if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
+        return expandLongExtend(MI, 1, 8, isSExtLoadInst(TM, MI));
+      } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
+        return expandLongExtend(MI, 1, 16, isSExtLoadInst(TM, MI));
+      } else if (isNbitType(mType, 32) || getMemorySize(MI) == 4) {
+        return expandLongExtend(MI, 1, 32, isSExtLoadInst(TM, MI));
+      } else {
+        assert(0 && "Found an extending load that we don't handle!");
+      }
+      break;
+    case AMDIL::GPRV2I64RegClassID:
+      // The value may be a v2i8, v2i16, or v2i32 being extended to v2i64
+      if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
+        return expandLongExtend(MI, 2, 8, isSExtLoadInst(TM, MI));
+      } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
+        return expandLongExtend(MI, 2, 16, isSExtLoadInst(TM, MI));
+      } else if (isNbitType(mType, 32, false) || getMemorySize(MI) == 8) {
+        return expandLongExtend(MI, 2, 32, isSExtLoadInst(TM, MI));
+      } else {
+        assert(0 && "Found an extending load that we don't handle!");
+      }
+      break;
+    case AMDIL::GPRF32RegClassID:
+      BuildMI(*mBB, MI, DL,
+              mTII->get(AMDIL::HTOF_f32), AMDIL::Rx1011)
+      .addReg(AMDIL::Rx1011);
+      return AMDIL::Rx1011;
+    case AMDIL::GPRV2F32RegClassID:
+      BuildMI(*mBB, MI, DL,
+              mTII->get(AMDIL::HTOF_v2f32), AMDIL::Rxy1011)
+      .addReg(AMDIL::Rxy1011);
+      return AMDIL::Rxy1011;
+    case AMDIL::GPRV4F32RegClassID:
+      BuildMI(*mBB, MI, DL,
+              mTII->get(AMDIL::HTOF_v4f32), AMDIL::R1011)
+      .addReg(AMDIL::R1011);
+      return AMDIL::R1011;
+    case AMDIL::GPRF64RegClassID:
+      BuildMI(*mBB, MI, DL,
+              mTII->get(AMDIL::FTOD), AMDIL::Rxy1011)
+      .addReg(AMDIL::Rx1011);
+      return AMDIL::Rxy1011;
+    case AMDIL::GPRV2F64RegClassID:
+      BuildMI(*mBB, MI, DL,
+              mTII->get(AMDIL::FTOD), AMDIL::Rzw1011)
+      .addReg(AMDIL::Ry1011);
+      BuildMI(*mBB, MI, DL,
+              mTII->get(AMDIL::FTOD), AMDIL::Rxy1011)
+      .addReg(AMDIL::Rx1011);
+      return AMDIL::R1011;
+    }
+  }
+  return 0;
+}
+
+void
+AMDILIOExpansion::expandTruncData(MachineInstr *MI)
+{
+  if (!isTruncStoreInst(TM, MI)) {
+    return;
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  switch (MI->getOpcode()) {
+  default:
+    MI->dump();
+    assert(!"Found a trunc store instructions we don't handle!");
+    break;
+  case AMDIL::GLOBALTRUNCSTORE64_i64i8:
+  case AMDIL::GLOBALTRUNCSTORE64_v2i64i8:
+  case AMDIL::LOCALTRUNCSTORE64_i64i8:
+  case AMDIL::LOCALTRUNCSTORE64_v2i64i8:
+  case AMDIL::REGIONTRUNCSTORE64_i64i8:
+  case AMDIL::REGIONTRUNCSTORE64_v2i64i8:
+  case AMDIL::PRIVATETRUNCSTORE64_i64i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i64i8:
+  case AMDIL::GLOBALTRUNCSTORE_i64i8:
+  case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+  case AMDIL::LOCALTRUNCSTORE_i64i8:
+  case AMDIL::LOCALTRUNCSTORE_v2i64i8:
+  case AMDIL::REGIONTRUNCSTORE_i64i8:
+  case AMDIL::REGIONTRUNCSTORE_v2i64i8:
+  case AMDIL::PRIVATETRUNCSTORE_i64i8:
+  case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+    .addReg(AMDIL::R1011);
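+    // Fall through: the i64 elements have been narrowed to 32 bits; mask them
+    // to 8 bits below.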
+  case AMDIL::GLOBALTRUNCSTORE64_i16i8:
+  case AMDIL::GLOBALTRUNCSTORE64_v2i16i8:
+  case AMDIL::GLOBALTRUNCSTORE64_v4i16i8:
+  case AMDIL::LOCALTRUNCSTORE64_i16i8:
+  case AMDIL::LOCALTRUNCSTORE64_v2i16i8:
+  case AMDIL::LOCALTRUNCSTORE64_v4i16i8:
+  case AMDIL::REGIONTRUNCSTORE64_i16i8:
+  case AMDIL::REGIONTRUNCSTORE64_v2i16i8:
+  case AMDIL::REGIONTRUNCSTORE64_v4i16i8:
+  case AMDIL::PRIVATETRUNCSTORE64_i16i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i16i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v4i16i8:
+  case AMDIL::GLOBALTRUNCSTORE_i16i8:
+  case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+  case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
+  case AMDIL::LOCALTRUNCSTORE_i16i8:
+  case AMDIL::LOCALTRUNCSTORE_v2i16i8:
+  case AMDIL::LOCALTRUNCSTORE_v4i16i8:
+  case AMDIL::REGIONTRUNCSTORE_i16i8:
+  case AMDIL::REGIONTRUNCSTORE_v2i16i8:
+  case AMDIL::REGIONTRUNCSTORE_v4i16i8:
+  case AMDIL::PRIVATETRUNCSTORE_i16i8:
+  case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
+  case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
+  case AMDIL::GLOBALTRUNCSTORE64_i32i8:
+  case AMDIL::GLOBALTRUNCSTORE64_v2i32i8:
+  case AMDIL::GLOBALTRUNCSTORE64_v4i32i8:
+  case AMDIL::LOCALTRUNCSTORE64_i32i8:
+  case AMDIL::LOCALTRUNCSTORE64_v2i32i8:
+  case AMDIL::LOCALTRUNCSTORE64_v4i32i8:
+  case AMDIL::REGIONTRUNCSTORE64_i32i8:
+  case AMDIL::REGIONTRUNCSTORE64_v2i32i8:
+  case AMDIL::REGIONTRUNCSTORE64_v4i32i8:
+  case AMDIL::PRIVATETRUNCSTORE64_i32i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i32i8:
+  case AMDIL::PRIVATETRUNCSTORE64_v4i32i8:
+  case AMDIL::GLOBALTRUNCSTORE_i32i8:
+  case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+  case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
+  case AMDIL::LOCALTRUNCSTORE_i32i8:
+  case AMDIL::LOCALTRUNCSTORE_v2i32i8:
+  case AMDIL::LOCALTRUNCSTORE_v4i32i8:
+  case AMDIL::REGIONTRUNCSTORE_i32i8:
+  case AMDIL::REGIONTRUNCSTORE_v2i32i8:
+  case AMDIL::REGIONTRUNCSTORE_v4i32i8:
+  case AMDIL::PRIVATETRUNCSTORE_i32i8:
+  case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
+  case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(mMFI->addi32Literal(0xFF));
+    break;
+  case AMDIL::GLOBALTRUNCSTORE64_i64i16:
+  case AMDIL::GLOBALTRUNCSTORE64_v2i64i16:
+  case AMDIL::LOCALTRUNCSTORE64_i64i16:
+  case AMDIL::LOCALTRUNCSTORE64_v2i64i16:
+  case AMDIL::REGIONTRUNCSTORE64_i64i16:
+  case AMDIL::REGIONTRUNCSTORE64_v2i64i16:
+  case AMDIL::PRIVATETRUNCSTORE64_i64i16:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i64i16:
+  case AMDIL::GLOBALTRUNCSTORE_i64i16:
+  case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
+  case AMDIL::LOCALTRUNCSTORE_i64i16:
+  case AMDIL::LOCALTRUNCSTORE_v2i64i16:
+  case AMDIL::REGIONTRUNCSTORE_i64i16:
+  case AMDIL::REGIONTRUNCSTORE_v2i64i16:
+  case AMDIL::PRIVATETRUNCSTORE_i64i16:
+  case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+    .addReg(AMDIL::R1011);
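+    // Fall through: mask the narrowed values to 16 bits below.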
+  case AMDIL::GLOBALTRUNCSTORE64_i32i16:
+  case AMDIL::GLOBALTRUNCSTORE64_v2i32i16:
+  case AMDIL::GLOBALTRUNCSTORE64_v4i32i16:
+  case AMDIL::LOCALTRUNCSTORE64_i32i16:
+  case AMDIL::LOCALTRUNCSTORE64_v2i32i16:
+  case AMDIL::LOCALTRUNCSTORE64_v4i32i16:
+  case AMDIL::REGIONTRUNCSTORE64_i32i16:
+  case AMDIL::REGIONTRUNCSTORE64_v2i32i16:
+  case AMDIL::REGIONTRUNCSTORE64_v4i32i16:
+  case AMDIL::PRIVATETRUNCSTORE64_i32i16:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i32i16:
+  case AMDIL::PRIVATETRUNCSTORE64_v4i32i16:
+  case AMDIL::GLOBALTRUNCSTORE_i32i16:
+  case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
+  case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
+  case AMDIL::LOCALTRUNCSTORE_i32i16:
+  case AMDIL::LOCALTRUNCSTORE_v2i32i16:
+  case AMDIL::LOCALTRUNCSTORE_v4i32i16:
+  case AMDIL::REGIONTRUNCSTORE_i32i16:
+  case AMDIL::REGIONTRUNCSTORE_v2i32i16:
+  case AMDIL::REGIONTRUNCSTORE_v4i32i16:
+  case AMDIL::PRIVATETRUNCSTORE_i32i16:
+  case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
+  case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(mMFI->addi32Literal(0xFFFF));
+    break;
+  case AMDIL::GLOBALTRUNCSTORE64_i64i32:
+  case AMDIL::LOCALTRUNCSTORE64_i64i32:
+  case AMDIL::REGIONTRUNCSTORE64_i64i32:
+  case AMDIL::PRIVATETRUNCSTORE64_i64i32:
+  case AMDIL::GLOBALTRUNCSTORE_i64i32:
+  case AMDIL::LOCALTRUNCSTORE_i64i32:
+  case AMDIL::REGIONTRUNCSTORE_i64i32:
+  case AMDIL::PRIVATETRUNCSTORE_i64i32:
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::LLO), AMDIL::Rx1011)
+    .addReg(AMDIL::Rxy1011);
+    break;
+  case AMDIL::GLOBALTRUNCSTORE64_v2i64i32:
+  case AMDIL::LOCALTRUNCSTORE64_v2i64i32:
+  case AMDIL::REGIONTRUNCSTORE64_v2i64i32:
+  case AMDIL::PRIVATETRUNCSTORE64_v2i64i32:
+  case AMDIL::GLOBALTRUNCSTORE_v2i64i32:
+  case AMDIL::LOCALTRUNCSTORE_v2i64i32:
+  case AMDIL::REGIONTRUNCSTORE_v2i64i32:
+  case AMDIL::PRIVATETRUNCSTORE_v2i64i32:
+    BuildMI(*mBB, MI, DL,
+            mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+    .addReg(AMDIL::R1011);
+    break;
+  case AMDIL::GLOBALTRUNCSTORE64_f64f32:
+  case AMDIL::LOCALTRUNCSTORE64_f64f32:
+  case AMDIL::REGIONTRUNCSTORE64_f64f32:
+  case AMDIL::PRIVATETRUNCSTORE64_f64f32:
+  case AMDIL::GLOBALTRUNCSTORE_f64f32:
+  case AMDIL::LOCALTRUNCSTORE_f64f32:
+  case AMDIL::REGIONTRUNCSTORE_f64f32:
+  case AMDIL::PRIVATETRUNCSTORE_f64f32:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+            AMDIL::Rx1011).addReg(AMDIL::Rxy1011);
+    break;
+  case AMDIL::GLOBALTRUNCSTORE64_v2f64f32:
+  case AMDIL::LOCALTRUNCSTORE64_v2f64f32:
+  case AMDIL::REGIONTRUNCSTORE64_v2f64f32:
+  case AMDIL::PRIVATETRUNCSTORE64_v2f64f32:
+  case AMDIL::GLOBALTRUNCSTORE_v2f64f32:
+  case AMDIL::LOCALTRUNCSTORE_v2f64f32:
+  case AMDIL::REGIONTRUNCSTORE_v2f64f32:
+  case AMDIL::PRIVATETRUNCSTORE_v2f64f32:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+            AMDIL::Rx1011).addReg(AMDIL::Rxy1011);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+            AMDIL::Ry1011).addReg(AMDIL::Rzw1011);
+    break;
+  }
+}
+void
+AMDILIOExpansion::expandAddressCalc(MachineInstr *MI)
+{
+  if (!isAddrCalcInstr(MI)) {
+    return;
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  bool is64bit = is64bitLSOp(TM, MI);
+  uint32_t addyReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+  uint32_t addInst = (is64bit) ? AMDIL::LADD_i64 : AMDIL::ADD_i32;
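+  // Add the address-space base register for this instruction to the computed
+  // address.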
+  switch(MI->getOpcode()) {
+    ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE)
+    ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESTORE)
+    ExpandCaseToAllTypes(AMDIL::PRIVATELOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESTORE64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATELOAD64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD64)
+    BuildMI(*mBB, MI, DL, mTII->get(addInst),
+            addyReg).addReg(addyReg).addReg(AMDIL::T1);
+    break;
+    ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE)
+    ExpandCaseToAllTypes(AMDIL::LOCALLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALSTORE)
+    ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE64)
+    ExpandCaseToAllTypes(AMDIL::LOCALLOAD64)
+    ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::LOCALSTORE64)
+    BuildMI(*mBB, MI, DL, mTII->get(addInst),
+            addyReg).addReg(addyReg).addReg(AMDIL::T2);
+    break;
+    ExpandCaseToAllTypes(AMDIL::CPOOLLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLLOAD64)
+    ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD64)
+    BuildMI(*mBB, MI, DL, mTII->get(addInst),
+            addyReg).addReg(addyReg).addReg(AMDIL::SDP);
+    break;
+  default:
+    return;
+  }
+}
+void
+AMDILIOExpansion::expandLoadStartCode(MachineInstr *MI)
+{
+  DebugLoc DL = MI->getDebugLoc();
+  bool is64bit = is64bitLSOp(TM, MI);
+  uint32_t addyReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+  uint32_t addInst = (is64bit) ? AMDIL::LADD_i64 : AMDIL::ADD_i32;
+  uint32_t moveInst = (is64bit) ? AMDIL::MOVE_i64 : AMDIL::MOVE_i32;
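+  // Fold the base pointer (operand 1) and the register offset (operand 2),
+  // when present, into the address register, then rewrite the instruction to
+  // address through it before applying the address-space base offset.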
+  if (MI->getOperand(2).isReg()) {
+    BuildMI(*mBB, MI, DL, mTII->get(addInst),
+            addyReg).addReg(MI->getOperand(1).getReg())
+    .addReg(MI->getOperand(2).getReg());
+  } else {
+    BuildMI(*mBB, MI, DL, mTII->get(moveInst),
+            addyReg).addReg(MI->getOperand(1).getReg());
+  }
+  MI->getOperand(1).setReg(addyReg);
+  expandAddressCalc(MI);
+}
+void
+AMDILIOExpansion::emitStaticCPLoad(MachineInstr* MI, int swizzle,
+                                   int id, bool ExtFPLoad)
+{
+  DebugLoc DL = MI->getDebugLoc();
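+  // The default case (component 0) moves the literal directly into the
+  // destination register; components 1-3 load it into a temporary register
+  // and insert it into the corresponding vector lane.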
+  switch(swizzle) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(ExtFPLoad
+                                    ? AMDIL::DTOF : AMDIL::MOVE_i32),
+            MI->getOperand(0).getReg())
+    .addImm(id);
+    break;
+  case 1:
+  case 2:
+  case 3:
+    BuildMI(*mBB, MI, DL, mTII->get(ExtFPLoad
+                                    ? AMDIL::DTOF : AMDIL::MOVE_i32), AMDIL::Rx1001)
+    .addImm(id);
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VINSERT_v4i32),
+            MI->getOperand(0).getReg())
+    .addReg(MI->getOperand(0).getReg())
+    .addReg(AMDIL::Rx1001)
+    .addImm(swizzle + 1);
+    break;
+  };
+}
+void
+AMDILIOExpansion::emitCPInst(MachineInstr* MI,
+                             const Constant* C, AMDILKernelManager* KM, int swizzle, bool ExtFPLoad)
+{
+  if (const ConstantFP* CFP = dyn_cast<ConstantFP>(C)) {
+    if (CFP->getType()->isFloatTy()) {
+      uint32_t val = (uint32_t)(CFP->getValueAPF().bitcastToAPInt()
+                                .getZExtValue());
+      uint32_t id = mMFI->addi32Literal(val);
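+      // If no usable 32-bit literal id was returned, re-emit the value as a
+      // 64-bit literal instead.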
+      if (!id) {
+        const APFloat &APF = CFP->getValueAPF();
+        union dtol_union {
+          double d;
+          uint64_t ul;
+        } conv;
+        if (&APF.getSemantics()
+            == (const llvm::fltSemantics*)&APFloat::IEEEsingle) {
+          float fval = APF.convertToFloat();
+          conv.d = (double)fval;
+        } else {
+          conv.d = APF.convertToDouble();
+        }
+        id = mMFI->addi64Literal(conv.ul);
+      }
+      emitStaticCPLoad(MI, swizzle, id, ExtFPLoad);
+    } else {
+      const APFloat &APF = CFP->getValueAPF();
+      union ftol_union {
+        double d;
+        uint64_t ul;
+      } conv;
+      if (&APF.getSemantics()
+          == (const llvm::fltSemantics*)&APFloat::IEEEsingle) {
+        float fval = APF.convertToFloat();
+        conv.d = (double)fval;
+      } else {
+        conv.d = APF.convertToDouble();
+      }
+      uint32_t id = mMFI->getLongLits(conv.ul);
+      if (!id) {
+        id = mMFI->getIntLits((uint32_t)conv.ul);
+      }
+      emitStaticCPLoad(MI, swizzle, id, ExtFPLoad);
+    }
+  } else if (const ConstantInt* CI = dyn_cast<ConstantInt>(C)) {
+    int64_t val = 0;
+    if (CI) {
+      val = CI->getSExtValue();
+    }
+    if (CI->getBitWidth() == 64) {
+      emitStaticCPLoad(MI, swizzle, mMFI->addi64Literal(val), ExtFPLoad);
+    } else {
+      emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(val), ExtFPLoad);
+    }
+  } else if (const ConstantArray* CA = dyn_cast<ConstantArray>(C)) {
+    uint32_t size = CA->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant array where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CA->getOperand(0), KM, x, ExtFPLoad);
+    }
+  } else if (const ConstantAggregateZero* CAZ
+             = dyn_cast<ConstantAggregateZero>(C)) {
+    if (CAZ->isNullValue()) {
+      emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(0), ExtFPLoad);
+    }
+  } else if (const ConstantStruct* CS = dyn_cast<ConstantStruct>(C)) {
+    uint32_t size = CS->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant struct where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CS->getOperand(0), KM, x, ExtFPLoad);
+    }
+  } else if (const ConstantVector* CV = dyn_cast<ConstantVector>(C)) {
+    // TODO: Make this handle vectors natively up to the correct
+    // size
+    uint32_t size = CV->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant vector where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CV->getOperand(0), KM, x, ExtFPLoad);
+    }
+  } else if (const ConstantDataVector* CV = dyn_cast<ConstantDataVector>(C)) {
+    // TODO: Make this handle vectors natively up to the correct
+    // size
+    uint32_t size = CV->getNumElements();
+    assert(size < 5 && "Cannot handle a constant vector where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CV->getElementAsConstant(0), KM, x, ExtFPLoad);
+    }
+  } else {
+    // TODO: Do we really need to handle ConstantPointerNull?
+    // What about BlockAddress, ConstantExpr and Undef?
+    // How would these even be generated by a valid CL program?
+    assert(0 && "Found a constant type that I don't know how to handle");
+  }
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,330 @@
+//===-- AMDILIOExpansion.h ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDIL IO Expansion pass expands pseudo IO instructions into sequences
+// of instructions that produce the correct results. These instructions are
+// not expanded earlier in the backend because every pass that runs before
+// this one may assume it can still generate load and store instructions, so
+// only passes that never generate loads or stores may be scheduled after it.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILIOEXPANSION_H_
+#define _AMDILIOEXPANSION_H_
+#undef DEBUG_TYPE
+#undef DEBUGME
+#define DEBUG_TYPE "IOExpansion"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+#include "llvm/Type.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm
+{
+class MachineFunction;
+class AMDILKernelManager;
+class AMDILMachineFunctionInfo;
+class AMDILSubtarget;
+class MachineInstr;
+class Constant;
+class TargetInstrInfo;
+typedef enum {
+  NO_PACKING = 0,
+  PACK_V2I8,
+  PACK_V4I8,
+  PACK_V2I16,
+  PACK_V4I16,
+  UNPACK_V2I8,
+  UNPACK_V4I8,
+  UNPACK_V2I16,
+  UNPACK_V4I16,
+  UNPACK_LAST
+} REG_PACKED_TYPE;
+class AMDILIOExpansion : public MachineFunctionPass
+{
+public:
+  virtual ~AMDILIOExpansion();
+  virtual const char* getPassName() const;
+  bool runOnMachineFunction(MachineFunction &MF);
+  static char ID;
+protected:
+  AMDILIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+  //
+  // @brief Checks whether the machine instruction is an I/O instruction.
+  //
+  // @param MI Machine instruction to check.
+  //
+  // @return true if it is an I/O instruction, false otherwise.
+  //
+  virtual bool
+  isIOInstruction(MachineInstr *MI);
+  // Wrapper function that calls the appropriate I/O
+  // expansion function based on the instruction type.
+  virtual void
+  expandIOInstruction(MachineInstr *MI);
+  virtual void
+  expandGlobalStore(MachineInstr *MI) = 0;
+  virtual void
+  expandLocalStore(MachineInstr *MI) = 0;
+  virtual void
+  expandRegionStore(MachineInstr *MI) = 0;
+  virtual void
+  expandPrivateStore(MachineInstr *MI) = 0;
+  virtual void
+  expandGlobalLoad(MachineInstr *MI) = 0;
+  virtual void
+  expandRegionLoad(MachineInstr *MI) = 0;
+  virtual void
+  expandLocalLoad(MachineInstr *MI) = 0;
+  virtual void
+  expandPrivateLoad(MachineInstr *MI) = 0;
+  virtual void
+  expandConstantLoad(MachineInstr *MI) = 0;
+  virtual void
+  expandConstantPoolLoad(MachineInstr *MI) = 0;
+  bool
+  isAddrCalcInstr(MachineInstr *MI);
+  bool
+  isExtendLoad(MachineInstr *MI);
+  bool
+  isHardwareRegion(MachineInstr *MI);
+  bool
+  isHardwareLocal(MachineInstr *MI);
+  bool
+  isPackedData(MachineInstr *MI);
+  bool
+  isStaticCPLoad(MachineInstr *MI);
+  bool
+  isNbitType(Type *MI, uint32_t nBits, bool isScalar = true);
+  bool
+  isHardwareInst(MachineInstr *MI);
+  uint32_t
+  getMemorySize(MachineInstr *MI);
+  REG_PACKED_TYPE
+  getPackedID(MachineInstr *MI);
+  uint32_t
+  getShiftSize(MachineInstr *MI);
+  uint32_t
+  getPointerID(MachineInstr *MI);
+  uint32_t
+  getDataReg(MachineInstr *MI);
+  void
+  expandTruncData(MachineInstr *MI);
+  void
+  expandLoadStartCode(MachineInstr *MI);
+  virtual void
+  expandStoreSetupCode(MachineInstr *MI) = 0;
+  void
+  expandAddressCalc(MachineInstr *MI);
+  unsigned
+  expandLongExtend(MachineInstr *MI,
+                   uint32_t numComponents, uint32_t size, bool signedShift);
+  unsigned
+  expandLongExtendSub32(MachineInstr *MI,
+                        unsigned SHLop, unsigned SHRop, unsigned USHRop,
+                        unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm,
+                        unsigned LCRop, bool signedShift, bool vec2);
+  unsigned
+  expandIntegerExtend(MachineInstr *MI, unsigned,
+                      unsigned, unsigned, unsigned);
+  unsigned
+  expandExtendLoad(MachineInstr *MI);
+  virtual void
+  expandPackedData(MachineInstr *MI) = 0;
+  void
+  emitCPInst(MachineInstr* MI, const Constant* C,
+             AMDILKernelManager* KM, int swizzle, bool ExtFPLoad);
+
+  bool mDebug;
+  const AMDILSubtarget *mSTM;
+  AMDILKernelManager *mKM;
+  MachineBasicBlock *mBB;
+  AMDILMachineFunctionInfo *mMFI;
+  const TargetInstrInfo *mTII;
+  bool saveInst;
+protected:
+  void
+  emitStaticCPLoad(MachineInstr* MI, int swizzle, int id,
+                   bool ExtFPLoad);
+  TargetMachine &TM;
+}; // class AMDILIOExpansion
+
+// Intermediate class that holds I/O code expansion that is common to the
+// 7XX, Evergreen and Northern Island family of chips.
+class AMDIL789IOExpansion : public AMDILIOExpansion
+{
+public:
+  virtual ~AMDIL789IOExpansion();
+  virtual const char* getPassName() const;
+protected:
+  AMDIL789IOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+  virtual void
+  expandGlobalStore(MachineInstr *MI) = 0;
+  virtual void
+  expandLocalStore(MachineInstr *MI) = 0;
+  virtual void
+  expandRegionStore(MachineInstr *MI) = 0;
+  virtual void
+  expandGlobalLoad(MachineInstr *MI) = 0;
+  virtual void
+  expandRegionLoad(MachineInstr *MI) = 0;
+  virtual void
+  expandLocalLoad(MachineInstr *MI) = 0;
+  virtual void
+  expandPrivateStore(MachineInstr *MI);
+  virtual void
+  expandConstantLoad(MachineInstr *MI);
+  virtual void
+  expandPrivateLoad(MachineInstr *MI);
+  virtual void
+  expandConstantPoolLoad(MachineInstr *MI);
+  void
+  expandStoreSetupCode(MachineInstr *MI);
+  virtual void
+  expandPackedData(MachineInstr *MI);
+private:
+  void emitVectorAddressCalc(MachineInstr *MI, bool is32bit,
+                             bool needsSelect);
+  void emitVectorSwitchWrite(MachineInstr *MI, bool is32bit);
+  void emitComponentExtract(MachineInstr *MI, unsigned src,
+                            unsigned dst, bool beforeInst);
+  void emitDataLoadSelect(MachineInstr *MI);
+}; // class AMDIL789IOExpansion
+// Class that handles I/O emission for the 7XX family of devices.
+class AMDIL7XXIOExpansion : public AMDIL789IOExpansion
+{
+public:
+  AMDIL7XXIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+  ~AMDIL7XXIOExpansion();
+  const char* getPassName() const;
+protected:
+  void
+  expandGlobalStore(MachineInstr *MI);
+  void
+  expandLocalStore(MachineInstr *MI);
+  void
+  expandRegionStore(MachineInstr *MI);
+  void
+  expandGlobalLoad(MachineInstr *MI);
+  void
+  expandRegionLoad(MachineInstr *MI);
+  void
+  expandLocalLoad(MachineInstr *MI);
+}; // class AMDIL7XXIOExpansion
+
+// Class that handles image functions to expand them into the
+// correct set of I/O instructions.
+class AMDILImageExpansion : public AMDIL789IOExpansion
+{
+public:
+  AMDILImageExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+  virtual ~AMDILImageExpansion();
+protected:
+  //
+  // @param MI Instruction iterator that has the sample instruction
+  // that needs to be taken care of.
+  // @brief transforms the __amdil_sample_data function call into a
+  // sample instruction in IL.
+  //
+  // @warning This function only works correctly if all functions get
+  // inlined
+  //
+  virtual void
+  expandImageLoad(MachineBasicBlock *BB, MachineInstr *MI);
+  //
+  // @param MI Instruction iterator that has the write instruction that
+  // needs to be taken care of.
+  // @brief transforms the __amdil_write_data function call into a
+  // simple UAV write instruction in IL.
+  //
+  // @warning This function only works correctly if all functions get
+  // inlined
+  //
+  virtual void
+  expandImageStore(MachineBasicBlock *BB, MachineInstr *MI);
+  //
+  // @param MI Instruction iterator that has the image parameter
+  // instruction
+  // @brief transforms the __amdil_get_image_params function call into
+  // a copy of data from a specific constant buffer to the register
+  //
+  // @warning This function only works correctly if all functions get
+  // inlined
+  //
+  virtual void
+  expandImageParam(MachineBasicBlock *BB, MachineInstr *MI);
+
+  //
+  // @param MI Instruction that points to the image
+  // @brief transforms __amdil_sample_data into a sequence of
+  // if/else that selects the correct sample instruction.
+  //
+  // @warning This function is inefficient, but works even when
+  // functions are not inlined.
+  //
+  virtual void
+  expandInefficientImageLoad(MachineBasicBlock *BB, MachineInstr *MI);
+private:
+  AMDILImageExpansion(); // Do not implement.
+
+}; // class AMDILImageExpansion
+
+// Class that expands IO instructions for Evergreen and Northern
+// Island family of devices.
+class AMDILEGIOExpansion : public AMDILImageExpansion
+{
+public:
+  AMDILEGIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+  virtual ~AMDILEGIOExpansion();
+  const char* getPassName() const;
+protected:
+  virtual bool
+  isIOInstruction(MachineInstr *MI);
+  virtual void
+  expandIOInstruction(MachineInstr *MI);
+  bool
+  isImageIO(MachineInstr *MI);
+  virtual void
+  expandGlobalStore(MachineInstr *MI);
+  void
+  expandLocalStore(MachineInstr *MI);
+  void
+  expandRegionStore(MachineInstr *MI);
+  virtual void
+  expandGlobalLoad(MachineInstr *MI);
+  void
+  expandRegionLoad(MachineInstr *MI);
+  void
+  expandLocalLoad(MachineInstr *MI);
+  virtual bool
+  isCacheableOp(MachineInstr *MI);
+  void
+  expandStoreSetupCode(MachineInstr *MI);
+  void
+  expandPackedData(MachineInstr *MI);
+private:
+  bool
+  isArenaOp(MachineInstr *MI);
+  void
+  expandArenaSetup(MachineInstr *MI);
+}; // class AMDILEGIOExpansion
+} // namespace llvm
+#endif // _AMDILIOEXPANSION_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,505 @@
+//===-- AMDILISelDAGToDAG.cpp ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILDevices.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDILDAGToDAGISel - AMDIL-specific code to select AMDIL machine instructions
+// for SelectionDAG operations.
+//
+namespace
+{
+class AMDILDAGToDAGISel : public SelectionDAGISel
+{
+  // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
+  // make the right decision when generating code for different targets.
+  const AMDILSubtarget *Subtarget;
+public:
+  explicit AMDILDAGToDAGISel(AMDILTargetMachine &TM, CodeGenOpt::Level OptLevel)
+    : SelectionDAGISel(TM, OptLevel),
+      Subtarget(&TM.getSubtarget<AMDILSubtarget>()) {}
+  virtual ~AMDILDAGToDAGISel() {}
+  inline SDValue getSmallIPtrImm(unsigned Imm);
+
+  SDNode *Select(SDNode *N);
+  // Complex pattern selectors
+  bool SelectADDR(
+    SDValue N, SDValue &R1, SDValue &R2);
+  bool SelectADDR64(
+    SDValue N, SDValue &R1, SDValue &R2);
+  bool isGlobalStore(const StoreSDNode *N) const;
+  bool isPrivateStore(const StoreSDNode *N) const;
+  bool isLocalStore(const StoreSDNode *N) const;
+  bool isRegionStore(const StoreSDNode *N) const;
+  bool isFlatStore(const StoreSDNode *N) const;
+
+  bool isCPLoad(const LoadSDNode *N) const;
+  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
+  bool isGlobalLoad(const LoadSDNode *N) const;
+  bool isPrivateLoad(const LoadSDNode *N) const;
+  bool isLocalLoad(const LoadSDNode *N) const;
+  bool isRegionLoad(const LoadSDNode *N) const;
+  bool isFlatLoad(const LoadSDNode *N) const;
+  bool isFlatASOverrideEnabled() const;
+
+  virtual const char *getPassName() const;
+private:
+  SDNode *xformAtomicInst(SDNode *N);
+
+  // Include the pieces autogenerated from the target description.
+#include "AMDILGenDAGISel.inc"
+};
+}  // end anonymous namespace
+
+// createAMDILISelDag - This pass converts a legalized DAG into an AMDIL-specific
+// DAG, ready for instruction scheduling.
+//
+FunctionPass *llvm::createAMDILISelDag(AMDILTargetMachine &TM,
+                                       llvm::CodeGenOpt::Level OptLevel)
+{
+  return new AMDILDAGToDAGISel(TM, OptLevel);
+}
+
+SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm)
+{
+  return CurDAG->getTargetConstant(Imm, MVT::i32);
+}
+
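+// SelectADDR - Match an address as a (base, offset) pair: FrameIndex nodes
+// become (TargetFrameIndex, 0), ISD::ADD nodes are split into their two
+// operands, and any other address is returned as (Addr, 0); target symbols
+// and global addresses are rejected.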
+bool AMDILDAGToDAGISel::SelectADDR(
+  SDValue Addr, SDValue& R1, SDValue& R2)
+{
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress) {
+    return false;
+  }
+
+  if (Addr.getOpcode() == ISD::FrameIndex) {
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+      R2 = CurDAG->getTargetConstant(0, MVT::i32);
+    } else {
+      R1 = Addr;
+      R2 = CurDAG->getTargetConstant(0, MVT::i32);
+    }
+  } else if (Addr.getOpcode() == ISD::ADD) {
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+  } else {
+    R1 = Addr;
+    R2 = CurDAG->getTargetConstant(0, MVT::i32);
+  }
+  return true;
+}
+
+
+bool AMDILDAGToDAGISel::SelectADDR64(
+  SDValue Addr, SDValue& R1, SDValue& R2)
+{
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress) {
+    return false;
+  }
+
+  if (Addr.getOpcode() == ISD::FrameIndex) {
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+      R2 = CurDAG->getTargetConstant(0, MVT::i64);
+    } else {
+      R1 = Addr;
+      R2 = CurDAG->getTargetConstant(0, MVT::i64);
+    }
+  } else if (Addr.getOpcode() == ISD::ADD) {
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+  } else {
+    R1 = Addr;
+    R2 = CurDAG->getTargetConstant(0, MVT::i64);
+  }
+  return true;
+}
+
+SDNode *AMDILDAGToDAGISel::Select(SDNode *N)
+{
+  unsigned int Opc = N->getOpcode();
+  if (N->isMachineOpcode()) {
+    return NULL;   // Already selected.
+  }
+  switch (Opc) {
+  default:
+    break;
+  case ISD::FrameIndex: {
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
+      unsigned int FI = FIN->getIndex();
+      EVT OpVT = N->getValueType(0);
+      unsigned int NewOpc = AMDIL::MOVE_i32;
+      SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+      return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
+    }
+  }
+  break;
+  }
+  // For all atomic instructions, we need to add a constant
+  // operand that stores the resource ID in the instruction
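+  // (e.g. an ATOM_G_ADD node is rebuilt by xformAtomicInst below with an
+  //  extra i32 target constant appended to its operand list).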
+  if (Opc > AMDILISD::ADDADDR && Opc < AMDILISD::APPEND_ALLOC) {
+    N = xformAtomicInst(N);
+  }
+  return SelectCode(N);
+}
+
+bool AMDILDAGToDAGISel::isFlatASOverrideEnabled() const
+{
+  return Subtarget->overridesFlatAS();
+}
+
+bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) const
+{
+  return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+         && !isFlatASOverrideEnabled();
+}
+
+bool AMDILDAGToDAGISel::isFlatStore(const StoreSDNode *N) const
+{
+  return check_type(N->getSrcValue(), AMDILAS::FLAT_ADDRESS)
+         || (isFlatASOverrideEnabled()
+             && (check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+                 || check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+                 || check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+                 || check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS))
+            );
+}
+
+bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) const
+{
+  return (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+          && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+          && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS))
+         && !isFlatASOverrideEnabled();
+}
+
+bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) const
+{
+  return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+         && !isFlatASOverrideEnabled();
+}
+
+bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) const
+{
+  return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) const
+{
+  if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+      && !isFlatASOverrideEnabled()) {
+    return true;
+  }
+  MachineMemOperand *MMO = N->getMemOperand();
+  if (!MMO || !MMO->getValue()) {
+    return false;
+  }
+  const Value *V = MMO->getValue();
+  const Value *BV = getBasePointerValue(V);
+  if (dyn_cast<GlobalValue>(V)
+      || (BV && dyn_cast<GlobalValue>(BV))) {
+    return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+           && !isFlatASOverrideEnabled();
+  }
+  return false;
+}
+
+bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const
+{
+  return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+         && !isFlatASOverrideEnabled();
+}
+
+bool AMDILDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const
+{
+  return check_type(N->getSrcValue(), AMDILAS::FLAT_ADDRESS)
+         || (isFlatASOverrideEnabled()
+             && (check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+                 || check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+                 || check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+                 || check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS))
+            );
+}
+
+bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const
+{
+  return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+         && !isFlatASOverrideEnabled();
+}
+
+bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const
+{
+  return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
+         && !isFlatASOverrideEnabled();
+}
+
+bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) const
+{
+  MachineMemOperand *MMO = N->getMemOperand();
+  if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+      && !isFlatASOverrideEnabled()) {
+    if (MMO) {
+      const Value *V = MMO->getValue();
+      const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
+      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const
+{
+  if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+      && !isFlatASOverrideEnabled()) {
+    // Check to make sure we are not a constant pool load or a constant load
+    // that is marked as a private load
+    if (isCPLoad(N) || isConstantLoad(N, -1)) {
+      return false;
+    }
+  }
+  if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+      && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+      && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
+      && !check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+      && !isFlatASOverrideEnabled()) {
+    return true;
+  }
+  return false;
+}
+
+const char *AMDILDAGToDAGISel::getPassName() const
+{
+  return "AMDIL DAG->DAG Pattern Instruction Selection";
+}
+
+SDNode*
+AMDILDAGToDAGISel::xformAtomicInst(SDNode *N)
+{
+  uint32_t addVal = 1;
+  bool addOne = false;
+  unsigned opc = N->getOpcode();
+  switch (opc) {
+  default:
+    return N;
+  case AMDILISD::ATOM_G_ADD:
+  case AMDILISD::ATOM_G_AND:
+  case AMDILISD::ATOM_G_MAX:
+  case AMDILISD::ATOM_G_UMAX:
+  case AMDILISD::ATOM_G_MIN:
+  case AMDILISD::ATOM_G_UMIN:
+  case AMDILISD::ATOM_G_OR:
+  case AMDILISD::ATOM_G_SUB:
+  case AMDILISD::ATOM_G_RSUB:
+  case AMDILISD::ATOM_G_XCHG:
+  case AMDILISD::ATOM_G_XOR:
+  case AMDILISD::ATOM_G_ADD_NORET:
+  case AMDILISD::ATOM_G_AND_NORET:
+  case AMDILISD::ATOM_G_MAX_NORET:
+  case AMDILISD::ATOM_G_UMAX_NORET:
+  case AMDILISD::ATOM_G_MIN_NORET:
+  case AMDILISD::ATOM_G_UMIN_NORET:
+  case AMDILISD::ATOM_G_OR_NORET:
+  case AMDILISD::ATOM_G_SUB_NORET:
+  case AMDILISD::ATOM_G_RSUB_NORET:
+  case AMDILISD::ATOM_G_XCHG_NORET:
+  case AMDILISD::ATOM_G_XOR_NORET:
+  case AMDILISD::ATOM_L_ADD:
+  case AMDILISD::ATOM_L_AND:
+  case AMDILISD::ATOM_L_MAX:
+  case AMDILISD::ATOM_L_UMAX:
+  case AMDILISD::ATOM_L_MIN:
+  case AMDILISD::ATOM_L_UMIN:
+  case AMDILISD::ATOM_L_OR:
+  case AMDILISD::ATOM_L_SUB:
+  case AMDILISD::ATOM_L_RSUB:
+  case AMDILISD::ATOM_L_XCHG:
+  case AMDILISD::ATOM_L_XOR:
+  case AMDILISD::ATOM_L_ADD_NORET:
+  case AMDILISD::ATOM_L_AND_NORET:
+  case AMDILISD::ATOM_L_MAX_NORET:
+  case AMDILISD::ATOM_L_UMAX_NORET:
+  case AMDILISD::ATOM_L_MIN_NORET:
+  case AMDILISD::ATOM_L_UMIN_NORET:
+  case AMDILISD::ATOM_L_OR_NORET:
+  case AMDILISD::ATOM_L_SUB_NORET:
+  case AMDILISD::ATOM_L_RSUB_NORET:
+  case AMDILISD::ATOM_L_XCHG_NORET:
+  case AMDILISD::ATOM_L_XOR_NORET:
+  case AMDILISD::ATOM_R_ADD:
+  case AMDILISD::ATOM_R_AND:
+  case AMDILISD::ATOM_R_MAX:
+  case AMDILISD::ATOM_R_UMAX:
+  case AMDILISD::ATOM_R_MIN:
+  case AMDILISD::ATOM_R_UMIN:
+  case AMDILISD::ATOM_R_OR:
+  case AMDILISD::ATOM_R_SUB:
+  case AMDILISD::ATOM_R_RSUB:
+  case AMDILISD::ATOM_R_XCHG:
+  case AMDILISD::ATOM_R_XOR:
+  case AMDILISD::ATOM_R_ADD_NORET:
+  case AMDILISD::ATOM_R_AND_NORET:
+  case AMDILISD::ATOM_R_MAX_NORET:
+  case AMDILISD::ATOM_R_UMAX_NORET:
+  case AMDILISD::ATOM_R_MIN_NORET:
+  case AMDILISD::ATOM_R_UMIN_NORET:
+  case AMDILISD::ATOM_R_OR_NORET:
+  case AMDILISD::ATOM_R_SUB_NORET:
+  case AMDILISD::ATOM_R_RSUB_NORET:
+  case AMDILISD::ATOM_R_XCHG_NORET:
+  case AMDILISD::ATOM_R_XOR_NORET:
+  case AMDILISD::ATOM_G_CMPXCHG:
+  case AMDILISD::ATOM_G_CMPXCHG_NORET:
+  case AMDILISD::ATOM_L_CMPXCHG:
+  case AMDILISD::ATOM_L_CMPXCHG_NORET:
+  case AMDILISD::ATOM_R_CMPXCHG:
+  case AMDILISD::ATOM_R_CMPXCHG_NORET:
+    break;
+  case AMDILISD::ATOM_G_DEC:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_G_SUB;
+    }
+    break;
+  case AMDILISD::ATOM_G_INC:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_G_ADD;
+    }
+    break;
+  case AMDILISD::ATOM_G_DEC_NORET:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_G_SUB_NORET;
+    }
+    break;
+  case AMDILISD::ATOM_G_INC_NORET:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_G_ADD_NORET;
+    }
+    break;
+  case AMDILISD::ATOM_L_DEC:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_L_SUB;
+    }
+    break;
+  case AMDILISD::ATOM_L_INC:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_L_ADD;
+    }
+    break;
+  case AMDILISD::ATOM_L_DEC_NORET:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_L_SUB_NORET;
+    }
+    break;
+  case AMDILISD::ATOM_L_INC_NORET:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_L_ADD_NORET;
+    }
+    break;
+  case AMDILISD::ATOM_R_DEC:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_R_SUB;
+    }
+    break;
+  case AMDILISD::ATOM_R_INC:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_R_ADD;
+    }
+    break;
+  case AMDILISD::ATOM_R_DEC_NORET:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_R_SUB_NORET;
+    }
+    break;
+  case AMDILISD::ATOM_R_INC_NORET:
+    addOne = true;
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = AMDILISD::ATOM_R_ADD_NORET;
+    }
+    break;
+  }
+  // The largest we can have is a cmpxchg w/ a return value and an output chain.
+  // The cmpxchg function has 3 inputs and a single output along with an
+  // output chain and a target constant, giving a total of 6.
+  SDValue Ops[12];
+  unsigned x = 0;
+  unsigned y = N->getNumOperands();
+  for (x = 0; x < y; ++x) {
+    Ops[x] = N->getOperand(x);
+  }
+  if (addOne) {
+    Ops[x++] = SDValue(SelectCode(CurDAG->getConstant(addVal, MVT::i32).getNode()), 0);
+  }
+  Ops[x++] = CurDAG->getTargetConstant(0, MVT::i32);
+  SDVTList Tys = N->getVTList();
+  MemSDNode *MemNode = dyn_cast<MemSDNode>(N);
+  assert(MemNode && "Atomic should be of MemSDNode type!");
+  N = CurDAG->getMemIntrinsicNode(opc, N->getDebugLoc(), Tys, Ops, x,
+                                  MemNode->getMemoryVT(), MemNode->getMemOperand()).getNode();
+  return N;
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,6010 @@
+//===-- AMDILISelLowering.cpp ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that AMDIL uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILISelLowering.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILLLVMPC.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetOptions.h"
+#include "../../CodeGen/SelectionDAG/SDNodeDbgValue.h"
+using namespace llvm;
+#define ISDBITCAST  ISD::BITCAST
+#define MVTGLUE     MVT::Glue
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDILGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions Begin
+//===----------------------------------------------------------------------===//
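+// getConversionNode - Coerce Src to the scalar type of Dst, either by
+// reinterpreting the bits (asType == true) or with a numeric int<->fp
+// conversion (asType == false), extending or truncating where needed.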
+static SDValue
+getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
+{
+  DebugLoc DL = Src.getDebugLoc();
+  EVT svt = Src.getValueType().getScalarType();
+  EVT dvt = Dst.getValueType().getScalarType();
+  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
+    if (dvt.bitsGT(svt)) {
+      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
+    } else if (dvt.bitsLT(svt)) {
+      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
+                        DAG.getConstant(1, MVT::i32));
+    }
+  } else if (svt.isInteger() && dvt.isInteger()) {
+    if (!svt.bitsEq(dvt)) {
+      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+    } else {
+      Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
+    }
+  } else if (svt.isInteger()) {
+    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
+    if (!svt.bitsEq(dvt)) {
+      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
+        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
+        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
+      } else {
+        assert(0 && "We only support 32 and 64bit fp types");
+      }
+    }
+    Src = DAG.getNode(opcode, DL, dvt, Src);
+  } else if (dvt.isInteger()) {
+    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
+    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
+      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
+    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
+      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
+    } else {
+      assert(0 && "We only support 32 and 64bit fp types");
+    }
+    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+  }
+  return Src;
+}
+// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
+// condition.
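+// For example, ISD::SETGT maps to AMDILCC::IL_CC_F_GT for MVT::f32 and to
+// AMDILCC::IL_CC_L_GT for MVT::i64 (see the switch below).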
+static AMDILCC::CondCodes
+CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
+{
+  switch (CC) {
+  default: {
+    errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
+    assert(0 && "Unknown condition code!");
+  }
+  case ISD::SETO:
+    switch(type) {
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_O;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_O;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETUO:
+    switch(type) {
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_UO;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_UO;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETGT:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_I_GT;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_GT;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_GT;
+    case MVT::i64:
+      return AMDILCC::IL_CC_L_GT;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETGE:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_I_GE;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_GE;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_GE;
+    case MVT::i64:
+      return AMDILCC::IL_CC_L_GE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETLT:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_I_LT;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_LT;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_LT;
+    case MVT::i64:
+      return AMDILCC::IL_CC_L_LT;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETLE:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_I_LE;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_LE;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_LE;
+    case MVT::i64:
+      return AMDILCC::IL_CC_L_LE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETNE:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_I_NE;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_NE;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_NE;
+    case MVT::i64:
+      return AMDILCC::IL_CC_L_NE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETEQ:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_I_EQ;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_EQ;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_EQ;
+    case MVT::i64:
+      return AMDILCC::IL_CC_L_EQ;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETUGT:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_U_GT;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_UGT;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_UGT;
+    case MVT::i64:
+      return AMDILCC::IL_CC_UL_GT;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETUGE:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_U_GE;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_UGE;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_UGE;
+    case MVT::i64:
+      return AMDILCC::IL_CC_UL_GE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETULT:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_U_LT;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_ULT;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_ULT;
+    case MVT::i64:
+      return AMDILCC::IL_CC_UL_LT;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETULE:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_U_LE;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_ULE;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_ULE;
+    case MVT::i64:
+      return AMDILCC::IL_CC_UL_LE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETUNE:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_U_NE;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_UNE;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_UNE;
+    case MVT::i64:
+      return AMDILCC::IL_CC_UL_NE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETUEQ:
+    switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      return AMDILCC::IL_CC_U_EQ;
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_UEQ;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_UEQ;
+    case MVT::i64:
+      return AMDILCC::IL_CC_UL_EQ;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETOGT:
+    switch (type) {
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_OGT;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_OGT;
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+    case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETOGE:
+    switch (type) {
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_OGE;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_OGE;
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+    case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETOLT:
+    switch (type) {
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_OLT;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_OLT;
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+    case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETOLE:
+    switch (type) {
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_OLE;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_OLE;
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+    case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETONE:
+    switch (type) {
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_ONE;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_ONE;
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+    case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  case ISD::SETOEQ:
+    switch (type) {
+    case MVT::f32:
+      return AMDILCC::IL_CC_F_OEQ;
+    case MVT::f64:
+      return AMDILCC::IL_CC_D_OEQ;
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+    case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    };
+  };
+}
+
+static unsigned int
+translateToOpcode(uint64_t CCCode, unsigned int regClass)
+{
+  switch (CCCode) {
+  case AMDILCC::IL_CC_D_EQ:
+  case AMDILCC::IL_CC_D_OEQ:
+    if (regClass == AMDIL::GPRV2F64RegClassID) {
+      return (unsigned int)AMDIL::DEQ_v2f64;
+    } else {
+      return (unsigned int)AMDIL::DEQ;
+    }
+  case AMDILCC::IL_CC_D_LE:
+  case AMDILCC::IL_CC_D_OLE:
+  case AMDILCC::IL_CC_D_ULE:
+  case AMDILCC::IL_CC_D_GE:
+  case AMDILCC::IL_CC_D_OGE:
+  case AMDILCC::IL_CC_D_UGE:
+    return (unsigned int)AMDIL::DGE;
+  case AMDILCC::IL_CC_D_LT:
+  case AMDILCC::IL_CC_D_OLT:
+  case AMDILCC::IL_CC_D_ULT:
+  case AMDILCC::IL_CC_D_GT:
+  case AMDILCC::IL_CC_D_OGT:
+  case AMDILCC::IL_CC_D_UGT:
+    return (unsigned int)AMDIL::DLT;
+  case AMDILCC::IL_CC_D_NE:
+  case AMDILCC::IL_CC_D_UNE:
+    return (unsigned int)AMDIL::DNE;
+  case AMDILCC::IL_CC_F_EQ:
+  case AMDILCC::IL_CC_F_OEQ:
+    return (unsigned int)AMDIL::FEQ;
+  case AMDILCC::IL_CC_F_LE:
+  case AMDILCC::IL_CC_F_ULE:
+  case AMDILCC::IL_CC_F_OLE:
+  case AMDILCC::IL_CC_F_GE:
+  case AMDILCC::IL_CC_F_UGE:
+  case AMDILCC::IL_CC_F_OGE:
+    return (unsigned int)AMDIL::FGE;
+  case AMDILCC::IL_CC_F_LT:
+  case AMDILCC::IL_CC_F_OLT:
+  case AMDILCC::IL_CC_F_ULT:
+  case AMDILCC::IL_CC_F_GT:
+  case AMDILCC::IL_CC_F_OGT:
+  case AMDILCC::IL_CC_F_UGT:
+    if (regClass == AMDIL::GPRV2F32RegClassID) {
+      return (unsigned int)AMDIL::FLT_v2f32;
+    } else if (regClass == AMDIL::GPRV4F32RegClassID) {
+      return (unsigned int)AMDIL::FLT_v4f32;
+    } else {
+      return (unsigned int)AMDIL::FLT;
+    }
+  case AMDILCC::IL_CC_F_NE:
+  case AMDILCC::IL_CC_F_UNE:
+    return (unsigned int)AMDIL::FNE;
+  case AMDILCC::IL_CC_I_EQ:
+  case AMDILCC::IL_CC_U_EQ:
+    if (regClass == AMDIL::GPRI32RegClassID
+        || regClass == AMDIL::GPRI8RegClassID
+        || regClass == AMDIL::GPRI16RegClassID) {
+      return (unsigned int)AMDIL::IEQ;
+    } else if (regClass == AMDIL::GPRV2I32RegClassID
+               || regClass == AMDIL::GPRV2I8RegClassID
+               || regClass == AMDIL::GPRV2I16RegClassID) {
+      return (unsigned int)AMDIL::IEQ_v2i32;
+    } else if (regClass == AMDIL::GPRV4I32RegClassID
+               || regClass == AMDIL::GPRV4I8RegClassID
+               || regClass == AMDIL::GPRV4I16RegClassID) {
+      return (unsigned int)AMDIL::IEQ_v4i32;
+    } else {
+      assert(!"Unknown reg class!");
+    }
+  case AMDILCC::IL_CC_L_EQ:
+  case AMDILCC::IL_CC_UL_EQ:
+    return (unsigned int)AMDIL::LEQ;
+  case AMDILCC::IL_CC_I_GE:
+  case AMDILCC::IL_CC_I_LE:
+    if (regClass == AMDIL::GPRI32RegClassID
+        || regClass == AMDIL::GPRI8RegClassID
+        || regClass == AMDIL::GPRI16RegClassID) {
+      return (unsigned int)AMDIL::IGE;
+    } else if (regClass == AMDIL::GPRV2I32RegClassID
+               || regClass == AMDIL::GPRV2I8RegClassID
+               || regClass == AMDIL::GPRV2I16RegClassID) {
+      return (unsigned int)AMDIL::IGE_v2i32;
+    } else if (regClass == AMDIL::GPRV4I32RegClassID
+               || regClass == AMDIL::GPRV4I8RegClassID
+               || regClass == AMDIL::GPRV4I16RegClassID) {
+      return (unsigned int)AMDIL::IGE_v4i32;
+    } else {
+      assert(!"Unknown reg class!");
+    }
+  case AMDILCC::IL_CC_I_LT:
+  case AMDILCC::IL_CC_I_GT:
+    if (regClass == AMDIL::GPRI32RegClassID
+        || regClass == AMDIL::GPRI8RegClassID
+        || regClass == AMDIL::GPRI16RegClassID) {
+      return (unsigned int)AMDIL::ILT;
+    } else if (regClass == AMDIL::GPRV2I32RegClassID
+               || regClass == AMDIL::GPRV2I8RegClassID
+               || regClass == AMDIL::GPRV2I16RegClassID) {
+      return (unsigned int)AMDIL::ILT_v2i32;
+    } else if (regClass == AMDIL::GPRV4I32RegClassID
+               || regClass == AMDIL::GPRV4I8RegClassID
+               || regClass == AMDIL::GPRV4I16RegClassID) {
+      return (unsigned int)AMDIL::ILT_v4i32;
+    } else {
+      assert(!"Unknown reg class!");
+    }
+  case AMDILCC::IL_CC_L_GE:
+    return (unsigned int)AMDIL::LGE;
+  case AMDILCC::IL_CC_L_LE:
+    return (unsigned int)AMDIL::LLE;
+  case AMDILCC::IL_CC_L_LT:
+    return (unsigned int)AMDIL::LLT;
+  case AMDILCC::IL_CC_L_GT:
+    return (unsigned int)AMDIL::LGT;
+  case AMDILCC::IL_CC_I_NE:
+  case AMDILCC::IL_CC_U_NE:
+    if (regClass == AMDIL::GPRI32RegClassID
+        || regClass == AMDIL::GPRI8RegClassID
+        || regClass == AMDIL::GPRI16RegClassID) {
+      return (unsigned int)AMDIL::INE;
+    } else if (regClass == AMDIL::GPRV2I32RegClassID
+               || regClass == AMDIL::GPRV2I8RegClassID
+               || regClass == AMDIL::GPRV2I16RegClassID) {
+      return (unsigned int)AMDIL::INE_v2i32;
+    } else if (regClass == AMDIL::GPRV4I32RegClassID
+               || regClass == AMDIL::GPRV4I8RegClassID
+               || regClass == AMDIL::GPRV4I16RegClassID) {
+      return (unsigned int)AMDIL::INE_v4i32;
+    } else {
+      assert(!"Unknown reg class!");
+    }
+  case AMDILCC::IL_CC_U_GE:
+  case AMDILCC::IL_CC_U_LE:
+    if (regClass == AMDIL::GPRI32RegClassID
+        || regClass == AMDIL::GPRI8RegClassID
+        || regClass == AMDIL::GPRI16RegClassID) {
+      return (unsigned int)AMDIL::UGE;
+    } else if (regClass == AMDIL::GPRV2I32RegClassID
+               || regClass == AMDIL::GPRV2I8RegClassID
+               || regClass == AMDIL::GPRV2I16RegClassID) {
+      return (unsigned int)AMDIL::UGE_v2i32;
+    } else if (regClass == AMDIL::GPRV4I32RegClassID
+               || regClass == AMDIL::GPRV4I8RegClassID
+               || regClass == AMDIL::GPRV4I16RegClassID) {
+      return (unsigned int)AMDIL::UGE_v4i32;
+    } else {
+      assert(!"Unknown reg class!");
+    }
+  case AMDILCC::IL_CC_L_NE:
+  case AMDILCC::IL_CC_UL_NE:
+    return (unsigned int)AMDIL::LNE;
+  case AMDILCC::IL_CC_UL_GE:
+    return (unsigned int)AMDIL::ULGE;
+  case AMDILCC::IL_CC_UL_LE:
+    return (unsigned int)AMDIL::ULLE;
+  case AMDILCC::IL_CC_U_LT:
+    if (regClass == AMDIL::GPRI32RegClassID
+        || regClass == AMDIL::GPRI8RegClassID
+        || regClass == AMDIL::GPRI16RegClassID) {
+      return (unsigned int)AMDIL::ULT;
+    } else if (regClass == AMDIL::GPRV2I32RegClassID
+               || regClass == AMDIL::GPRV2I8RegClassID
+               || regClass == AMDIL::GPRV2I16RegClassID) {
+      return (unsigned int)AMDIL::ULT_v2i32;
+    } else if (regClass == AMDIL::GPRV4I32RegClassID
+               || regClass == AMDIL::GPRV4I8RegClassID
+               || regClass == AMDIL::GPRV4I16RegClassID) {
+      return (unsigned int)AMDIL::ULT_v4i32;
+    } else {
+      assert(!"Unknown reg class!");
+    }
+  case AMDILCC::IL_CC_U_GT:
+    if (regClass == AMDIL::GPRI32RegClassID
+        || regClass == AMDIL::GPRI8RegClassID
+        || regClass == AMDIL::GPRI16RegClassID) {
+      return (unsigned int)AMDIL::UGT;
+    } else if (regClass == AMDIL::GPRV2I32RegClassID
+               || regClass == AMDIL::GPRV2I8RegClassID
+               || regClass == AMDIL::GPRV2I16RegClassID) {
+      return (unsigned int)AMDIL::UGT_v2i32;
+    } else if (regClass == AMDIL::GPRV4I32RegClassID
+               || regClass == AMDIL::GPRV4I8RegClassID
+               || regClass == AMDIL::GPRV4I16RegClassID) {
+      return (unsigned int)AMDIL::UGT_v4i32;
+    } else {
+      assert(!"Unknown reg class!");
+    }
+  case AMDILCC::IL_CC_UL_LT:
+    return (unsigned int)AMDIL::ULLT;
+  case AMDILCC::IL_CC_UL_GT:
+    return (unsigned int)AMDIL::ULGT;
+  case AMDILCC::IL_CC_F_UEQ:
+  case AMDILCC::IL_CC_D_UEQ:
+  case AMDILCC::IL_CC_F_ONE:
+  case AMDILCC::IL_CC_D_ONE:
+  case AMDILCC::IL_CC_F_O:
+  case AMDILCC::IL_CC_F_UO:
+  case AMDILCC::IL_CC_D_O:
+  case AMDILCC::IL_CC_D_UO:
+    // we don't care
+    return 0;
+
+  }
+  errs()<<"Opcode: "<<CCCode<<"\n";
+  assert(0 && "Unknown opcode retrieved");
+  return 0;
+}
+SDValue
+AMDILTargetLowering::LowerMemArgument(
+  SDValue Chain,
+  CallingConv::ID CallConv,
+  const SmallVectorImpl<ISD::InputArg> &Ins,
+  DebugLoc dl, SelectionDAG &DAG,
+  const CCValAssign &VA,
+  MachineFrameInfo *MFI,
+  unsigned i) const
+{
+  // Create the nodes corresponding to a load from this parameter slot.
+  ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
+                          getTargetMachine().Options.GuaranteedTailCallOpt;
+  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+  // FIXME: For now, all byval parameter objects are marked mutable. This can
+  // be changed with more analysis.
+  // In case of tail call optimization, mark all arguments mutable, since they
+  // could be overwritten by the lowering of arguments in case of a tail call.
+  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+                                  VA.getLocMemOffset(), isImmutable
+                                 );
+  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+  if (Flags.isByVal())
+    return FIN;
+  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                     MachinePointerInfo::getFixedStack(FI),
+                     false, false, false, 0);
+}
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction generation functions
+//===----------------------------------------------------------------------===//
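+// addExtensionInstructions - Sign- or zero-extend a sub-32bit value held in a
+// 32-bit register component by emitting a shift-left/shift-right pair on the
+// promoted value (e.g. for an i8 value: shl by 24 followed by shr or ushr by
+// 24, depending on signedShift).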
+uint32_t
+AMDILTargetLowering::addExtensionInstructions(
+  uint32_t reg, bool signedShift,
+  unsigned int simpleVT) const
+{
+  int shiftSize = 0;
+  uint32_t LShift, RShift, Promote, Demote;
+  uint32_t intRegClassID;
+  switch(simpleVT) {
+  default:
+    return reg;
+  case AMDIL::GPRI8RegClassID:
+    shiftSize = 24;
+    LShift = AMDIL::SHL_i32;
+    if (signedShift) {
+      RShift = AMDIL::SHR_i32;
+    } else {
+      RShift = AMDIL::USHR_i32;
+    }
+    Promote = AMDIL::IL_ASINT_i8;
+    Demote = AMDIL::IL_ASCHAR_i32;
+    intRegClassID = AMDIL::GPRI32RegClassID;
+    break;
+  case AMDIL::GPRV2I8RegClassID:
+    shiftSize = 24;
+    LShift = AMDIL::SHL_v2i32;
+    if (signedShift) {
+      RShift = AMDIL::SHR_v2i32;
+    } else {
+      RShift = AMDIL::USHR_v2i32;
+    }
+    Promote = AMDIL::IL_ASV2INT_v2i8;
+    Demote = AMDIL::IL_ASV2CHAR_v2i32;
+    intRegClassID = AMDIL::GPRV2I32RegClassID;
+    break;
+  case AMDIL::GPRV4I8RegClassID:
+    shiftSize = 24;
+    LShift = AMDIL::SHL_v4i32;
+    if (signedShift) {
+      RShift = AMDIL::SHR_v4i32;
+    } else {
+      RShift = AMDIL::USHR_v4i32;
+    }
+    Promote = AMDIL::IL_ASV4INT_v4i8;
+    Demote = AMDIL::IL_ASV4CHAR_v4i32;
+    intRegClassID = AMDIL::GPRV4I32RegClassID;
+    break;
+  case AMDIL::GPRI16RegClassID:
+    shiftSize = 16;
+    LShift = AMDIL::SHL_i32;
+    if (signedShift) {
+      RShift = AMDIL::SHR_i32;
+    } else {
+      RShift = AMDIL::USHR_i32;
+    }
+    Promote = AMDIL::IL_ASINT_i16;
+    Demote = AMDIL::IL_ASSHORT_i32;
+    intRegClassID = AMDIL::GPRI32RegClassID;
+    break;
+  case AMDIL::GPRV2I16RegClassID:
+    shiftSize = 16;
+    LShift = AMDIL::SHL_v2i32;
+    if (signedShift) {
+      RShift = AMDIL::SHR_v2i32;
+    } else {
+      RShift = AMDIL::USHR_v2i32;
+    }
+    Promote = AMDIL::IL_ASV2INT_v2i16;
+    Demote = AMDIL::IL_ASV2SHORT_v2i32;
+    intRegClassID = AMDIL::GPRV2I32RegClassID;
+    break;
+  case AMDIL::GPRV4I16RegClassID:
+    shiftSize = 16;
+    LShift = AMDIL::SHL_v4i32;
+    if (signedShift) {
+      RShift = AMDIL::SHR_v4i32;
+    } else {
+      RShift = AMDIL::USHR_v4i32;
+    }
+    Promote = AMDIL::IL_ASV4INT_v4i16;
+    Demote = AMDIL::IL_ASV4SHORT_v4i32;
+    intRegClassID = AMDIL::GPRV4I32RegClassID;
+    break;
+  };
+  uint32_t LoadReg = genVReg(simpleVT);
+  uint32_t tmp1 = genVReg(intRegClassID);
+  uint32_t tmp2 = genVReg(intRegClassID);
+  uint32_t tmp3 = genVReg(intRegClassID);
+  uint32_t dst = genVReg(simpleVT);
+  generateMachineInst(Promote, tmp1, reg);
+  generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
+  generateMachineInst(LShift, tmp2, tmp1, LoadReg);
+  generateMachineInst(RShift, tmp3, tmp2, LoadReg);
+  generateMachineInst(Demote, dst, tmp3);
+  return dst;
+}
+
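+// convertToReg - Ensure the operand is a register operand: immediates and FP
+// immediates are materialized into a fresh virtual register with a LOADCONST
+// instruction, while other non-register operand kinds are rewritten to
+// register 0 here.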
+MachineOperand
+AMDILTargetLowering::convertToReg(MachineOperand op) const
+{
+  if (op.isReg()) {
+    return op;
+  } else if (op.isImm()) {
+    uint32_t loadReg
+    = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+    generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
+    .addImm(op.getImm());
+    op.ChangeToRegister(loadReg, false);
+  } else if (op.isFPImm()) {
+    uint32_t loadReg
+    = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+    generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
+    .addFPImm(op.getFPImm());
+    op.ChangeToRegister(loadReg, false);
+  } else if (op.isMBB()) {
+    op.ChangeToRegister(0, false);
+  } else if (op.isFI()) {
+    op.ChangeToRegister(0, false);
+  } else if (op.isCPI()) {
+    op.ChangeToRegister(0, false);
+  } else if (op.isJTI()) {
+    op.ChangeToRegister(0, false);
+  } else if (op.isGlobal()) {
+    op.ChangeToRegister(0, false);
+  } else if (op.isSymbol()) {
+    op.ChangeToRegister(0, false);
+  }/* else if (op.isMetadata()) {
+      op.ChangeToRegister(0, false);
+      }*/
+  return op;
+}
+
+void
+AMDILTargetLowering::generateCMPInstr(
+  MachineInstr *MI,
+  MachineBasicBlock *BB,
+  const TargetInstrInfo& TII)
+const
+{
+  MachineOperand DST = MI->getOperand(0);
+  MachineOperand CC = MI->getOperand(1);
+  MachineOperand LHS = MI->getOperand(2);
+  MachineOperand RHS = MI->getOperand(3);
+  int64_t ccCode = CC.getImm();
+  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+  unsigned int opCode = translateToOpcode(ccCode, simpleVT);
+  DebugLoc DL = MI->getDebugLoc();
+  MachineBasicBlock::iterator BBI = MI;
+  setPrivateData(BB, BBI, &DL, &TII);
+  if (!LHS.isReg()) {
+    LHS = convertToReg(LHS);
+  }
+  if (!RHS.isReg()) {
+    RHS = convertToReg(RHS);
+  }
+  uint32_t lhsreg = LHS.getReg();
+  uint32_t rhsreg = RHS.getReg();
+  switch (ccCode) {
+  case AMDILCC::IL_CC_I_EQ:
+  case AMDILCC::IL_CC_I_NE:
+  case AMDILCC::IL_CC_I_GE:
+  case AMDILCC::IL_CC_I_LT:
+  case AMDILCC::IL_CC_I_GT:
+  case AMDILCC::IL_CC_I_LE: {
+    lhsreg = addExtensionInstructions(lhsreg, true, simpleVT);
+    rhsreg = addExtensionInstructions(rhsreg, true, simpleVT);
+  }
+  break;
+  case AMDILCC::IL_CC_U_EQ:
+  case AMDILCC::IL_CC_U_NE:
+  case AMDILCC::IL_CC_U_GE:
+  case AMDILCC::IL_CC_U_LT:
+  case AMDILCC::IL_CC_U_GT:
+  case AMDILCC::IL_CC_U_LE: {
+    lhsreg = addExtensionInstructions(lhsreg, false, simpleVT);
+    rhsreg = addExtensionInstructions(rhsreg, false, simpleVT);
+  }
+  break;
+  }
+
+  switch (ccCode) {
+  case AMDILCC::IL_CC_I_EQ:
+  case AMDILCC::IL_CC_I_NE:
+  case AMDILCC::IL_CC_I_GE:
+  case AMDILCC::IL_CC_I_LT:
+  case AMDILCC::IL_CC_U_EQ:
+  case AMDILCC::IL_CC_U_NE:
+  case AMDILCC::IL_CC_U_GE:
+  case AMDILCC::IL_CC_U_LT:
+  case AMDILCC::IL_CC_D_EQ:
+  case AMDILCC::IL_CC_F_EQ:
+  case AMDILCC::IL_CC_F_OEQ:
+  case AMDILCC::IL_CC_D_OEQ:
+  case AMDILCC::IL_CC_D_NE:
+  case AMDILCC::IL_CC_F_NE:
+  case AMDILCC::IL_CC_F_UNE:
+  case AMDILCC::IL_CC_D_UNE:
+  case AMDILCC::IL_CC_D_GE:
+  case AMDILCC::IL_CC_F_GE:
+  case AMDILCC::IL_CC_D_OGE:
+  case AMDILCC::IL_CC_F_OGE:
+  case AMDILCC::IL_CC_D_LT:
+  case AMDILCC::IL_CC_F_LT:
+  case AMDILCC::IL_CC_F_OLT:
+  case AMDILCC::IL_CC_D_OLT:
+    generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
+    break;
+  case AMDILCC::IL_CC_I_GT:
+  case AMDILCC::IL_CC_I_LE:
+  case AMDILCC::IL_CC_U_GT:
+  case AMDILCC::IL_CC_U_LE:
+  case AMDILCC::IL_CC_F_GT:
+  case AMDILCC::IL_CC_D_GT:
+  case AMDILCC::IL_CC_F_OGT:
+  case AMDILCC::IL_CC_D_OGT:
+  case AMDILCC::IL_CC_F_LE:
+  case AMDILCC::IL_CC_D_LE:
+  case AMDILCC::IL_CC_D_OLE:
+  case AMDILCC::IL_CC_F_OLE:
+    generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
+    break;
+  case AMDILCC::IL_CC_F_UGT:
+  case AMDILCC::IL_CC_F_ULE: {
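+    // Unordered compare: OR the ordered comparison result with NaN checks on
+    // both operands; FNE of a register with itself is true only for NaN.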
+    uint32_t VReg[4] = {
+      genVReg(simpleVT), genVReg(simpleVT),
+      genVReg(simpleVT), genVReg(simpleVT)
+    };
+    generateMachineInst(opCode, VReg[0], rhsreg, lhsreg);
+    generateMachineInst(AMDIL::FNE, VReg[1], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::FNE, VReg[2], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        VReg[3], VReg[0], VReg[1]);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        DST.getReg(), VReg[2], VReg[3]);
+  }
+  break;
+  case AMDILCC::IL_CC_F_ULT:
+  case AMDILCC::IL_CC_F_UGE: {
+    uint32_t VReg[4] = {
+      genVReg(simpleVT), genVReg(simpleVT),
+      genVReg(simpleVT), genVReg(simpleVT)
+    };
+    generateMachineInst(opCode, VReg[0], lhsreg, rhsreg);
+    generateMachineInst(AMDIL::FNE, VReg[1], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::FNE, VReg[2], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        VReg[3], VReg[0], VReg[1]);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        DST.getReg(), VReg[2], VReg[3]);
+  }
+  break;
+  case AMDILCC::IL_CC_D_UGT:
+  case AMDILCC::IL_CC_D_ULE: {
+    uint32_t regID = AMDIL::GPRF64RegClassID;
+    uint32_t VReg[4] = {
+      genVReg(regID), genVReg(regID),
+      genVReg(regID), genVReg(regID)
+    };
+    // The result of a double comparison is a 32bit result
+    generateMachineInst(opCode, VReg[0], rhsreg, lhsreg);
+    generateMachineInst(AMDIL::DNE, VReg[1], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::DNE, VReg[2], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        VReg[3], VReg[0], VReg[1]);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        DST.getReg(), VReg[2], VReg[3]);
+  }
+  break;
+  case AMDILCC::IL_CC_D_UGE:
+  case AMDILCC::IL_CC_D_ULT: {
+    uint32_t regID = AMDIL::GPRF64RegClassID;
+    uint32_t VReg[4] = {
+      genVReg(regID), genVReg(regID),
+      genVReg(regID), genVReg(regID)
+    };
+    // The result of a double comparison is a 32bit result
+    generateMachineInst(opCode, VReg[0], lhsreg, rhsreg);
+    generateMachineInst(AMDIL::DNE, VReg[1], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::DNE, VReg[2], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        VReg[3], VReg[0], VReg[1]);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        DST.getReg(), VReg[2], VReg[3]);
+  }
+  break;
+  case AMDILCC::IL_CC_F_UEQ: {
+    uint32_t VReg[4] = {
+      genVReg(simpleVT), genVReg(simpleVT),
+      genVReg(simpleVT), genVReg(simpleVT)
+    };
+    generateMachineInst(AMDIL::FEQ, VReg[0], lhsreg, rhsreg);
+    generateMachineInst(AMDIL::FNE, VReg[1], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::FNE, VReg[2], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        VReg[3], VReg[0], VReg[1]);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        DST.getReg(), VReg[2], VReg[3]);
+  }
+  break;
+  case AMDILCC::IL_CC_F_ONE: {
+    uint32_t VReg[4] = {
+      genVReg(simpleVT), genVReg(simpleVT),
+      genVReg(simpleVT), genVReg(simpleVT)
+    };
+    generateMachineInst(AMDIL::FNE, VReg[0], lhsreg, rhsreg);
+    generateMachineInst(AMDIL::FEQ, VReg[1], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::FEQ, VReg[2], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::BINARY_AND_f32,
+                        VReg[3], VReg[0], VReg[1]);
+    generateMachineInst(AMDIL::BINARY_AND_f32,
+                        DST.getReg(), VReg[2], VReg[3]);
+  }
+  break;
+  case AMDILCC::IL_CC_D_UEQ: {
+    uint32_t regID = AMDIL::GPRF64RegClassID;
+    uint32_t VReg[4] = {
+      genVReg(regID), genVReg(regID),
+      genVReg(regID), genVReg(regID)
+    };
+    // The result of a double comparison is a 32bit result
+    generateMachineInst(AMDIL::DEQ, VReg[0], lhsreg, rhsreg);
+    generateMachineInst(AMDIL::DNE, VReg[1], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::DNE, VReg[2], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        VReg[3], VReg[0], VReg[1]);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        DST.getReg(), VReg[2], VReg[3]);
+
+  }
+  break;
+  case AMDILCC::IL_CC_D_ONE: {
+    uint32_t regID = AMDIL::GPRF64RegClassID;
+    uint32_t VReg[4] = {
+      genVReg(regID), genVReg(regID),
+      genVReg(regID), genVReg(regID)
+    };
+    // The result of a double comparison is a 32bit result
+    generateMachineInst(AMDIL::DNE, VReg[0], lhsreg, rhsreg);
+    generateMachineInst(AMDIL::DEQ, VReg[1], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::DEQ, VReg[2], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::BINARY_AND_f32,
+                        VReg[3], VReg[0], VReg[1]);
+    generateMachineInst(AMDIL::BINARY_AND_f32,
+                        DST.getReg(), VReg[2], VReg[3]);
+
+  }
+  break;
+  case AMDILCC::IL_CC_F_O: {
+    uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+    generateMachineInst(AMDIL::FEQ, VReg[0], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::FEQ, VReg[1], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::BINARY_AND_f32,
+                        DST.getReg(), VReg[0], VReg[1]);
+  }
+  break;
+  case AMDILCC::IL_CC_D_O: {
+    uint32_t regID = AMDIL::GPRF64RegClassID;
+    uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+    // The result of a double comparison is a 32bit result
+    generateMachineInst(AMDIL::DEQ, VReg[0], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::DEQ, VReg[1], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::BINARY_AND_f32,
+                        DST.getReg(), VReg[0], VReg[1]);
+  }
+  break;
+  case AMDILCC::IL_CC_F_UO: {
+    uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+    generateMachineInst(AMDIL::FNE, VReg[0], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::FNE, VReg[1], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        DST.getReg(), VReg[0], VReg[1]);
+  }
+  break;
+  case AMDILCC::IL_CC_D_UO: {
+    uint32_t regID = AMDIL::GPRF64RegClassID;
+    uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+    // The result of a double comparison is a 32bit result
+    generateMachineInst(AMDIL::DNE, VReg[0], rhsreg, rhsreg);
+    generateMachineInst(AMDIL::DNE, VReg[1], lhsreg, lhsreg);
+    generateMachineInst(AMDIL::BINARY_OR_f32,
+                        DST.getReg(), VReg[0], VReg[1]);
+  }
+  break;
+  case AMDILCC::IL_CC_L_LE:
+  case AMDILCC::IL_CC_L_GE:
+  case AMDILCC::IL_CC_L_EQ:
+  case AMDILCC::IL_CC_L_NE:
+  case AMDILCC::IL_CC_L_LT:
+  case AMDILCC::IL_CC_L_GT:
+  case AMDILCC::IL_CC_UL_LE:
+  case AMDILCC::IL_CC_UL_GE:
+  case AMDILCC::IL_CC_UL_EQ:
+  case AMDILCC::IL_CC_UL_NE:
+  case AMDILCC::IL_CC_UL_LT:
+  case AMDILCC::IL_CC_UL_GT: {
+    const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+                                  &this->getTargetMachine())->getSubtargetImpl();
+    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
+      generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
+    } else {
+      generateLongRelational(MI, opCode);
+    }
+  }
+  break;
+  case AMDILCC::COND_ERROR:
+    assert(0 && "Invalid CC code");
+    break;
+  };
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
+  : TargetLowering(TM, new TargetLoweringObjectFileELF())
+{
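+  // Vector compares produce all-ones (-1) lanes for true and all-zeros
+  // lanes for false.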
+  setBooleanVectorContents( ZeroOrNegativeOneBooleanContent );
+  int types[] = {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::f32,
+    (int)MVT::f64,
+    (int)MVT::i64,
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+
+  int IntTypes[] = {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::i64
+  };
+
+  int FloatTypes[] = {
+    (int)MVT::f32,
+    (int)MVT::f64
+  };
+
+  int VectorTypes[] = {
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+  size_t numTypes = sizeof(types) / sizeof(*types);
+  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+                                &this->getTargetMachine())->getSubtargetImpl();
+  // These are the register classes that are currently supported.
+
+  addRegisterClass(MVT::i32, &AMDIL::GPRI32RegClass);
+  addRegisterClass(MVT::f32, &AMDIL::GPRF32RegClass);
+
+  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+    addRegisterClass(MVT::f64,   &AMDIL::GPRF64RegClass);
+    addRegisterClass(MVT::v2f64, &AMDIL::GPRV2F64RegClass);
+  }
+  if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
+    addRegisterClass(MVT::i8,   &AMDIL::GPRI8RegClass);
+    addRegisterClass(MVT::v2i8, &AMDIL::GPRV2I8RegClass);
+    addRegisterClass(MVT::v4i8, &AMDIL::GPRV4I8RegClass);
+    setOperationAction(ISD::Constant          , MVT::i8   , Legal);
+  }
+  if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
+    addRegisterClass(MVT::i16,   &AMDIL::GPRI16RegClass);
+    addRegisterClass(MVT::v2i16, &AMDIL::GPRV2I16RegClass);
+    addRegisterClass(MVT::v4i16, &AMDIL::GPRV4I16RegClass);
+    setOperationAction(ISD::Constant          , MVT::i16  , Legal);
+  }
+  addRegisterClass(MVT::v2f32, &AMDIL::GPRV2F32RegClass);
+  addRegisterClass(MVT::v4f32, &AMDIL::GPRV4F32RegClass);
+  addRegisterClass(MVT::v2i32, &AMDIL::GPRV2I32RegClass);
+  addRegisterClass(MVT::v4i32, &AMDIL::GPRV4I32RegClass);
+  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+    addRegisterClass(MVT::i64,   &AMDIL::GPRI64RegClass);
+    addRegisterClass(MVT::v2i64, &AMDIL::GPRV2I64RegClass);
+  }
+
+  // Make some ops legal since the "generic" target lowering made them expand
+  // (See lib/CodeGen/SelectionDag/TargetLowering.cpp)
+  setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+  setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+  setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
+  setOperationAction(ISD::FLOG ,  MVT::f32, Legal);
+  // Set explicitly to expand in case default changes
+  setOperationAction(ISD::FRINT,  MVT::f32, Expand);
+
+  for (unsigned int x  = 0; x < numTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+    // We cannot sextinreg, expand to shifts
+    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+    setOperationAction(ISD::FP_ROUND, VT, Expand);
+    setOperationAction(ISD::OR, VT, Custom);
+    setOperationAction(ISD::SUBE, VT, Expand);
+    setOperationAction(ISD::SUBC, VT, Expand);
+    setOperationAction(ISD::ADD, VT, Custom);
+    setOperationAction(ISD::ADDE, VT, Expand);
+    setOperationAction(ISD::ADDC, VT, Expand);
+    setOperationAction(ISD::SETCC, VT, Custom);
+    setOperationAction(ISD::BRCOND, VT, Custom);
+    setOperationAction(ISD::BR_CC, VT, Custom);
+    setOperationAction(ISD::BR_JT, VT, Expand);
+    setOperationAction(ISD::BRIND, VT, Expand);
+    // TODO: Implement custom UREM/SREM routines
+    setOperationAction(ISD::UREM, VT, Expand);
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+    setOperationAction(ISDBITCAST, VT, Custom);
+    setOperationAction(ISD::GlobalAddress, VT, Custom);
+    setOperationAction(ISD::JumpTable, VT, Custom);
+    setOperationAction(ISD::ConstantPool, VT, Custom);
+    setOperationAction(ISD::SELECT_CC, VT, Custom);
+    setOperationAction(ISD::SELECT, VT, Custom);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+    if (VT != MVT::i64 && VT != MVT::v2i64) {
+      setOperationAction(ISD::SDIV, VT, Custom);
+      setOperationAction(ISD::UDIV, VT, Custom);
+    }
+    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+  }
+  for (unsigned int x = 0; x < numFloatTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+    // IL does not have these operations for floating point types
+    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+    setOperationAction(ISD::FP_ROUND, VT, Custom);
+    setOperationAction(ISD::SETOLT, VT, Expand);
+    setOperationAction(ISD::SETOGE, VT, Expand);
+    setOperationAction(ISD::SETOGT, VT, Expand);
+    setOperationAction(ISD::SETOLE, VT, Expand);
+    setOperationAction(ISD::SETULT, VT, Expand);
+    setOperationAction(ISD::SETUGE, VT, Expand);
+    setOperationAction(ISD::SETUGT, VT, Expand);
+    setOperationAction(ISD::SETULE, VT, Expand);
+  }
+
+  for (unsigned int x = 0; x < numIntTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+    // The GPU also has no divrem instruction for signed or unsigned types
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::UDIVREM, VT, Expand);
+    setOperationAction(ISD::FP_ROUND, VT, Expand);
+
+    // The GPU does not implement [S|U]MUL_LOHI as a single instruction
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+    // GPU doesn't have a rotl, rotr, or byteswap instruction
+    setOperationAction(ISD::ROTR, VT, Expand);
+    setOperationAction(ISD::ROTL, VT, Expand);
+    setOperationAction(ISD::BSWAP, VT, Expand);
+
+    // GPU doesn't have any counting operators
+    setOperationAction(ISD::CTPOP, VT, Expand);
+    setOperationAction(ISD::CTTZ, VT, Expand);
+    setOperationAction(ISD::CTLZ, VT, Expand);
+  }
+
+  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+    setOperationAction(ISD::FP_ROUND, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::UDIVREM, VT, Expand);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    // setOperationAction(ISD::VSETCC, VT, Expand);
+    setOperationAction(ISD::SETCC, VT, Expand);
+    setOperationAction(ISD::SELECT_CC, VT, Expand);
+    setOperationAction(ISD::SELECT, VT, Expand);
+
+  }
+  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
+  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+    if (stm->calVersion() < CAL_VERSION_SC_139
+        || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+      setOperationAction(ISD::MUL, MVT::i64, Custom);
+    }
+    setOperationAction(ISD::SUB, MVT::i64, Custom);
+    setOperationAction(ISD::ADD, MVT::i64, Custom);
+    setOperationAction(ISD::MULHU, MVT::i64, Expand);
+    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
+    setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
+    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+  }
+  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+    // we support loading/storing v2f64 but not operations on the type
+    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
+    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+    // We want to expand vector conversions into their scalar
+    // counterparts.
+    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
+    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::FABS, MVT::f64, Expand);
+    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+  }
+  // TODO: Fix the UDIV24 algorithm so it works correctly for these
+  // types; doing so requires vector comparisons.
+  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+  setOperationAction(ISD::SUBC, MVT::Other, Expand);
+  setOperationAction(ISD::ADDE, MVT::Other, Expand);
+  setOperationAction(ISD::ADDC, MVT::Other, Expand);
+  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
+  setOperationAction(ISD::SETCC, MVT::Other, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+  setOperationAction(ISD::FDIV, MVT::f32, Custom);
+  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
+  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
+
+  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
+  // Use the default implementation.
+  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
+  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
+  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
+  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
+  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
+  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
+  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);
+
+  setStackPointerRegisterToSaveRestore(AMDIL::SP);
+  setSchedulingPreference(Sched::RegPressure);
+  setPow2DivIsCheap(false);
+  setPrefLoopAlignment(16);
+  setSelectIsExpensive(true);
+  setJumpIsExpensive(true);
+  computeRegisterProperties();
+
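+  // Allow very large inline expansions of memcpy/memmove/memset before
+  // falling back to a library call.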
+  maxStoresPerMemcpy  = 4096;
+  maxStoresPerMemmove = 4096;
+  maxStoresPerMemset  = 4096;
+
+#undef numTypes
+#undef numIntTypes
+#undef numVectorTypes
+#undef numFloatTypes
+}
+
+// This only works for the region/local/global address spaces on EG/NI, as
+// the other address spaces require 128-bit alignment of loads/stores.
+// However, there is no way to disable this for just those address spaces
+// and only for specific types.
+// TODO: Modify this API call to pass in the address space/instruction
+bool
+AMDILTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const
+{
+  const AMDILSubtarget *STM = &this->getTargetMachine()
+                              .getSubtarget<AMDILSubtarget>();
+  // 7XX does not allow unaligned memory accesses
+  if (STM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    return false;
+  }
+  return (VT == MVT::v4f32 || VT == MVT::v4i32
+          || VT == MVT::v2f32 || VT == MVT::v2i32
+          || VT == MVT::f64   || VT == MVT::i64
+          || VT == MVT::v2f64 || VT == MVT::v2i64);
+}
+
+const char *
+AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+  switch (Opcode) {
+  default:
+    return 0;
+  case AMDILISD::INTTOANY:
+    return "AMDILISD::INTTOANY";
+  case AMDILISD::DP_TO_FP:
+    return "AMDILISD::DP_TO_FP";
+  case AMDILISD::FP_TO_DP:
+    return "AMDILISD::FP_TO_DP";
+  case AMDILISD::BITCONV:
+    return "AMDILISD::BITCONV";
+  case AMDILISD::CMOV:
+    return "AMDILISD::CMOV";
+  case AMDILISD::CMOVLOG:
+    return "AMDILISD::CMOVLOG";
+  case AMDILISD::INEGATE:
+    return "AMDILISD::INEGATE";
+  case AMDILISD::MAD:
+    return "AMDILISD::MAD";
+  case AMDILISD::UMAD:
+    return "AMDILISD::UMAD";
+  case AMDILISD::CALL:
+    return "AMDILISD::CALL";
+  case AMDILISD::RET:
+    return "AMDILISD::RET";
+  case AMDILISD::IFFB_HI:
+    return "AMDILISD::IFFB_HI";
+  case AMDILISD::IFFB_LO:
+    return "AMDILISD::IFFB_LO";
+  case AMDILISD::ADD:
+    return "AMDILISD::ADD";
+  case AMDILISD::UMUL:
+    return "AMDILISD::UMUL";
+  case AMDILISD::AND:
+    return "AMDILISD::AND";
+  case AMDILISD::OR:
+    return "AMDILISD::OR";
+  case AMDILISD::NOT:
+    return "AMDILISD::NOT";
+  case AMDILISD::XOR:
+    return "AMDILISD::XOR";
+  case AMDILISD::DIV_INF:
+    return "AMDILISD::DIV_INF";
+  case AMDILISD::SMAX:
+    return "AMDILISD::SMAX";
+  case AMDILISD::PHIMOVE:
+    return "AMDILISD::PHIMOVE";
+  case AMDILISD::MOVE:
+    return "AMDILISD::MOVE";
+  case AMDILISD::VBUILD:
+    return "AMDILISD::VBUILD";
+  case AMDILISD::VEXTRACT:
+    return "AMDILISD::VEXTRACT";
+  case AMDILISD::VINSERT:
+    return "AMDILISD::VINSERT";
+  case AMDILISD::VCONCAT:
+    return "AMDILISD::VCONCAT";
+  case AMDILISD::LCREATE:
+    return "AMDILISD::LCREATE";
+  case AMDILISD::LCOMPHI:
+    return "AMDILISD::LCOMPHI";
+  case AMDILISD::LCOMPLO:
+    return "AMDILISD::LCOMPLO";
+  case AMDILISD::DCREATE:
+    return "AMDILISD::DCREATE";
+  case AMDILISD::DCOMPHI:
+    return "AMDILISD::DCOMPHI";
+  case AMDILISD::DCOMPLO:
+    return "AMDILISD::DCOMPLO";
+  case AMDILISD::LCREATE2:
+    return "AMDILISD::LCREATE2";
+  case AMDILISD::LCOMPHI2:
+    return "AMDILISD::LCOMPHI2";
+  case AMDILISD::LCOMPLO2:
+    return "AMDILISD::LCOMPLO2";
+  case AMDILISD::DCREATE2:
+    return "AMDILISD::DCREATE2";
+  case AMDILISD::DCOMPHI2:
+    return "AMDILISD::DCOMPHI2";
+  case AMDILISD::DCOMPLO2:
+    return "AMDILISD::DCOMPLO2";
+  case AMDILISD::CMP:
+    return "AMDILISD::CMP";
+  case AMDILISD::IL_CC_I_LT:
+    return "AMDILISD::IL_CC_I_LT";
+  case AMDILISD::IL_CC_I_LE:
+    return "AMDILISD::IL_CC_I_LE";
+  case AMDILISD::IL_CC_I_GT:
+    return "AMDILISD::IL_CC_I_GT";
+  case AMDILISD::IL_CC_I_GE:
+    return "AMDILISD::IL_CC_I_GE";
+  case AMDILISD::IL_CC_I_EQ:
+    return "AMDILISD::IL_CC_I_EQ";
+  case AMDILISD::IL_CC_I_NE:
+    return "AMDILISD::IL_CC_I_NE";
+  case AMDILISD::RET_FLAG:
+    return "AMDILISD::RET_FLAG";
+  case AMDILISD::BRANCH_COND:
+    return "AMDILISD::BRANCH_COND";
+  case AMDILISD::LOOP_NZERO:
+    return "AMDILISD::LOOP_NZERO";
+  case AMDILISD::LOOP_ZERO:
+    return "AMDILISD::LOOP_ZERO";
+  case AMDILISD::LOOP_CMP:
+    return "AMDILISD::LOOP_CMP";
+  case AMDILISD::ADDADDR:
+    return "AMDILISD::ADDADDR";
+  case AMDILISD::ATOM_F_ADD:
+    return "AMDILISD::ATOM_F_ADD";
+  case AMDILISD::ATOM_F_AND:
+    return "AMDILISD::ATOM_F_AND";
+  case AMDILISD::ATOM_F_CMPXCHG:
+    return "AMDILISD::ATOM_F_CMPXCHG";
+  case AMDILISD::ATOM_F_DEC:
+    return "AMDILISD::ATOM_F_DEC";
+  case AMDILISD::ATOM_F_INC:
+    return "AMDILISD::ATOM_F_INC";
+  case AMDILISD::ATOM_F_MAX:
+    return "AMDILISD::ATOM_F_MAX";
+  case AMDILISD::ATOM_F_UMAX:
+    return "AMDILISD::ATOM_F_UMAX";
+  case AMDILISD::ATOM_F_MIN:
+    return "AMDILISD::ATOM_F_MIN";
+  case AMDILISD::ATOM_F_UMIN:
+    return "AMDILISD::ATOM_F_UMIN";
+  case AMDILISD::ATOM_F_OR:
+    return "AMDILISD::ATOM_F_OR";
+  case AMDILISD::ATOM_F_SUB:
+    return "AMDILISD::ATOM_F_SUB";
+  case AMDILISD::ATOM_F_XCHG:
+    return "AMDILISD::ATOM_F_XCHG";
+  case AMDILISD::ATOM_F_XOR:
+    return "AMDILISD::ATOM_F_XOR";
+  case AMDILISD::ATOM_G_ADD:
+    return "AMDILISD::ATOM_G_ADD";
+  case AMDILISD::ATOM_G_AND:
+    return "AMDILISD::ATOM_G_AND";
+  case AMDILISD::ATOM_G_CMPXCHG:
+    return "AMDILISD::ATOM_G_CMPXCHG";
+  case AMDILISD::ATOM_G_DEC:
+    return "AMDILISD::ATOM_G_DEC";
+  case AMDILISD::ATOM_G_INC:
+    return "AMDILISD::ATOM_G_INC";
+  case AMDILISD::ATOM_G_MAX:
+    return "AMDILISD::ATOM_G_MAX";
+  case AMDILISD::ATOM_G_UMAX:
+    return "AMDILISD::ATOM_G_UMAX";
+  case AMDILISD::ATOM_G_MIN:
+    return "AMDILISD::ATOM_G_MIN";
+  case AMDILISD::ATOM_G_UMIN:
+    return "AMDILISD::ATOM_G_UMIN";
+  case AMDILISD::ATOM_G_OR:
+    return "AMDILISD::ATOM_G_OR";
+  case AMDILISD::ATOM_G_SUB:
+    return "AMDILISD::ATOM_G_SUB";
+  case AMDILISD::ATOM_G_RSUB:
+    return "AMDILISD::ATOM_G_RSUB";
+  case AMDILISD::ATOM_G_XCHG:
+    return "AMDILISD::ATOM_G_XCHG";
+  case AMDILISD::ATOM_G_XOR:
+    return "AMDILISD::ATOM_G_XOR";
+  case AMDILISD::ATOM_G_ADD_NORET:
+    return "AMDILISD::ATOM_G_ADD_NORET";
+  case AMDILISD::ATOM_G_AND_NORET:
+    return "AMDILISD::ATOM_G_AND_NORET";
+  case AMDILISD::ATOM_G_CMPXCHG_NORET:
+    return "AMDILISD::ATOM_G_CMPXCHG_NORET";
+  case AMDILISD::ATOM_G_DEC_NORET:
+    return "AMDILISD::ATOM_G_DEC_NORET";
+  case AMDILISD::ATOM_G_INC_NORET:
+    return "AMDILISD::ATOM_G_INC_NORET";
+  case AMDILISD::ATOM_G_MAX_NORET:
+    return "AMDILISD::ATOM_G_MAX_NORET";
+  case AMDILISD::ATOM_G_UMAX_NORET:
+    return "AMDILISD::ATOM_G_UMAX_NORET";
+  case AMDILISD::ATOM_G_MIN_NORET:
+    return "AMDILISD::ATOM_G_MIN_NORET";
+  case AMDILISD::ATOM_G_UMIN_NORET:
+    return "AMDILISD::ATOM_G_UMIN_NORET";
+  case AMDILISD::ATOM_G_OR_NORET:
+    return "AMDILISD::ATOM_G_OR_NORET";
+  case AMDILISD::ATOM_G_SUB_NORET:
+    return "AMDILISD::ATOM_G_SUB_NORET";
+  case AMDILISD::ATOM_G_RSUB_NORET:
+    return "AMDILISD::ATOM_G_RSUB_NORET";
+  case AMDILISD::ATOM_G_XCHG_NORET:
+    return "AMDILISD::ATOM_G_XCHG_NORET";
+  case AMDILISD::ATOM_G_XOR_NORET:
+    return "AMDILISD::ATOM_G_XOR_NORET";
+  case AMDILISD::ATOM_L_ADD:
+    return "AMDILISD::ATOM_L_ADD";
+  case AMDILISD::ATOM_L_AND:
+    return "AMDILISD::ATOM_L_AND";
+  case AMDILISD::ATOM_L_CMPXCHG:
+    return "AMDILISD::ATOM_L_CMPXCHG";
+  case AMDILISD::ATOM_L_DEC:
+    return "AMDILISD::ATOM_L_DEC";
+  case AMDILISD::ATOM_L_INC:
+    return "AMDILISD::ATOM_L_INC";
+  case AMDILISD::ATOM_L_MAX:
+    return "AMDILISD::ATOM_L_MAX";
+  case AMDILISD::ATOM_L_UMAX:
+    return "AMDILISD::ATOM_L_UMAX";
+  case AMDILISD::ATOM_L_MIN:
+    return "AMDILISD::ATOM_L_MIN";
+  case AMDILISD::ATOM_L_UMIN:
+    return "AMDILISD::ATOM_L_UMIN";
+  case AMDILISD::ATOM_L_OR:
+    return "AMDILISD::ATOM_L_OR";
+  case AMDILISD::ATOM_L_SUB:
+    return "AMDILISD::ATOM_L_SUB";
+  case AMDILISD::ATOM_L_RSUB:
+    return "AMDILISD::ATOM_L_RSUB";
+  case AMDILISD::ATOM_L_XCHG:
+    return "AMDILISD::ATOM_L_XCHG";
+  case AMDILISD::ATOM_L_XOR:
+    return "AMDILISD::ATOM_L_XOR";
+  case AMDILISD::ATOM_L_ADD_NORET:
+    return "AMDILISD::ATOM_L_ADD_NORET";
+  case AMDILISD::ATOM_L_AND_NORET:
+    return "AMDILISD::ATOM_L_AND_NORET";
+  case AMDILISD::ATOM_L_CMPXCHG_NORET:
+    return "AMDILISD::ATOM_L_CMPXCHG_NORET";
+  case AMDILISD::ATOM_L_DEC_NORET:
+    return "AMDILISD::ATOM_L_DEC_NORET";
+  case AMDILISD::ATOM_L_INC_NORET:
+    return "AMDILISD::ATOM_L_INC_NORET";
+  case AMDILISD::ATOM_L_MAX_NORET:
+    return "AMDILISD::ATOM_L_MAX_NORET";
+  case AMDILISD::ATOM_L_UMAX_NORET:
+    return "AMDILISD::ATOM_L_UMAX_NORET";
+  case AMDILISD::ATOM_L_MIN_NORET:
+    return "AMDILISD::ATOM_L_MIN_NORET";
+  case AMDILISD::ATOM_L_UMIN_NORET:
+    return "AMDILISD::ATOM_L_UMIN_NORET";
+  case AMDILISD::ATOM_L_OR_NORET:
+    return "AMDILISD::ATOM_L_OR_NORET";
+  case AMDILISD::ATOM_L_SUB_NORET:
+    return "AMDILISD::ATOM_L_SUB_NORET";
+  case AMDILISD::ATOM_L_RSUB_NORET:
+    return "AMDILISD::ATOM_L_RSUB_NORET";
+  case AMDILISD::ATOM_L_XCHG_NORET:
+    return "AMDILISD::ATOM_L_XCHG_NORET";
+  case AMDILISD::ATOM_R_ADD:
+    return "AMDILISD::ATOM_R_ADD";
+  case AMDILISD::ATOM_R_AND:
+    return "AMDILISD::ATOM_R_AND";
+  case AMDILISD::ATOM_R_CMPXCHG:
+    return "AMDILISD::ATOM_R_CMPXCHG";
+  case AMDILISD::ATOM_R_DEC:
+    return "AMDILISD::ATOM_R_DEC";
+  case AMDILISD::ATOM_R_INC:
+    return "AMDILISD::ATOM_R_INC";
+  case AMDILISD::ATOM_R_MAX:
+    return "AMDILISD::ATOM_R_MAX";
+  case AMDILISD::ATOM_R_UMAX:
+    return "AMDILISD::ATOM_R_UMAX";
+  case AMDILISD::ATOM_R_MIN:
+    return "AMDILISD::ATOM_R_MIN";
+  case AMDILISD::ATOM_R_UMIN:
+    return "AMDILISD::ATOM_R_UMIN";
+  case AMDILISD::ATOM_R_OR:
+    return "AMDILISD::ATOM_R_OR";
+  case AMDILISD::ATOM_R_MSKOR:
+    return "AMDILISD::ATOM_R_MSKOR";
+  case AMDILISD::ATOM_R_SUB:
+    return "AMDILISD::ATOM_R_SUB";
+  case AMDILISD::ATOM_R_RSUB:
+    return "AMDILISD::ATOM_R_RSUB";
+  case AMDILISD::ATOM_R_XCHG:
+    return "AMDILISD::ATOM_R_XCHG";
+  case AMDILISD::ATOM_R_XOR:
+    return "AMDILISD::ATOM_R_XOR";
+  case AMDILISD::ATOM_R_ADD_NORET:
+    return "AMDILISD::ATOM_R_ADD_NORET";
+  case AMDILISD::ATOM_R_AND_NORET:
+    return "AMDILISD::ATOM_R_AND_NORET";
+  case AMDILISD::ATOM_R_CMPXCHG_NORET:
+    return "AMDILISD::ATOM_R_CMPXCHG_NORET";
+  case AMDILISD::ATOM_R_DEC_NORET:
+    return "AMDILISD::ATOM_R_DEC_NORET";
+  case AMDILISD::ATOM_R_INC_NORET:
+    return "AMDILISD::ATOM_R_INC_NORET";
+  case AMDILISD::ATOM_R_MAX_NORET:
+    return "AMDILISD::ATOM_R_MAX_NORET";
+  case AMDILISD::ATOM_R_UMAX_NORET:
+    return "AMDILISD::ATOM_R_UMAX_NORET";
+  case AMDILISD::ATOM_R_MIN_NORET:
+    return "AMDILISD::ATOM_R_MIN_NORET";
+  case AMDILISD::ATOM_R_UMIN_NORET:
+    return "AMDILISD::ATOM_R_UMIN_NORET";
+  case AMDILISD::ATOM_R_OR_NORET:
+    return "AMDILISD::ATOM_R_OR_NORET";
+  case AMDILISD::ATOM_R_MSKOR_NORET:
+    return "AMDILISD::ATOM_R_MSKOR_NORET";
+  case AMDILISD::ATOM_R_SUB_NORET:
+    return "AMDILISD::ATOM_R_SUB_NORET";
+  case AMDILISD::ATOM_R_RSUB_NORET:
+    return "AMDILISD::ATOM_R_RSUB_NORET";
+  case AMDILISD::ATOM_R_XCHG_NORET:
+    return "AMDILISD::ATOM_R_XCHG_NORET";
+  case AMDILISD::ATOM_R_XOR_NORET:
+    return "AMDILISD::ATOM_R_XOR_NORET";
+  case AMDILISD::APPEND_ALLOC:
+    return "AMDILISD::APPEND_ALLOC";
+  case AMDILISD::APPEND_CONSUME:
+    return "AMDILISD::APPEND_CONSUME";
+  };
+}
+
+/// getSetCCResultType - Return the value type to use for ISD::SETCC.
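+/// Scalar compares yield an i32; vector compares keep the element count and
+/// use integer elements of the same width (e.g. v4f32 -> v4i32).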
+EVT AMDILTargetLowering::getSetCCResultType(EVT VT) const
+{
+  if (!VT.isVector())
+    return MVT::i32;
+  return VT.changeVectorElementTypeToInteger();
+}
+
+
+bool
+AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                        const CallInst &I, unsigned Intrinsic) const
+{
+  if (Intrinsic <= AMDILIntrinsic::last_non_AMDIL_intrinsic
+      || Intrinsic > AMDILIntrinsic::num_AMDIL_intrinsics) {
+    return false;
+  }
+  bool bitCastToInt = false;
+  unsigned IntNo;
+  bool isRet = true;
+  const AMDILSubtarget *STM = &this->getTargetMachine()
+                              .getSubtarget<AMDILSubtarget>();
+  switch (Intrinsic) {
+  default:
+    return false; // Don't custom lower most intrinsics.
+  case AMDILIntrinsic::AMDIL_atomic_add_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_add_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_add_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_add_gu64:
+    IntNo = AMDILISD::ATOM_G_ADD;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_add_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_add_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_add_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_add_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_ADD_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_add_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_add_li32:
+  case AMDILIntrinsic::AMDIL_atomic_add_lu64:
+  case AMDILIntrinsic::AMDIL_atomic_add_li64:
+    IntNo = AMDILISD::ATOM_L_ADD;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_add_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_add_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_add_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_add_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_ADD_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_add_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_add_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_add_ru64:
+  case AMDILIntrinsic::AMDIL_atomic_add_ri64:
+    IntNo = AMDILISD::ATOM_R_ADD;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_add_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_add_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_add_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_add_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_ADD_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_and_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_and_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_and_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_and_gu64:
+    IntNo = AMDILISD::ATOM_G_AND;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_and_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_and_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_and_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_and_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_AND_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_and_li32:
+  case AMDILIntrinsic::AMDIL_atomic_and_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_and_li64:
+  case AMDILIntrinsic::AMDIL_atomic_and_lu64:
+    IntNo = AMDILISD::ATOM_L_AND;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_and_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_and_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_and_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_and_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_AND_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_and_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_and_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_and_ri64:
+  case AMDILIntrinsic::AMDIL_atomic_and_ru64:
+    IntNo = AMDILISD::ATOM_R_AND;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_and_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_and_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_and_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_and_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_AND_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu64:
+    IntNo = AMDILISD::ATOM_G_CMPXCHG;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li32:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li64:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu64:
+    IntNo = AMDILISD::ATOM_L_CMPXCHG;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri64:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru64:
+    IntNo = AMDILISD::ATOM_R_CMPXCHG;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_dec_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_dec_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_dec_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_dec_gu64:
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_G_DEC;
+    } else {
+      IntNo = AMDILISD::ATOM_G_SUB;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_dec_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_dec_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_dec_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_dec_gu64_noret:
+    isRet = false;
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_G_DEC_NORET;
+    } else {
+      IntNo = AMDILISD::ATOM_G_SUB_NORET;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_dec_li32:
+  case AMDILIntrinsic::AMDIL_atomic_dec_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_dec_li64:
+  case AMDILIntrinsic::AMDIL_atomic_dec_lu64:
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_L_DEC;
+    } else {
+      IntNo = AMDILISD::ATOM_L_SUB;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_dec_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_dec_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_dec_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_dec_lu64_noret:
+    isRet = false;
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_L_DEC_NORET;
+    } else {
+      IntNo = AMDILISD::ATOM_L_SUB_NORET;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_dec_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_dec_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_dec_ri64:
+  case AMDILIntrinsic::AMDIL_atomic_dec_ru64:
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_R_DEC;
+    } else {
+      IntNo = AMDILISD::ATOM_R_SUB;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_dec_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_dec_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_dec_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_dec_ru64_noret:
+    isRet = false;
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_R_DEC_NORET;
+    } else {
+      IntNo = AMDILISD::ATOM_R_SUB_NORET;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_inc_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_inc_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_inc_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_inc_gu64:
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_G_INC;
+    } else {
+      IntNo = AMDILISD::ATOM_G_ADD;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_inc_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_inc_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_inc_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_inc_gu64_noret:
+    isRet = false;
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_G_INC_NORET;
+    } else {
+      IntNo = AMDILISD::ATOM_G_ADD_NORET;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_inc_li32:
+  case AMDILIntrinsic::AMDIL_atomic_inc_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_inc_li64:
+  case AMDILIntrinsic::AMDIL_atomic_inc_lu64:
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_L_INC;
+    } else {
+      IntNo = AMDILISD::ATOM_L_ADD;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_inc_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_inc_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_inc_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_inc_lu64_noret:
+    isRet = false;
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_L_INC_NORET;
+    } else {
+      IntNo = AMDILISD::ATOM_L_ADD_NORET;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_inc_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_inc_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_inc_ri64:
+  case AMDILIntrinsic::AMDIL_atomic_inc_ru64:
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_R_INC;
+    } else {
+      IntNo = AMDILISD::ATOM_R_ADD;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_inc_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_inc_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_inc_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_inc_ru64_noret:
+    isRet = false;
+    if (STM->calVersion() >= CAL_VERSION_SC_136) {
+      IntNo = AMDILISD::ATOM_R_INC_NORET;
+    } else {
+      IntNo = AMDILISD::ATOM_R_ADD_NORET;
+    }
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_max_gi64:
+    IntNo = AMDILISD::ATOM_G_MAX;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_max_gu64:
+    IntNo = AMDILISD::ATOM_G_UMAX;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_max_gi64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_MAX_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_max_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_UMAX_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_li32:
+  case AMDILIntrinsic::AMDIL_atomic_max_li64:
+    IntNo = AMDILISD::ATOM_L_MAX;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_max_lu64:
+    IntNo = AMDILISD::ATOM_L_UMAX;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_max_li64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_MAX_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_max_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_UMAX_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_max_ri64:
+    IntNo = AMDILISD::ATOM_R_MAX;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_max_ru64:
+    IntNo = AMDILISD::ATOM_R_UMAX;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_max_ri64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_MAX_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_max_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_max_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_UMAX_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_min_gi64:
+    IntNo = AMDILISD::ATOM_G_MIN;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_min_gu64:
+    IntNo = AMDILISD::ATOM_G_UMIN;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_min_gi64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_MIN_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_min_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_UMIN_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_li32:
+  case AMDILIntrinsic::AMDIL_atomic_min_li64:
+    IntNo = AMDILISD::ATOM_L_MIN;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_min_lu64:
+    IntNo = AMDILISD::ATOM_L_UMIN;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_min_li64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_MIN_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_min_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_UMIN_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_min_ri64:
+    IntNo = AMDILISD::ATOM_R_MIN;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_min_ru64:
+    IntNo = AMDILISD::ATOM_R_UMIN;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_min_ri64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_MIN_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_min_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_min_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_UMIN_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_or_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_or_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_or_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_or_gu64:
+    IntNo = AMDILISD::ATOM_G_OR;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_or_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_or_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_or_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_or_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_OR_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_or_li32:
+  case AMDILIntrinsic::AMDIL_atomic_or_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_or_li64:
+  case AMDILIntrinsic::AMDIL_atomic_or_lu64:
+    IntNo = AMDILISD::ATOM_L_OR;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_or_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_or_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_or_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_or_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_OR_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_or_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_or_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_or_ri64:
+  case AMDILIntrinsic::AMDIL_atomic_or_ru64:
+    IntNo = AMDILISD::ATOM_R_OR;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_or_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_or_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_or_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_or_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_OR_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_sub_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_sub_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_sub_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_sub_gu64:
+    IntNo = AMDILISD::ATOM_G_SUB;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_sub_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_sub_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_sub_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_sub_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_SUB_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_sub_li32:
+  case AMDILIntrinsic::AMDIL_atomic_sub_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_sub_li64:
+  case AMDILIntrinsic::AMDIL_atomic_sub_lu64:
+    IntNo = AMDILISD::ATOM_L_SUB;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_sub_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_sub_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_sub_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_sub_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_SUB_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_sub_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_sub_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_sub_ri64:
+  case AMDILIntrinsic::AMDIL_atomic_sub_ru64:
+    IntNo = AMDILISD::ATOM_R_SUB;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_sub_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_sub_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_sub_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_sub_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_SUB_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_rsub_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_gu64:
+    IntNo = AMDILISD::ATOM_G_RSUB;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_rsub_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_RSUB_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_rsub_li32:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_li64:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_lu64:
+    IntNo = AMDILISD::ATOM_L_RSUB;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_rsub_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_RSUB_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_rsub_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_ri64:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_ru64:
+    IntNo = AMDILISD::ATOM_R_RSUB;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_rsub_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_rsub_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_RSUB_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gf32:
+    bitCastToInt = true;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gu64:
+    IntNo = AMDILISD::ATOM_G_XCHG;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gf32_noret:
+    bitCastToInt = true;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_XCHG_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_lf32:
+    bitCastToInt = true;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_li32:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_li64:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_lu64:
+    IntNo = AMDILISD::ATOM_L_XCHG;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_lf32_noret:
+    bitCastToInt = true;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_XCHG_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_rf32:
+    bitCastToInt = true;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_ri64:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_ru64:
+    IntNo = AMDILISD::ATOM_R_XCHG;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_rf32_noret:
+    bitCastToInt = true;
+  case AMDILIntrinsic::AMDIL_atomic_xchg_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xchg_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_XCHG_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xor_gi32:
+  case AMDILIntrinsic::AMDIL_atomic_xor_gu32:
+  case AMDILIntrinsic::AMDIL_atomic_xor_gi64:
+  case AMDILIntrinsic::AMDIL_atomic_xor_gu64:
+    IntNo = AMDILISD::ATOM_G_XOR;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xor_gi32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xor_gu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xor_gi64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xor_gu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_G_XOR_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xor_li32:
+  case AMDILIntrinsic::AMDIL_atomic_xor_lu32:
+  case AMDILIntrinsic::AMDIL_atomic_xor_li64:
+  case AMDILIntrinsic::AMDIL_atomic_xor_lu64:
+    IntNo = AMDILISD::ATOM_L_XOR;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xor_li32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xor_lu32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xor_li64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xor_lu64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_L_XOR_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xor_ri32:
+  case AMDILIntrinsic::AMDIL_atomic_xor_ru32:
+  case AMDILIntrinsic::AMDIL_atomic_xor_ri64:
+  case AMDILIntrinsic::AMDIL_atomic_xor_ru64:
+    IntNo = AMDILISD::ATOM_R_XOR;
+    break;
+  case AMDILIntrinsic::AMDIL_atomic_xor_ri32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xor_ru32_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xor_ri64_noret:
+  case AMDILIntrinsic::AMDIL_atomic_xor_ru64_noret:
+    isRet = false;
+    IntNo = AMDILISD::ATOM_R_XOR_NORET;
+    break;
+  case AMDILIntrinsic::AMDIL_append_alloc_i32:
+    IntNo = AMDILISD::APPEND_ALLOC;
+    break;
+  case AMDILIntrinsic::AMDIL_append_consume_i32:
+    IntNo = AMDILISD::APPEND_CONSUME;
+    break;
+  };
+  const AMDILSubtarget *stm = &this->getTargetMachine()
+                              .getSubtarget<AMDILSubtarget>();
+  AMDILKernelManager *KM = const_cast<AMDILKernelManager*>(
+                             stm->getKernelManager());
+  KM->setOutputInst();
+
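+  // Fill in the memory-intrinsic descriptor: the target opcode chosen above,
+  // the in-memory type, the pointer operand, and the volatile/read/write
+  // flags (the *_noret forms never read the old value back).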
+  Info.opc = IntNo;
+  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
+  Info.ptrVal = I.getOperand(0);
+  Info.offset = 0;
+  Info.align = 4;
+  Info.vol = true;
+  Info.readMem = isRet;
+  Info.writeMem = true;
+  return true;
+}
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
+{
+  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
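+// Both f32 and f64 immediates are legal (see isFPImmLegal above), so there
+// is nothing to gain from shrinking f64 constants to f32.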
+bool
+AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
+{
+  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+    return false;
+  } else {
+    return true;
+  }
+}
+
+
+// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
+// be zero or one for a target-specific node. Used by the DAG combiner.
+
+void
+AMDILTargetLowering::computeMaskedBitsForTargetNode(
+  const SDValue Op,
+  APInt &KnownZero,
+  APInt &KnownOne,
+  const SelectionDAG &DAG,
+  unsigned Depth) const
+{
+  APInt KnownZero2;
+  APInt KnownOne2;
+  unsigned BitWidth = KnownZero.getBitWidth();
+  KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything
+  switch (Op.getOpcode()) {
+  default:
+    break;
+  case AMDILISD::SELECT_CC:
+    DAG.ComputeMaskedBits(
+      Op.getOperand(1),
+      KnownZero,
+      KnownOne,
+      Depth + 1
+    );
+    DAG.ComputeMaskedBits(
+      Op.getOperand(0),
+      KnownZero2,
+      KnownOne2,
+      Depth + 1
+    );
+    assert((KnownZero & KnownOne) == 0
+           && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0
+           && "Bits known to be one AND zero?");
+    // Only known if known in both the LHS and RHS
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  };
+}
+
+// This is the function that determines which calling convention should
+// be used. Currently there is only one calling convention.
+CCAssignFn*
+AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
+{
+  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  return CC_AMDIL32;
+}
+
+// LowerCallResult - Lower the result values of an ISD::CALL into the
+// appropriate copies out of appropriate physical registers.  This assumes that
+// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+// being lowered.  This returns an SDNode with the same number of values as the
+// ISD::CALL.
+SDValue
+AMDILTargetLowering::LowerCallResult(
+  SDValue Chain,
+  SDValue InFlag,
+  CallingConv::ID CallConv,
+  bool isVarArg,
+  const SmallVectorImpl<ISD::InputArg> &Ins,
+  DebugLoc dl,
+  SelectionDAG &DAG,
+  SmallVectorImpl<SDValue> &InVals) const
+{
+  // Assign locations to each value returned by this call
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), RVLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    EVT CopyVT = RVLocs[i].getValVT();
+    if (RVLocs[i].isRegLoc()) {
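+      // getCopyFromReg yields (value, chain, glue); take the chain first,
+      // then pull out the copied value and the output glue.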
+      Chain = DAG.getCopyFromReg(
+                Chain,
+                dl,
+                RVLocs[i].getLocReg(),
+                CopyVT,
+                InFlag
+              ).getValue(1);
+      SDValue Val = Chain.getValue(0);
+      InFlag = Chain.getValue(2);
+      InVals.push_back(Val);
+    }
+  }
+
+  return Chain;
+
+}
+
+//===----------------------------------------------------------------------===//
+//                           Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AMDILTargetLowering::EmitInstrWithCustomInserter(
+  MachineInstr *MI, MachineBasicBlock *BB) const
+{
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  switch (MI->getOpcode()) {
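+    // ExpandCaseToAllTypes emits the case labels for the typed variants of
+    // AMDIL::CMP, so every compare funnels into generateCMPInstr below.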
+    ExpandCaseToAllTypes(AMDIL::CMP);
+    generateCMPInstr(MI, BB, TII);
+    MI->eraseFromParent();
+    break;
+  default:
+    break;
+  }
+  return BB;
+}
+
+// Recursively assign SDNodeOrdering to any unordered nodes
+// This is necessary to maintain source ordering of instructions
+// under -O0 to avoid odd-looking "skipping around" issues.
+static const SDValue
+Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
+{
+  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
+    DAG.AssignOrdering( New.getNode(), order );
+    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
+      Ordered( DAG, order, New.getOperand(i) );
+  }
+  return New;
+}
+
+#define LOWER(A) \
+  case ISD:: A: \
+return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
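+// For example, LOWER(SETCC) expands to:
+//   case ISD::SETCC:
+//     return Ordered(DAG, DAG.GetOrdering(Op.getNode()), LowerSETCC(Op, DAG));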
+
+SDValue
+AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+  switch (Op.getOpcode()) {
+  default:
+    Op.getNode()->dump();
+    assert(0 && "Custom lowering code for this"
+           "instruction is not implemented yet!");
+    break;
+    LOWER(GlobalAddress);
+    LOWER(JumpTable);
+    LOWER(ConstantPool);
+    LOWER(ExternalSymbol);
+    LOWER(FP_TO_SINT);
+    LOWER(FP_TO_UINT);
+    LOWER(SINT_TO_FP);
+    LOWER(UINT_TO_FP);
+    LOWER(ADD);
+    LOWER(MUL);
+    LOWER(SUB);
+    LOWER(FDIV);
+    LOWER(SDIV);
+    LOWER(SREM);
+    LOWER(UDIV);
+    LOWER(UREM);
+    LOWER(BUILD_VECTOR);
+    LOWER(INSERT_VECTOR_ELT);
+    LOWER(EXTRACT_VECTOR_ELT);
+    LOWER(EXTRACT_SUBVECTOR);
+    LOWER(SCALAR_TO_VECTOR);
+    LOWER(CONCAT_VECTORS);
+    LOWER(AND);
+    LOWER(OR);
+    LOWER(SELECT);
+    LOWER(SELECT_CC);
+    LOWER(SETCC);
+    LOWER(SIGN_EXTEND_INREG);
+    LOWER(BITCAST);
+    LOWER(DYNAMIC_STACKALLOC);
+    LOWER(BRCOND);
+    LOWER(BR_CC);
+    LOWER(FP_ROUND);
+  }
+  return Op;
+}
+
+int
+AMDILTargetLowering::getVarArgsFrameOffset() const
+{
+  return VarArgsFrameOffset;
+}
+#undef LOWER
+
+SDValue
+AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue DST = Op;
+  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *G = GADN->getGlobal();
+  DebugLoc DL = Op.getDebugLoc();
+  MachineFunction &MF = DAG.getMachineFunction();
+  AMDILModuleInfo* AMI = &(MF.getMMI().getObjFileInfo<AMDILModuleInfo>());
+  EVT PtrVT = getPointerTy();
+
+  int64_t base_offset = GADN->getOffset();
+  int32_t arrayoffset = AMI->getArrayOffset(G->getName().str());
+  int32_t constoffset = AMI->getConstOffset(G->getName().str());
+  if (arrayoffset != -1) {
+    DST = DAG.getConstant(arrayoffset, PtrVT);
+    DST = DAG.getNode(ISD::ADD, DL, PtrVT,
+                      DST, DAG.getConstant(base_offset, PtrVT));
+  } else if (constoffset != -1) {
+    if (AMI->getConstHWBit(G->getName().str())) {
+      DST = DAG.getConstant(constoffset, PtrVT);
+      DST = DAG.getNode(ISD::ADD, DL, PtrVT,
+                        DST, DAG.getConstant(base_offset, PtrVT));
+    } else {
+      SDValue addr = DAG.getTargetGlobalAddress(G, DL, PtrVT);
+      SDValue DPReg = DAG.getRegister(AMDIL::SDP, PtrVT);
+      DPReg = DAG.getNode(ISD::ADD, DL, PtrVT, DPReg,
+                          DAG.getConstant(base_offset, PtrVT));
+      DST = DAG.getNode(AMDILISD::ADDADDR, DL, PtrVT, addr, DPReg);
+    }
+  } else {
+    const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+    if (!GV) {
+      DST = DAG.getTargetGlobalAddress(G, DL, PtrVT);
+    } else {
+      if (GV->hasInitializer()) {
+        const Constant *C = dyn_cast<Constant>(GV->getInitializer());
+        if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+          DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+
+        } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+          DST = DAG.getConstantFP(CF->getValueAPF(),
+                                  Op.getValueType());
+        } else if (dyn_cast<ConstantAggregateZero>(C)) {
+          EVT VT = Op.getValueType();
+          if (VT.isInteger()) {
+            DST = DAG.getConstant(0, VT);
+          } else {
+            DST = DAG.getConstantFP(0, VT);
+          }
+        } else {
+          assert(!"lowering this type of Global Address "
+                 "not implemented yet!");
+          C->dump();
+          DST = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
+        }
+      } else {
+        DST = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
+      }
+    }
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+  return Result;
+}
+SDValue
+AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  EVT PtrVT = Op.getValueType();
+  SDValue Result;
+  if (CP->isMachineConstantPoolEntry()) {
+    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+                                       CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+  } else {
+    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+                                       CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+  }
+  return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
+{
+  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+  SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+  return Result;
+}
+/// LowerFormalArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: isVarArg, hasStructRet, isMemReg
+SDValue
+AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
+    CallingConv::ID CallConv,
+    bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    DebugLoc dl,
+    SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals)
+const
+{
+  MachineFunction &MF = DAG.getMachineFunction();
+  AMDILMachineFunctionInfo *FuncInfo
+  = MF.getInfo<AMDILMachineFunctionInfo>();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  //const Function *Fn = MF.getFunction();
+  //MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CallingConv::ID CC = MF.getFunction()->getCallingConv();
+  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
+
+  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+
+  // When more calling conventions are added, they need to be chosen here
+  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
+  SDValue StackPtr;
+
+  //unsigned int FirstStackArgLoc = 0;
+
+  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    if (VA.isRegLoc()) {
+      EVT RegVT = VA.getLocVT();
+      EVT ValVT = VA.getValVT();
+      const TargetRegisterClass *RC = getRegClassFromType(
+                                        RegVT.getSimpleVT().SimpleTy);
+
+      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
+      FuncInfo->addArgReg(VA.getLocReg());
+      SDValue ArgValue = DAG.getCopyFromReg(
+                           Chain,
+                           dl,
+                           Reg,
+                           RegVT);
+      // If this is an 8 or 16-bit value, it is really passed
+      // promoted to 32 bits.  Insert an assert[sz]ext to capture
+      // this, then truncate to the right size.
+
+      if (VA.getLocInfo() == CCValAssign::SExt) {
+        ArgValue = DAG.getNode(
+                     ISD::AssertSext,
+                     dl,
+                     RegVT,
+                     ArgValue,
+                     DAG.getValueType(ValVT));
+      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
+        ArgValue = DAG.getNode(
+                     ISD::AssertZext,
+                     dl,
+                     RegVT,
+                     ArgValue,
+                     DAG.getValueType(ValVT));
+      }
+      if (VA.getLocInfo() != CCValAssign::Full) {
+        ArgValue = DAG.getNode(
+                     ISD::TRUNCATE,
+                     dl,
+                     ValVT,
+                     ArgValue);
+      }
+      // Add the value to the list of arguments
+      // to be passed in registers
+      InVals.push_back(ArgValue);
+      if (isVarArg) {
+        assert(0 && "Variable arguments are not yet supported");
+        // See MipsISelLowering.cpp for ideas on how to implement
+      }
+    } else if(VA.isMemLoc()) {
+      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
+                                        dl, DAG, VA, MFI, i));
+    } else {
+      assert(0 && "found a Value Assign that is "
+             "neither a register or a memory location");
+    }
+  }
+  /*if (hasStructRet) {
+    assert(0 && "Has struct return is not yet implemented");
+  // See MipsISelLowering.cpp for ideas on how to implement
+  }*/
+
+  unsigned int StackSize = CCInfo.getNextStackOffset();
+  if (isVarArg) {
+    assert(0 && "Variable arguments are not yet supported");
+    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
+  }
+  // This needs to be changed to non-zero if the return function needs
+  // to pop bytes
+  FuncInfo->setBytesToPopOnReturn(StackSize);
+  return Chain;
+}
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG)
+{
+  assert(0 && "MemCopy does not exist yet");
+  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+  return DAG.getMemcpy(Chain,
+                       Src.getDebugLoc(),
+                       Dst, Src, SizeNode, Flags.getByValAlign(),
+                       /*IsVol=*/false, /*AlwaysInline=*/true,
+                       MachinePointerInfo(), MachinePointerInfo());
+}
+
+SDValue
+AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
+                                      SDValue StackPtr, SDValue Arg,
+                                      DebugLoc dl, SelectionDAG &DAG,
+                                      const CCValAssign &VA,
+                                      ISD::ArgFlagsTy Flags) const
+{
+  unsigned int LocMemOffset = VA.getLocMemOffset();
+  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+  PtrOff = DAG.getNode(ISD::ADD,
+                       dl,
+                       getPointerTy(), StackPtr, PtrOff);
+  if (Flags.isByVal()) {
+    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
+  } else {
+    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
+                          MachinePointerInfo::getStack(LocMemOffset),
+                          false, false, 0);
+  }
+  return PtrOff;
+}
+/// LowerCall - function arguments are copied from virtual
+/// registers to physical registers or the stack frame, and
+/// CALLSEQ_START and CALLSEQ_END nodes are emitted.
+/// TODO: isVarArg, isTailCall, hasStructRet
+SDValue
+AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+                               CallingConv::ID CallConv, bool doesNotReturn, bool isVarArg, bool& isTailCall,
+                               const SmallVectorImpl<ISD::OutputArg> &Outs,
+                               const SmallVectorImpl<SDValue> &OutVals,
+                               const SmallVectorImpl<ISD::InputArg> &Ins,
+                               DebugLoc dl, SelectionDAG &DAG,
+                               SmallVectorImpl<SDValue> &InVals)
+const
+{
+  isTailCall = false;
+  MachineFunction& MF = DAG.getMachineFunction();
+  // FIXME: Do we need to handle fast calling conventions and tail call
+  // optimizations? See X86/PPC ISelLowering.
+  /*bool hasStructRet = (TheCall->getNumArgs())
+    ? TheCall->getArgFlags(0).device()->isSRet()
+    : false;*/
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Analyze operands of the call, assigning locations to each operand
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+  // Analyze the calling operands; this needs to change
+  // if we have more than one calling convention
+  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+  unsigned int NumBytes = CCInfo.getNextStackOffset();
+  if (isTailCall) {
+    assert(isTailCall && "Tail Call not handled yet!");
+    // See X86/PPC ISelLowering
+  }
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+  SDValue StackPtr;
+  //unsigned int FirstStacArgLoc = 0;
+  //int LastArgStackLoc = 0;
+
+  // Walk the register/memloc assignments, insert copies/loads
+  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    // Arguments start after the first 5 operands of ISD::CALL
+    SDValue Arg = OutVals[i];
+    //Promote the value if needed
+    switch(VA.getLocInfo()) {
+    default:
+      assert(0 && "Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND,
+                        dl,
+                        VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND,
+                        dl,
+                        VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND,
+                        dl,
+                        VA.getLocVT(), Arg);
+      break;
+    }
+
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else if (VA.isMemLoc()) {
+      // Create the frame index object for this incoming parameter
+      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+                                      VA.getLocMemOffset(), true
+                                     );
+      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+      // Emit an ISD::STORE which stores the
+      // parameter value to a stack location
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                         MachinePointerInfo::getFixedStack(FI),
+                                         false, false, 0));
+    } else {
+      assert(0 && "Not a Reg/Mem Loc, major error!");
+    }
+  }
+  if (!MemOpChains.empty()) {
+    Chain = DAG.getNode(ISD::TokenFactor,
+                        dl,
+                        MVT::Other,
+                        &MemOpChains[0],
+                        MemOpChains.size());
+  }
+  SDValue InFlag;
+  if (!isTailCall) {
+    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain,
+                               dl,
+                               RegsToPass[i].first,
+                               RegsToPass[i].second,
+                               InFlag);
+      InFlag = Chain.getValue(1);
+    }
+  }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+  // every direct call is) turn it into a TargetGlobalAddress/
+  // TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))  {
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+  } else if (isTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // see X86 ISelLowering for ideas on implementation: 1708
+  }
+
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
+  SmallVector<SDValue, 8> Ops;
+
+  if (isTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // see X86 ISelLowering for ideas on implementation: 1721
+  }
+  // If this is a direct call, pass the chain and the callee
+  if (Callee.getNode()) {
+    Ops.push_back(Chain);
+    Ops.push_back(Callee);
+  }
+
+  if (isTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // see X86 ISelLowering for ideas on implementation: 1739
+  }
+
+  // Add argument registers to the end of the list so that they are known
+  // live into the call
+  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Ops.push_back(DAG.getRegister(
+                    RegsToPass[i].first,
+                    RegsToPass[i].second.getValueType()));
+  }
+  if (InFlag.getNode()) {
+    Ops.push_back(InFlag);
+  }
+
+  // Emit Tail Call
+  if (isTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // see X86 ISelLowering for ideas on implementation: 1762
+  }
+
+  Chain = DAG.getNode(AMDILISD::CALL,
+                      dl,
+                      NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node
+  Chain = DAG.getCALLSEQ_END(
+            Chain,
+            DAG.getIntPtrConstant(NumBytes, true),
+            DAG.getIntPtrConstant(0, true),
+            InFlag);
+  InFlag = Chain.getValue(1);
+  // Handle result values, copying them out of physregs into vregs that
+  // we return
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+                         InVals);
+}
+static void checkMADType(
+  SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
+{
+  bool globalLoadStore = false;
+  is24bitMAD = false;
+  is32bitMAD = false;
+  return;
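+  // Note: the unconditional return above leaves both flags false, so the
+  // use-scan below is currently dead code; the MAD detection appears to be
+  // intentionally disabled here.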
+  assert(Op.getOpcode() == ISD::ADD && "The opcode must be an add in order for "
+         "this to work correctly!");
+  if (Op.getNode()->use_empty()) {
+    return;
+  }
+  for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
+       nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
+    SDNode *ptr = *nBegin;
+    const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
+    // If we are not a LSBaseSDNode then we don't do this
+    // optimization.
+    // If we are a LSBaseSDNode, but the op is not the offset
+    // or base pointer, then we don't do this optimization
+    // (i.e. we are the value being stored)
+    if (!lsNode ||
+        (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
+      return;
+    }
+    const PointerType *PT =
+      dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
+    unsigned as = PT->getAddressSpace();
+    switch(as) {
+    default:
+      globalLoadStore = true;
+    case AMDILAS::PRIVATE_ADDRESS:
+      if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+        globalLoadStore = true;
+      }
+      break;
+    case AMDILAS::CONSTANT_ADDRESS:
+      if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+        globalLoadStore = true;
+      }
+      break;
+    case AMDILAS::LOCAL_ADDRESS:
+      if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+        globalLoadStore = true;
+      }
+      break;
+    case AMDILAS::REGION_ADDRESS:
+      if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+        globalLoadStore = true;
+      }
+      break;
+    }
+  }
+  if (globalLoadStore) {
+    is32bitMAD = true;
+  } else {
+    is24bitMAD = true;
+  }
+}
+
+SDValue
+AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  const AMDILSubtarget *stm = &this->getTargetMachine()
+                              .getSubtarget<AMDILSubtarget>();
+  bool isVec = OVT.isVector();
+  if (OVT.getScalarType() == MVT::i64) {
+    MVT INTTY = MVT::i32;
+    if (OVT == MVT::v2i64) {
+      INTTY = MVT::v2i32;
+    }
+    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
+        && INTTY == MVT::i32) {
+      DST = DAG.getNode(AMDILISD::ADD,
+                        DL,
+                        OVT,
+                        LHS, RHS);
+    } else {
+      SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
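+      // Software 64-bit add: split each operand into 32-bit lo/hi halves,
+      // add the halves separately, and propagate the carry from the low
+      // half (detected as INTLO <u RHSLO) into the high half. The CMP node
+      // presumably yields -1 on true (AMDIL convention), so it is negated
+      // to +1 before being added as the carry.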
+      // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+      LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
+      RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
+      LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
+      RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+      INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
+      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
+      SDValue cmp;
+      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                        DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+                        INTLO, RHSLO);
+      cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
+      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
+      DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
+                        INTLO, INTHI);
+    }
+  } else {
+    if (LHS.getOpcode() == ISD::FrameIndex ||
+        RHS.getOpcode() == ISD::FrameIndex) {
+      DST = DAG.getNode(AMDILISD::ADDADDR,
+                        DL,
+                        OVT,
+                        LHS, RHS);
+    } else {
+      if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+          && LHS.getNumOperands()
+          && RHS.getNumOperands()) {
+        bool is24bitMAD = false;
+        bool is32bitMAD = false;
+        const ConstantSDNode *LHSConstOpCode =
+          dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
+        const ConstantSDNode *RHSConstOpCode =
+          dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
+        if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
+            || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
+            || LHS.getOpcode() == ISD::MUL
+            || RHS.getOpcode() == ISD::MUL) {
+          SDValue Op1, Op2, Op3;
+          // FIXME: Fix this so that it works for unsigned 24bit ops.
+          if (LHS.getOpcode() == ISD::MUL) {
+            Op1 = LHS.getOperand(0);
+            Op2 = LHS.getOperand(1);
+            Op3 = RHS;
+          } else if (RHS.getOpcode() == ISD::MUL) {
+            Op1 = RHS.getOperand(0);
+            Op2 = RHS.getOperand(1);
+            Op3 = LHS;
+          } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
+            Op1 = LHS.getOperand(0);
+            Op2 = DAG.getConstant(
+                    1 << LHSConstOpCode->getZExtValue(), MVT::i32);
+            Op3 = RHS;
+          } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
+            Op1 = RHS.getOperand(0);
+            Op2 = DAG.getConstant(
+                    1 << RHSConstOpCode->getZExtValue(), MVT::i32);
+            Op3 = LHS;
+          }
+          checkMADType(Op, stm, is24bitMAD, is32bitMAD);
+          // We can possibly do a MAD transform!
+          if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+            uint32_t opcode = AMDILIntrinsic::AMDIL_mad24_i32;
+            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+                              DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
+                              Op1, Op2, Op3);
+          } else if(is32bitMAD) {
+            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+                              DL, Tys, DAG.getEntryNode(),
+                              DAG.getConstant(
+                                AMDILIntrinsic::AMDIL_mad_i32, MVT::i32),
+                              Op1, Op2, Op3);
+          }
+        }
+      }
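+      // Note: this plain ADD unconditionally overwrites DST, including any
+      // MAD intrinsic built above; since checkMADType currently reports
+      // false for both MAD kinds, the ADD below is what actually gets
+      // emitted on this path.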
+      DST = DAG.getNode(AMDILISD::ADD,
+                        DL,
+                        OVT,
+                        LHS, RHS);
+    }
+  }
+  return DST;
+}
+SDValue
+AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
+                              uint32_t bits) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT INTTY = Op.getValueType();
+  EVT FPTY;
+  if (INTTY.isVector()) {
+    FPTY = EVT(MVT::getVectorVT(MVT::f32,
+                                INTTY.getVectorNumElements()));
+  } else {
+    FPTY = EVT(MVT::f32);
+  }
+  /* static inline uint
+     __clz_Nbit(uint x)
+     {
+     int xor = 0x3f800000U | x;
+     float tp = as_float(xor);
+     float t = tp + -1.0f;
+     uint tint = as_uint(t);
+     int cmp = (x != 0);
+     uint tsrc = tint >> 23;
+     uint tmask = tsrc & 0xffU;
+     uint cst = (103 + N)U - tmask;
+     return cmp ? cst : N;
+     }
+     */
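+  // The trick: OR-ing x (assumed < 2^23 so it fits in the float mantissa)
+  // into the bit pattern of 1.0f and subtracting 1.0f leaves x * 2^-23
+  // exactly, whose biased exponent is 104 + floor(log2(x)). Hence
+  // (103 + bits) - exponent == bits - 1 - floor(log2(x)), the number of
+  // leading zeros within a 'bits'-wide value; x == 0 falls back to 'bits'.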
+  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
+         && "genCLZu16 only works on 32bit types");
+  // uint x = Op
+  SDValue x = Op;
+  // xornode = 0x3f800000 | x
+  SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
+                                DAG.getConstant(0x3f800000, INTTY), x);
+  // float tp = as_float(xornode)
+  SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
+  // float t = tp + -1.0f
+  SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
+                          DAG.getConstantFP(-1.0f, FPTY));
+  // uint tint = as_uint(t)
+  SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
+  // int cmp = (x != 0)
+  SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                            DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
+                            DAG.getConstant(0, INTTY));
+  // uint tsrc = tint >> 23
+  SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
+                             DAG.getConstant(23, INTTY));
+  // uint tmask = tsrc & 0xFF
+  SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
+                              DAG.getConstant(0xFFU, INTTY));
+  // uint cst = (103 + bits) - tmask
+  SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
+                            DAG.getConstant((103U + bits), INTTY), tmask);
+  // return cmp ? cst : N
+  cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
+                    DAG.getConstant(bits, INTTY));
+  return cst;
+}
+
+SDValue
+AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue DST = SDValue();
+  DebugLoc DL = Op.getDebugLoc();
+  EVT INTTY = Op.getValueType();
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+                                &this->getTargetMachine())->getSubtargetImpl();
+  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+    //__clz_32bit(uint u)
+    //{
+    // int z = __amdil_ffb_hi(u) ;
+    // return z < 0 ? 32 : z;
+    // }
+    // uint u = op
+    SDValue u = Op;
+    // int z = __amdil_ffb_hi(u)
+    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
+    // int cmp = z < 0
+    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                              DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+                              z, DAG.getConstant(0, INTTY));
+    // return cmp ? 32 : z
+    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
+                      DAG.getConstant(32, INTTY), z);
+  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    //  static inline uint
+    //__clz_32bit(uint x)
+    //{
+    //    uint zh = __clz_16bit(x >> 16);
+    //    uint zl = __clz_16bit(x & 0xffffU);
+    //   return zh == 16U ? 16U + zl : zh;
+    //}
+    // uint x = Op
+    SDValue x = Op;
+    // uint xs16 = x >> 16
+    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
+                               DAG.getConstant(16, INTTY));
+    // uint zh = __clz_16bit(xs16)
+    SDValue zh = genCLZuN(xs16, DAG, 16);
+    // uint xa16 = x & 0xFFFF
+    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
+                               DAG.getConstant(0xFFFFU, INTTY));
+    // uint zl = __clz_16bit(xa16)
+    SDValue zl = genCLZuN(xa16, DAG, 16);
+    // uint cmp = zh == 16U
+    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                              DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+                              zh, DAG.getConstant(16U, INTTY));
+    // uint zl16 = zl + 16
+    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
+                               DAG.getConstant(16, INTTY), zl);
+    // return cmp ? zl16 : zh
+    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
+                      cmp, zl16, zh);
+  } else {
+    assert(0 && "Attempting to generate a CLZ function with an"
+           " unknown graphics card");
+  }
+  return DST;
+}
+SDValue
+AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue DST = SDValue();
+  DebugLoc DL = Op.getDebugLoc();
+  EVT INTTY;
+  EVT LONGTY = Op.getValueType();
+  bool isVec = LONGTY.isVector();
+  if (isVec) {
+    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
+                                 .getVectorNumElements()));
+  } else {
+    INTTY = EVT(MVT::i32);
+  }
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+                                &this->getTargetMachine())->getSubtargetImpl();
+  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+    // Evergreen:
+    // static inline uint
+    // __clz_u64(ulong x)
+    // {
+    //uint zhi = __clz_32bit((uint)(x >> 32));
+    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
+    //return zhi == 32U ? 32U + zlo : zhi;
+    //}
+    //ulong x = op
+    SDValue x = Op;
+    // uint xlo = x & 0xFFFFFFFF
+    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+    // uint xhi = x >> 32
+    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
+    // uint zhi = __clz_32bit(xhi)
+    SDValue zhi = genCLZu32(xhi, DAG);
+    // uint zlo = __clz_32bit(xlo)
+    SDValue zlo = genCLZu32(xlo, DAG);
+    // uint cmp = zhi == 32
+    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                              DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+                              zhi, DAG.getConstant(32U, INTTY));
+    // uint zlop32 = 32 + zlo
+    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
+                                 DAG.getConstant(32U, INTTY), zlo);
+    // return cmp ? zlop32: zhi
+    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
+  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    // HD4XXX:
+    //  static inline uint
+    //__clz_64bit(ulong x)
+    //{
+    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
+    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
+    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
+    //uint r = zh == 18U ? 18U + zm : zh;
+    //return zh + zm == 41U ? 41U + zl : r;
+    //}
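+    // The 64-bit value is scanned as an 18-bit top chunk and two 23-bit
+    // chunks because __clz_Nbit (genCLZuN) relies on the 23-bit float
+    // mantissa, so each chunk must fit in 23 bits to be counted exactly.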
+    //ulong x = Op
+    SDValue x = Op;
+    // ulong xs46 = x >> 46
+    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+                               DAG.getConstant(46, LONGTY));
+    // uint ixs46 = (uint)xs46
+    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
+    // ulong xs23 = x >> 23
+    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+                               DAG.getConstant(23, LONGTY));
+    // uint ixs23 = (uint)xs23
+    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
+    // uint xs23m23 = ixs23 & 0x7FFFFF
+    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
+                                  DAG.getConstant(0x7fffffU, INTTY));
+    // uint ix = (uint)x
+    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+    // uint xm23 = ix & 0x7FFFFF
+    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
+                               DAG.getConstant(0x7fffffU, INTTY));
+    // uint zh = __clz_23bit(ixs46)
+    SDValue zh = genCLZuN(ixs46, DAG, 23);
+    // uint zm = __clz_23bit(xs23m23)
+    SDValue zm = genCLZuN(xs23m23, DAG, 23);
+    // uint zl = __clz_23bit(xm23)
+    SDValue zl = genCLZuN(xm23, DAG, 23);
+    // uint zhm5 = zh - 5
+    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
+                               DAG.getConstant(-5U, INTTY));
+    SDValue const18 = DAG.getConstant(18, INTTY);
+    SDValue const41 = DAG.getConstant(41, INTTY);
+    // uint cmp1 = zhm5 == 18
+    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                               DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+                               zhm5, const18);
+    // uint zhm5zm = zhm5 + zm
+    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
+    // uint cmp2 = zhm5zm == 41
+    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                               DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+                               zhm5zm, const41);
+    // uint zmp18 = zm + 18
+    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
+    // uint zlp41 = zl + 41
+    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
+    // uint r = cmp1 ? zmp18 : zhm5
+    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
+                            cmp1, zmp18, zhm5);
+    // return cmp2 ? zlp41 : r
+    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
+  } else {
+    assert(0 && "Attempting to generate a CLZ function with an"
+           " unknown graphics card");
+  }
+  return DST;
+}
+SDValue
+AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
+                                 bool includeSign) const
+{
+  EVT INTVT;
+  EVT LONGVT;
+  SDValue DST;
+  DebugLoc DL = RHS.getDebugLoc();
+  EVT RHSVT = RHS.getValueType();
+  bool isVec = RHSVT.isVector();
+  if (isVec) {
+    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
+                                  .getVectorNumElements()));
+    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
+                                 .getVectorNumElements()));
+  } else {
+    LONGVT = EVT(MVT::i64);
+    INTVT = EVT(MVT::i32);
+  }
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+                                &this->getTargetMachine())->getSubtargetImpl();
+  if (0 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    // unsigned version:
+    // uint uhi = (uint)(d * 0x1.0p-32);
+    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
+    // return as_ulong2((uint2)(ulo, uhi));
+    //
+    // signed version:
+    // double ad = fabs(d);
+    // long l = unsigned_version(ad);
+    // long nl = -l;
+    // return d == ad ? l : nl;
+    SDValue d = RHS;
+    if (includeSign) {
+      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
+    }
+    uint64_t val = 0x3DF0000000000000ULL;
+    double dval = *(double*)&val;
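+    // 0x3DF0000000000000 is the bit pattern of 0x1.0p-32 (2^-32); the
+    // 0xC1F0000000000000 below is -0x1.0p+32. Note that this whole branch
+    // is disabled by the "0 &&" guard above, so the bit-manipulation
+    // fallback in the else clause is what currently runs.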
+    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
+                               DAG.getConstantFP(dval, RHSVT));
+    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
+    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
+    val = 0xC1F0000000000000ULL;
+    dval = *(double*)&val;
+    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
+                       DAG.getConstantFP(dval, RHSVT), d);
+    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
+    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
+    if (includeSign) {
+      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
+      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
+                              DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
+                              RHS, d);
+      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
+    }
+    DST = l;
+  } else {
+    /*
+       __attribute__((always_inline)) long
+       cast_f64_to_i64(double d)
+       {
+    // Convert d in to 32-bit components
+    long x = as_long(d);
+    xhi = LCOMPHI(x);
+    xlo = LCOMPLO(x);
+
+    // Generate 'normalized' mantissa
+    mhi = xhi | 0x00100000; // hidden bit
+    mhi <<= 11;
+    temp = xlo >> (32 - 11);
+    mhi |= temp
+    mlo = xlo << 11;
+
+    // Compute shift right count from exponent
+    e = (xhi >> (52-32)) & 0x7ff;
+    sr = 1023 + 63 - e;
+    srge64 = sr >= 64;
+    srge32 = sr >= 32;
+
+    // Compute result for 0 <= sr < 32
+    rhi0 = mhi >> (sr &31);
+    rlo0 = mlo >> (sr &31);
+    temp = mhi << (32 - sr);
+    temp |= rlo0;
+    rlo0 = sr ? temp : rlo0;
+
+    // Compute result for 32 <= sr
+    rhi1 = 0;
+    rlo1 = srge64 ? 0 : rhi0;
+
+    // Pick between the 2 results
+    rhi = srge32 ? rhi1 : rhi0;
+    rlo = srge32 ? rlo1 : rlo0;
+
+    // Optional saturate on overflow
+    srlt0 = sr < 0;
+    rhi = srlt0 ? MAXVALUE : rhi;
+    rlo = srlt0 ? MAXVALUE : rlo;
+
+    // Create long
+    res = LCREATE( rlo, rhi );
+
+    // Deal with sign bit (ignoring whether result is signed or unsigned value)
+    if (includeSign) {
+    sign = ((signed int) xhi) >> 31; // fill with sign bit
+    sign = LCREATE( sign, sign );
+    res += sign;
+    res ^= sign;
+    }
+
+    return res;
+    }
+    */
+    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
+    SDValue c32 = DAG.getConstant( 32, INTVT );
+
+    // Convert d in to 32-bit components
+    SDValue d = RHS;
+    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
+    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+    // Generate 'normalized' mantissa
+    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
+                               xhi, DAG.getConstant( 0x00100000, INTVT ) );
+    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
+    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
+                                xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
+    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
+    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );
+
+    // Compute shift right count from exponent
+    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
+                             xhi, DAG.getConstant( 52-32, INTVT ) );
+    e = DAG.getNode( ISD::AND, DL, INTVT,
+                     e, DAG.getConstant( 0x7ff, INTVT ) );
+    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
+                              DAG.getConstant( 1023 + 63, INTVT ), e );
+    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+                                  DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+                                  sr, DAG.getConstant(64, INTVT));
+    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+                                  DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+                                  sr, DAG.getConstant(32, INTVT));
+
+    // Compute result for 0 <= sr < 32
+    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
+    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
+    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
+    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
+    temp = DAG.getNode( ISD::OR,  DL, INTVT, rlo0, temp );
+    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );
+
+    // Compute result for 32 <= sr
+    SDValue rhi1 = DAG.getConstant( 0, INTVT );
+    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+                                srge64, rhi1, rhi0 );
+
+    // Pick between the 2 results
+    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+                               srge32, rhi1, rhi0 );
+    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+                               srge32, rlo1, rlo0 );
+
+    // Create long
+    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+
+    // Deal with sign bit
+    if (includeSign) {
+      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
+                                  xhi, DAG.getConstant( 31, INTVT ) );
+      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
+      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
+      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
+    }
+    DST = res;
+  }
+  return DST;
+}
+SDValue
+AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
+                                 bool includeSign) const
+{
+  EVT INTVT;
+  EVT LONGVT;
+  DebugLoc DL = RHS.getDebugLoc();
+  EVT RHSVT = RHS.getValueType();
+  bool isVec = RHSVT.isVector();
+  if (isVec) {
+    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+                                  RHSVT.getVectorNumElements()));
+    INTVT = EVT(MVT::getVectorVT(MVT::i32,
+                                 RHSVT.getVectorNumElements()));
+  } else {
+    LONGVT = EVT(MVT::i64);
+    INTVT = EVT(MVT::i32);
+  }
+  /*
+     __attribute__((always_inline)) int
+     cast_f64_to_[u|i]32(double d)
+     {
+  // Convert d in to 32-bit components
+  long x = as_long(d);
+  xhi = LCOMPHI(x);
+  xlo = LCOMPLO(x);
+
+  // Generate 'normalized' mantissa
+  mhi = xhi | 0x00100000; // hidden bit
+  mhi <<= 11;
+  temp = xlo >> (32 - 11);
+  mhi |= temp
+
+  // Compute shift right count from exponent
+  e = (xhi >> (52-32)) & 0x7ff;
+  sr = 1023 + 31 - e;
+  srge32 = sr >= 32;
+
+  // Compute result for 0 <= sr < 32
+  res = mhi >> (sr &31);
+  res = srge32 ? 0 : res;
+
+  // Optional saturate on overflow
+  srlt0 = sr < 0;
+  res = srlt0 ? MAXVALUE : res;
+
+  // Deal with sign bit (ignoring whether result is signed or unsigned value)
+  if (includeSign) {
+  sign = ((signed int) xhi) >> 31; // fill with sign bit
+  res += sign;
+  res ^= sign;
+  }
+
+  return res;
+  }
+  */
+  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
+
+  // Convert d in to 32-bit components
+  SDValue d = RHS;
+  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
+  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+  // Generate 'normalized' mantissa
+  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
+                             xhi, DAG.getConstant( 0x00100000, INTVT ) );
+  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
+  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
+                              xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
+  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
+
+  // Compute shift right count from exponent
+  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
+                           xhi, DAG.getConstant( 52-32, INTVT ) );
+  e = DAG.getNode( ISD::AND, DL, INTVT,
+                   e, DAG.getConstant( 0x7ff, INTVT ) );
+  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
+                            DAG.getConstant( 1023 + 31, INTVT ), e );
+  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+                                DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+                                sr, DAG.getConstant(32, INTVT));
+
+  // Compute result for 0 <= sr < 32
+  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
+  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+                     srge32, DAG.getConstant(0,INTVT), res );
+
+  // Deal with sign bit
+  if (includeSign) {
+    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
+                                xhi, DAG.getConstant( 31, INTVT ) );
+    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
+    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
+  }
+  return res;
+}
+SDValue
+AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue RHS = Op.getOperand(0);
+  EVT RHSVT = RHS.getValueType();
+  MVT RST = RHSVT.getScalarType().getSimpleVT();
+  EVT LHSVT = Op.getValueType();
+  MVT LST = LHSVT.getScalarType().getSimpleVT();
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue DST;
+  const AMDILTargetMachine*
+  amdtm = reinterpret_cast<const AMDILTargetMachine*>
+          (&this->getTargetMachine());
+  const AMDILSubtarget*
+  stm = dynamic_cast<const AMDILSubtarget*>(
+          amdtm->getSubtargetImpl());
+  if (RST == MVT::f64 && RHSVT.isVector()) {
+    // We don't support vector 64-bit floating point conversions.
+    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
+      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                               DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
+      if (!x) {
+        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+      } else {
+        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
+                          DST, op, DAG.getTargetConstant(x, MVT::i32));
+      }
+    }
+  } else if (RST == MVT::f64
+             && LST == MVT::i32) {
+    if (stm->calVersion() >= CAL_VERSION_SC_155 && !RHSVT.isVector()
+        && stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+      DST = SDValue(Op.getNode(), 0);
+    } else {
+      DST = genf64toi32(RHS, DAG, true);
+    }
+  } else if (RST == MVT::f64
+             && LST == MVT::i64) {
+    DST = genf64toi64(RHS, DAG, true);
+  } else if (RST == MVT::f64
+             && (LST == MVT::i8 || LST == MVT::i16)) {
+    if (stm->calVersion() >= CAL_VERSION_SC_155 && !RHSVT.isVector()) {
+      DST = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32, RHS);
+      DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, DST);
+    } else {
+      SDValue ToInt = genf64toi32(RHS, DAG, true);
+      DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
+    }
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue DST;
+  SDValue RHS = Op.getOperand(0);
+  EVT RHSVT = RHS.getValueType();
+  MVT RST = RHSVT.getScalarType().getSimpleVT();
+  EVT LHSVT = Op.getValueType();
+  MVT LST = LHSVT.getScalarType().getSimpleVT();
+  DebugLoc DL = Op.getDebugLoc();
+  const AMDILTargetMachine*
+  amdtm = reinterpret_cast<const AMDILTargetMachine*>
+          (&this->getTargetMachine());
+  const AMDILSubtarget*
+  stm = dynamic_cast<const AMDILSubtarget*>(
+          amdtm->getSubtargetImpl());
+  if (RST == MVT::f64 && RHSVT.isVector()) {
+    // We don't support vector 64-bit floating point conversions.
+    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
+      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                               DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+      op = DAG.getNode(ISD::FP_TO_UINT, DL, LST, op);
+      if (!x) {
+        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+      } else {
+        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
+                          DST, op, DAG.getTargetConstant(x, MVT::i32));
+      }
+
+    }
+  } else if (RST == MVT::f64
+             && LST == MVT::i32) {
+    if (stm->calVersion() >= CAL_VERSION_SC_155 && !RHSVT.isVector()
+        && stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+      DST = SDValue(Op.getNode(), 0);
+    } else {
+      DST = genf64toi32(RHS, DAG, false);
+    }
+  } else if (RST == MVT::f64
+             && LST == MVT::i64) {
+    DST = genf64toi64(RHS, DAG, false);
+  } else if (RST == MVT::f64
+             && (LST == MVT::i8 || LST == MVT::i16)) {
+    if (stm->calVersion() >= CAL_VERSION_SC_155 && !RHSVT.isVector()) {
+      DST = DAG.getNode(ISD::FP_TO_UINT, DL, MVT::i32, RHS);
+      DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, DST);
+    } else {
+      SDValue ToInt = genf64toi32(RHS, DAG, false);
+      DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
+    }
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+SDValue
+AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
+                                 SelectionDAG &DAG) const
+{
+  EVT RHSVT = RHS.getValueType();
+  DebugLoc DL = RHS.getDebugLoc();
+  EVT INTVT;
+  EVT LONGVT;
+  bool isVec = RHSVT.isVector();
+  if (isVec) {
+    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+                                  RHSVT.getVectorNumElements()));
+    INTVT = EVT(MVT::getVectorVT(MVT::i32,
+                                 RHSVT.getVectorNumElements()));
+  } else {
+    LONGVT = EVT(MVT::i64);
+    INTVT = EVT(MVT::i32);
+  }
+  SDValue x = RHS;
+  const AMDILTargetMachine*
+  amdtm = reinterpret_cast<const AMDILTargetMachine*>
+          (&this->getTargetMachine());
+  const AMDILSubtarget*
+  stm = dynamic_cast<const AMDILSubtarget*>(
+          amdtm->getSubtargetImpl());
+  if (stm->calVersion() >= CAL_VERSION_SC_135) {
+    // unsigned x = RHS;
+    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
+    // double d = as_double( xd );
+    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
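+    // Worked example (for illustration): x = 7 gives the 64-bit pattern
+    // 0x4330000000000007, which as a double is 2^52 + 7; subtracting
+    // 0x1.0p+52 yields exactly 7.0, since every 32-bit value fits in the
+    // 52-bit mantissa without rounding.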
+    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
+                              DAG.getConstant( 0x43300000, INTVT ) );
+    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
+    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
+                                   DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
+    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
+  } else {
+    SDValue clz = genCLZu32(x, DAG);
+
+    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
+    // Except for an input 0... which requires a 0 exponent
+    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
+                               DAG.getConstant( (1023+31), INTVT), clz );
+    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );
+
+    // Normalize frac
+    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );
+
+    // Eliminate hidden bit
+    rhi = DAG.getNode( ISD::AND, DL, INTVT,
+                       rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
+
+    // Pack exponent and frac
+    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
+                               rhi, DAG.getConstant( (32 - 11), INTVT ) );
+    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
+                       rhi, DAG.getConstant( 11, INTVT ) );
+    exp = DAG.getNode( ISD::SHL, DL, INTVT,
+                       exp, DAG.getConstant( 20, INTVT ) );
+    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
+
+    // Convert 2 x 32 in to 1 x 64, then to double precision float type
+    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
+  }
+}
+SDValue
+AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
+                                 SelectionDAG &DAG) const
+{
+  EVT RHSVT = RHS.getValueType();
+  DebugLoc DL = RHS.getDebugLoc();
+  EVT INTVT;
+  EVT LONGVT;
+  bool isVec = RHSVT.isVector();
+  if (isVec) {
+    INTVT = EVT(MVT::getVectorVT(MVT::i32,
+                                 RHSVT.getVectorNumElements()));
+  } else {
+    INTVT = EVT(MVT::i32);
+  }
+  LONGVT = RHSVT;
+  SDValue x = RHS;
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+                                &this->getTargetMachine())->getSubtargetImpl();
+  if (0 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    // double dhi = (double)(as_uint2(x).y);
+    // double dlo = (double)(as_uint2(x).x);
+    // return mad(dhi, 0x1.0p+32, dlo)
+    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
+    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
+    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
+    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
+    uint64_t val = 0x41f0000000000000ULL;
+    double dval = *(double*)&val;
+    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
+                       DAG.getConstantFP(dval, LHSVT), dlo);
+  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
+    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
+    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
+    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
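+    // Here 0x43300000... packs the low word as the mantissa of 0x1.0p+52
+    // (giving 2^52 + xlo) and 0x45300000... packs the high word as the
+    // mantissa of 0x1.0p+84 (giving 2^84 + xhi * 2^32); the constant
+    // 0x4530000000100000 below is the bit pattern of 0x1.0p+84 + 0x1.0p+52,
+    // so the subtraction and final add reassemble the full 64-bit value
+    // with at most one rounding step.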
+    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );  // x & 0xffff_ffffUL
+    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
+    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
+    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 :  AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
+    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
+    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
+    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
+                             DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
+    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
+    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
+
+  } else {
+    SDValue clz = genCLZu64(x, DAG);
+    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
+    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
+                               DAG.getConstant( (1023+63), INTVT), clz );
+    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
+    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+                       mash, exp, mash );  // exp = exp, or 0 if input was 0
+
+    // Normalize frac
+    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
+                                 clz, DAG.getConstant( 31, INTVT ) );
+    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
+                                  DAG.getConstant( 32, INTVT ), clz31 );
+    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
+    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
+    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
+    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
+    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
+    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
+    SDValue rlo2 = DAG.getConstant( 0, INTVT );
+    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
+                                 clz, DAG.getConstant( 32, INTVT ) );
+    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+                               clz32, rhi2, rhi1 );
+    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+                               clz32, rlo2, rlo1 );
+
+    // Eliminate hidden bit
+    rhi = DAG.getNode( ISD::AND, DL, INTVT,
+                       rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
+
+    // Save bits needed to round properly
+    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
+                                 rlo, DAG.getConstant( 0x7ff, INTVT ) );
+
+    // Pack exponent and frac
+    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
+                       rlo, DAG.getConstant( 11, INTVT ) );
+    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
+                                rhi, DAG.getConstant( (32 - 11), INTVT ) );
+    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
+    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
+                       rhi, DAG.getConstant( 11, INTVT ) );
+    exp = DAG.getNode( ISD::SHL, DL, INTVT,
+                       exp, DAG.getConstant( 20, INTVT ) );
+    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
+
+    // Compute rounding bit
+    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
+                                rlo, DAG.getConstant( 1, INTVT ) );
+    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
+                               round, DAG.getConstant( 0x3ff, INTVT ) );
+    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+                       DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
+                       grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
+    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
+    round = DAG.getNode( ISD::SRL, DL, INTVT,
+                         round, DAG.getConstant( 10, INTVT ) );
+    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
+
+    // Add rounding bit
+    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
+                                  round, DAG.getConstant( 0, INTVT ) );
+    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
+    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
+  }
+}
+SDValue
+AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue RHS = Op.getOperand(0);
+  EVT RHSVT = RHS.getValueType();
+  MVT RST = RHSVT.getScalarType().getSimpleVT();
+  EVT LHSVT = Op.getValueType();
+  MVT LST = LHSVT.getScalarType().getSimpleVT();
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue DST;
+  EVT INTVT;
+  EVT LONGVT;
+  const AMDILTargetMachine*
+  amdtm = reinterpret_cast<const AMDILTargetMachine*>
+          (&this->getTargetMachine());
+  const AMDILSubtarget*
+  stm = dynamic_cast<const AMDILSubtarget*>(
+          amdtm->getSubtargetImpl());
+  if (LST == MVT::f64 && LHSVT.isVector()) {
+    // We don't support vector 64-bit floating point conversions.
+    DST = Op;
+    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
+      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                               DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
+      if (!x) {
+        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+      } else {
+        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
+                          op, DAG.getTargetConstant(x, MVT::i32));
+      }
+
+    }
+  } else if (RST == MVT::i32
+             && LST == MVT::f64) {
+    if (stm->device()->getGeneration() > AMDILDeviceInfo::HD4XXX
+        && stm->calVersion() >= CAL_VERSION_SC_155) {
+      DST = SDValue(Op.getNode(), 0);
+    } else {
+      DST = genu32tof64(RHS, LHSVT, DAG);
+    }
+  } else if (RST == MVT::i64
+             && LST == MVT::f64) {
+    DST = genu64tof64(RHS, LHSVT, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue RHS = Op.getOperand(0);
+  EVT RHSVT = RHS.getValueType();
+  MVT RST = RHSVT.getScalarType().getSimpleVT();
+  EVT INTVT;
+  EVT LONGVT;
+  SDValue DST;
+  bool isVec = RHSVT.isVector();
+  DebugLoc DL = Op.getDebugLoc();
+  EVT LHSVT = Op.getValueType();
+  MVT LST = LHSVT.getScalarType().getSimpleVT();
+  const AMDILTargetMachine*
+  amdtm = reinterpret_cast<const AMDILTargetMachine*>
+          (&this->getTargetMachine());
+  const AMDILSubtarget*
+  stm = dynamic_cast<const AMDILSubtarget*>(
+          amdtm->getSubtargetImpl());
+  if (LST == MVT::f64 && LHSVT.isVector()) {
+    // We don't support vector 64-bit floating point conversions.
+    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
+      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                               DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+      op = DAG.getNode(ISD::SINT_TO_FP, DL, LST, op);
+      if (!x) {
+        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+      } else {
+        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
+                          op, DAG.getTargetConstant(x, MVT::i32));
+      }
+    }
+
+  } else {
+
+    if (isVec) {
+      LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+                                    RHSVT.getVectorNumElements()));
+      INTVT = EVT(MVT::getVectorVT(MVT::i32,
+                                   RHSVT.getVectorNumElements()));
+    } else {
+      LONGVT = EVT(MVT::i64);
+      INTVT = EVT(MVT::i32);
+    }
+    MVT RST = RHSVT.getScalarType().getSimpleVT();
+    if ((RST == MVT::i32 || RST == MVT::i64)
+        && LST == MVT::f64) {
+      if (RST == MVT::i32) {
+        if (stm->device()->getGeneration() > AMDILDeviceInfo::HD4XXX
+            && stm->calVersion() >= CAL_VERSION_SC_155) {
+          DST = SDValue(Op.getNode(), 0);
+          return DST;
+        }
+      }
+      SDValue c31 = DAG.getConstant( 31, INTVT );
+      SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
+
+      SDValue S;      // Sign, as 0 or -1
+      SDValue Sbit;   // Sign bit, as one bit, MSB only.
+      if (RST == MVT::i32) {
+        Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
+        S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
+      } else { // 64-bit case... SRA of 64-bit values is slow
+        SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
+        Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
+        SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
+        S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
+      }
+
+      // get abs() of input value, given sign as S (0 or -1)
+      // SpI = RHS + S
+      SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
+      // SpIxS = SpI ^ S
+      SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
+
+      // Convert unsigned value to double precision
+      SDValue R;
+      if (RST == MVT::i32) {
+        // r = cast_u32_to_f64(SpIxS)
+        R = genu32tof64(SpIxS, LHSVT, DAG);
+      } else {
+        // r = cast_u64_to_f64(SpIxS)
+        R = genu64tof64(SpIxS, LHSVT, DAG);
+      }
+
+      // drop in the sign bit
+      SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
+      SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
+      SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
+      thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
+      t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
+      DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
+    } else {
+      DST = SDValue(Op.getNode(), 0);
+    }
+  }
+  return DST;
+}
+SDValue
+AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  bool isVec = RHS.getValueType().isVector();
+  if (OVT.getScalarType() == MVT::i64) {
+    /*const AMDILTargetMachine*
+      amdtm = reinterpret_cast<const AMDILTargetMachine*>
+      (&this->getTargetMachine());
+      const AMDILSubtarget*
+      stm = dynamic_cast<const AMDILSubtarget*>(
+      amdtm->getSubtargetImpl());*/
+    MVT INTTY = MVT::i32;
+    if (OVT == MVT::v2i64) {
+      INTTY = MVT::v2i32;
+    }
+    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
+    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
+    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
+    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
+    RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
+    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
+    //TODO: need to use IBORROW on HD5XXX and later hardware
+    SDValue cmp;
+    if (OVT == MVT::i64) {
+      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                        DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+                        LHSLO, RHSLO);
+    } else {
+      SDValue cmplo;
+      SDValue cmphi;
+      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                                   DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
+      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                                   DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
+      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                                   DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
+      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                                   DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
+      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
+                          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+                          LHSRLO, RHSRLO);
+      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
+                          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+                          LHSRHI, RHSRHI);
+      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
+      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
+                        cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
+    }
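+    // cmp is all ones when the low-word subtract borrowed (AMDIL CMP
+    // returns -1 for true), so adding it to the high words subtracts
+    // the borrow.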
+    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
+    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
+                      INTLO, INTHI);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+SDValue
+AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::f64) {
+    DST = LowerFDIV64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::f32) {
+    DST = LowerFDIV32(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerSDIV64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerSDIV32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16
+             || OVT.getScalarType() == MVT::i8) {
+    DST = LowerSDIV24(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerUDIV64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerUDIV32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16
+             || OVT.getScalarType() == MVT::i8) {
+    DST = LowerUDIV24(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerSREM64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerSREM32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16) {
+    DST = LowerSREM16(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i8) {
+    DST = LowerSREM8(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerUREM64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerUREM32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16) {
+    DST = LowerUREM16(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i8) {
+    DST = LowerUREM8(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  bool isVec = OVT.isVector();
+  if (OVT.getScalarType() != MVT::i64) {
+    DST = SDValue(Op.getNode(), 0);
+  } else {
+    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
+    // TODO: This needs to be turned into a tablegen pattern
+    SDValue LHS = Op.getOperand(0);
+    SDValue RHS = Op.getOperand(1);
+
+    MVT INTTY = MVT::i32;
+    if (OVT == MVT::v2i64) {
+      INTTY = MVT::v2i32;
+    }
+    // mul64(h1, l1, h0, l0)
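+    // i.e. low  = low 32 bits of (l1 * l0)
+    //      high = h0*l1 + h1*l0 + mulhi(l0, l1)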
+    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+                                DL,
+                                INTTY, LHS);
+    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+                                DL,
+                                INTTY, LHS);
+    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+                                DL,
+                                INTTY, RHS);
+    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+                                DL,
+                                INTTY, RHS);
+    // MULLO_UINT_1 r1, h0, l1
+    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
+                                 DL,
+                                 INTTY, RHSHI, LHSLO);
+    // MULLO_UINT_1 r2, h1, l0
+    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
+                                 DL,
+                                 INTTY, RHSLO, LHSHI);
+    // ADD_INT hr, r1, r2
+    SDValue ADDHI = DAG.getNode(ISD::ADD,
+                                DL,
+                                INTTY, RHILLO, RLOHHI);
+    // MULHI_UINT_1 r3, l1, l0
+    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
+                                 DL,
+                                 INTTY, RHSLO, LHSLO);
+    // ADD_INT hr, hr, r3
+    SDValue HIGH = DAG.getNode(ISD::ADD,
+                               DL,
+                               INTTY, ADDHI, RLOLLO);
+    // MULLO_UINT_1 l3, l1, l0
+    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
+                              DL,
+                              INTTY, LHSLO, RHSLO);
+    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+                      DL,
+                      OVT, LOW, HIGH);
+  }
+  return DST;
+}
+SDValue
+AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
+{
+  EVT VT = Op.getValueType();
+  //printSDValue(Op, 1);
+  SDValue Nodes1;
+  SDValue second;
+  SDValue third;
+  SDValue fourth;
+  DebugLoc DL = Op.getDebugLoc();
+  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
+                       DL,
+                       VT, Op.getOperand(0));
+  bool allEqual = true;
+  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
+    if (Op.getOperand(0) != Op.getOperand(x)) {
+      allEqual = false;
+      break;
+    }
+  }
+  if (allEqual) {
+    return Nodes1;
+  }
+  switch(Op.getNumOperands()) {
+  default:
+  case 1:
+    break;
+  case 4:
+    fourth = Op.getOperand(3);
+    if (fourth.getOpcode() != ISD::UNDEF) {
+      Nodes1 = DAG.getNode(
+                 ISD::INSERT_VECTOR_ELT,
+                 DL,
+                 Op.getValueType(),
+                 Nodes1,
+                 fourth,
+                 DAG.getConstant(7, MVT::i32));
+    }
+  case 3:
+    third = Op.getOperand(2);
+    if (third.getOpcode() != ISD::UNDEF) {
+      Nodes1 = DAG.getNode(
+                 ISD::INSERT_VECTOR_ELT,
+                 DL,
+                 Op.getValueType(),
+                 Nodes1,
+                 third,
+                 DAG.getConstant(6, MVT::i32));
+    }
+  case 2:
+    second = Op.getOperand(1);
+    if (second.getOpcode() != ISD::UNDEF) {
+      Nodes1 = DAG.getNode(
+                 ISD::INSERT_VECTOR_ELT,
+                 DL,
+                 Op.getValueType(),
+                 Nodes1,
+                 second,
+                 DAG.getConstant(5, MVT::i32));
+    }
+    break;
+  };
+  return Nodes1;
+}
+
+SDValue
+AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  const SDValue *ptr = NULL;
+  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+  uint32_t swizzleNum = 0;
+  SDValue DST;
+  if (!VT.isVector()) {
+    SDValue Res = Op.getOperand(0);
+    return Res;
+  }
+
+  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
+    ptr = &Op.getOperand(1);
+  } else {
+    ptr = &Op.getOperand(0);
+  }
+  if (CSDN) {
+    swizzleNum = (uint32_t)CSDN->getZExtValue();
+    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
+    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
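+    // mask2 keeps the identity per-byte swizzle 0x04030201 everywhere
+    // except the selected lane; mask3 places 0x01 only in that lane,
+    // presumably selecting the inserted value there.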
+    DST = DAG.getNode(AMDILISD::VINSERT,
+                      DL,
+                      VT,
+                      Op.getOperand(0),
+                      *ptr,
+                      DAG.getTargetConstant(mask2, MVT::i32),
+                      DAG.getTargetConstant(mask3, MVT::i32));
+  } else {
+    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
+    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
+    SDValue res = DAG.getNode(AMDILISD::VINSERT,
+                              DL, VT, Op.getOperand(0), *ptr,
+                              DAG.getTargetConstant(mask2, MVT::i32),
+                              DAG.getTargetConstant(mask3, MVT::i32));
+    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
+      mask2 = 0x04030201 & ~(0xFF << (x * 8));
+      mask3 = 0x01010101 & (0xFF << (x * 8));
+      SDValue t = DAG.getNode(AMDILISD::VINSERT,
+                              DL, VT, Op.getOperand(0), *ptr,
+                              DAG.getTargetConstant(mask2, MVT::i32),
+                              DAG.getTargetConstant(mask3, MVT::i32));
+      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
+                              DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+                              Op.getOperand(2), DAG.getConstant(x, MVT::i32));
+      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
+      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
+    }
+    DST = res;
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  EVT VT = Op.getValueType();
+  //printSDValue(Op, 1);
+  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  uint64_t swizzleNum = 0;
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Res;
+  if (!Op.getOperand(0).getValueType().isVector()) {
+    Res = Op.getOperand(0);
+    return Res;
+  }
+  if (CSDN) {
+    // Static vector extraction
+    swizzleNum = CSDN->getZExtValue() + 1;
+    Res = DAG.getNode(AMDILISD::VEXTRACT,
+                      DL, VT,
+                      Op.getOperand(0),
+                      DAG.getTargetConstant(swizzleNum, MVT::i32));
+  } else {
+    SDValue Op1 = Op.getOperand(1);
+    uint32_t vecSize = 4;
+    SDValue Op0 = Op.getOperand(0);
+    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
+                              DL, VT, Op0,
+                              DAG.getTargetConstant(1, MVT::i32));
+    if (Op0.getValueType().isVector()) {
+      vecSize = Op0.getValueType().getVectorNumElements();
+    }
+    for (uint32_t x = 2; x <= vecSize; ++x) {
+      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
+                              DL, VT, Op0,
+                              DAG.getTargetConstant(x, MVT::i32));
+      SDValue c = DAG.getNode(AMDILISD::CMP,
+                              DL, Op1.getValueType(),
+                              DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+                              Op1, DAG.getConstant(x, MVT::i32));
+      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
+                        VT, c, t, res);
+
+    }
+    Res = res;
+  }
+  return Res;
+}
+
+SDValue
+AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  uint32_t vecSize = Op.getValueType().getVectorNumElements();
+  SDValue src = Op.getOperand(0);
+  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  uint64_t offset = 0;
+  EVT vecType = Op.getValueType().getVectorElementType();
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Result;
+  if (CSDN) {
+    offset = CSDN->getZExtValue();
+    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                         DL,vecType, src, DAG.getConstant(offset, MVT::i32));
+    Result = DAG.getNode(AMDILISD::VBUILD, DL,
+                         Op.getValueType(), Result);
+    for (uint32_t x = 1; x < vecSize; ++x) {
+      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
+                                src, DAG.getConstant(offset + x, MVT::i32));
+      if (elt.getOpcode() != ISD::UNDEF) {
+        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+                             Op.getValueType(), Result, elt,
+                             DAG.getConstant(x, MVT::i32));
+      }
+    }
+  } else {
+    SDValue idx = Op.getOperand(1);
+    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                         DL, vecType, src, idx);
+    Result = DAG.getNode(AMDILISD::VBUILD, DL,
+                         Op.getValueType(), Result);
+    for (uint32_t x = 1; x < vecSize; ++x) {
+      idx = DAG.getNode(ISD::ADD, DL, vecType,
+                        idx, DAG.getConstant(1, MVT::i32));
+      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
+                                src, idx);
+      if (elt.getOpcode() != ISD::UNDEF) {
+        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+                             Op.getValueType(), Result, elt, idx);
+      }
+    }
+  }
+  return Result;
+}
+SDValue
+AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  SDValue Res = DAG.getNode(AMDILISD::VBUILD,
+                            Op.getDebugLoc(),
+                            Op.getValueType(),
+                            Op.getOperand(0));
+  return Res;
+}
+SDValue
+AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue andOp;
+  andOp = DAG.getNode(
+            AMDILISD::AND,
+            Op.getDebugLoc(),
+            Op.getValueType(),
+            Op.getOperand(0),
+            Op.getOperand(1));
+  return andOp;
+}
+SDValue
+AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue orOp;
+  orOp = DAG.getNode(AMDILISD::OR,
+                     Op.getDebugLoc(),
+                     Op.getValueType(),
+                     Op.getOperand(0),
+                     Op.getOperand(1));
+  return orOp;
+}
+SDValue
+AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Cond = Op.getOperand(0);
+  SDValue LHS = Op.getOperand(1);
+  SDValue RHS = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+  Cond = getConversionNode(DAG, Cond, Op, true);
+  Cond = DAG.getNode(AMDILISD::CMOVLOG,
+                     DL,
+                     Op.getValueType(), Cond, LHS, RHS);
+  return Cond;
+}
+SDValue
+AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Cond;
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue TRUE = Op.getOperand(2);
+  SDValue FALSE = Op.getOperand(3);
+  SDValue CC = Op.getOperand(4);
+  DebugLoc DL = Op.getDebugLoc();
+  bool skipCMov = false;
+  bool genINot = false;
+  EVT OVT = Op.getValueType();
+
+  // Check for possible elimination of cmov
+  if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
+    const ConstantSDNode *trueConst
+    = dyn_cast<ConstantSDNode>( TRUE.getNode() );
+    const ConstantSDNode *falseConst
+    = dyn_cast<ConstantSDNode>( FALSE.getNode() );
+    if (trueConst && falseConst) {
+      // both possible result values are constants
+      if (trueConst->isAllOnesValue()
+          && falseConst->isNullValue()) { // and convenient constants
+        skipCMov = true;
+      } else if (trueConst->isNullValue()
+                 && falseConst->isAllOnesValue()) { // less convenient
+        skipCMov = true;
+        genINot = true;
+      }
+    }
+  }
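+  // If both arms were the constants -1/0, the all-ones/zero compare result
+  // below is already the select value (inverted via NOT for the 0/-1 case),
+  // so the final cmov can be skipped.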
+  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+  unsigned int AMDILCC = CondCCodeToCC(
+                           SetCCOpcode,
+                           LHS.getValueType().getSimpleVT().SimpleTy);
+  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+  Cond = DAG.getNode(
+           AMDILISD::CMP,
+           DL,
+           LHS.getValueType(),
+           DAG.getConstant(AMDILCC, MVT::i32),
+           LHS,
+           RHS);
+  Cond = getConversionNode(DAG, Cond, Op, true);
+  if (genINot) {
+    Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
+  }
+  if (!skipCMov) {
+    Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
+  }
+  return Cond;
+}
+SDValue
+AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Cond;
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue CC  = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+  unsigned int AMDILCC = CondCCodeToCC(
+                           SetCCOpcode,
+                           LHS.getValueType().getSimpleVT().SimpleTy);
+  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+  Cond = DAG.getNode(
+           AMDILISD::CMP,
+           DL,
+           LHS.getValueType(),
+           DAG.getConstant(AMDILCC, MVT::i32),
+           LHS,
+           RHS);
+  Cond = getConversionNode(DAG, Cond, Op, true);
+  Cond = DAG.getNode(
+           ISD::AND,
+           DL,
+           Cond.getValueType(),
+           DAG.getConstant(1, Cond.getValueType()),
+           Cond);
+  return Cond;
+}
+
+SDValue
+AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Data = Op.getOperand(0);
+  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+  DebugLoc DL = Op.getDebugLoc();
+  EVT DVT = Data.getValueType();
+  EVT BVT = BaseType->getVT();
+  unsigned baseBits = BVT.getScalarType().getSizeInBits();
+  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+  unsigned shiftBits = srcBits - baseBits;
+  if (srcBits < 32) {
+    // If the op is less than 32 bits, then it needs to extend to 32 bits
+    // so it can properly keep the upper bits valid.
+    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+    shiftBits = 32 - baseBits;
+    DVT = IVT;
+  }
+  SDValue Shift = DAG.getConstant(shiftBits, DVT);
+  // Shift left by 'Shift' bits.
+  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+  // Signed shift Right by 'Shift' bits.
+  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
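+  // Shifting left and then arithmetic-right by the same amount replicates
+  // the sign bit of the baseBits-wide value across the upper bits.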
+  if (srcBits < 32) {
+    // Once the sign extension is done, the op needs to be converted to
+    // its original type.
+    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+  }
+  return Data;
+}
+EVT
+AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
+{
+  int iSize = (size * numEle);
+  int vEle = (iSize >> ((size == 64) ? 6 : 5));
+  if (!vEle) {
+    vEle = 1;
+  }
+  if (size == 64) {
+    if (vEle == 1) {
+      return EVT(MVT::i64);
+    } else {
+      return EVT(MVT::getVectorVT(MVT::i64, vEle));
+    }
+  } else {
+    if (vEle == 1) {
+      return EVT(MVT::i32);
+    } else {
+      return EVT(MVT::getVectorVT(MVT::i32, vEle));
+    }
+  }
+}
+
+SDValue
+AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Src = Op.getOperand(0);
+  SDValue Dst = Op;
+  SDValue Res;
+  DebugLoc DL = Op.getDebugLoc();
+  EVT SrcVT = Src.getValueType();
+  EVT DstVT = Dst.getValueType();
+  // Lets bitcast the floating point types to an
+  // equivalent integer type before converting to vectors.
+  if (SrcVT.getScalarType().isFloatingPoint()) {
+    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
+                        SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
+                        SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
+                      Src);
+    SrcVT = Src.getValueType();
+  }
+  uint32_t ScalarSrcSize = SrcVT.getScalarType()
+                           .getSimpleVT().getSizeInBits();
+  uint32_t ScalarDstSize = DstVT.getScalarType()
+                           .getSimpleVT().getSizeInBits();
+  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
+  bool isVec = SrcVT.isVector();
+  if (DstVT.getScalarType().isInteger() &&
+      (SrcVT.getScalarType().isInteger()
+       || SrcVT.getScalarType().isFloatingPoint())) {
+    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
+        || (ScalarSrcSize == 64
+            && DstNumEle == 4
+            && ScalarDstSize == 16)) {
+      // This is the problematic case when bitcasting i64 <-> <4 x i16>
+      // This approach is a little different as we cannot generate a
+      // <4 x i64> vector
+      // as that is illegal in our backend and we are already past
+      // the DAG legalizer.
+      // So, in this case, we will do the following conversion.
+      // Case 1:
+      // %dst = <4 x i16> %src bitconvert i64 ==>
+      // %tmp = <4 x i16> %src convert <4 x i32>
+      // %tmp = <4 x i32> %tmp and 0xFFFF
+      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
+      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
+      // %dst = <2 x i32> %tmp bitcast i64
+      // case 2:
+      // %dst = i64 %src bitconvert <4 x i16> ==>
+      // %tmp = i64 %src bitcast <2 x i32>
+      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
+      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
+      // %tmp = <4 x i32> %tmp and 0xFFFF
+      // %dst = <4 x i16> %tmp bitcast <4 x i32>
+      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
+                                 DAG.getConstant(0xFFFF, MVT::i32));
+      SDValue const16 = DAG.getConstant(16, MVT::i32);
+      if (ScalarDstSize == 64) {
+        // case 1
+        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
+        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
+        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+                                Op, DAG.getConstant(0, MVT::i32));
+        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+                                Op, DAG.getConstant(1, MVT::i32));
+        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
+        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+                                Op, DAG.getConstant(2, MVT::i32));
+        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+                                Op, DAG.getConstant(3, MVT::i32));
+        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
+        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
+        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
+        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
+        return Res;
+      } else {
+        // case 2
+        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
+        SDValue lor16
+        = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
+        SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
+        SDValue hir16
+        = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
+        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
+                                     MVT::v4i32, lo);
+        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                                     MVT::i32, DAG.getConstant(1, MVT::i32));
+        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+                             resVec, lor16, idxVal);
+        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                             MVT::i32, DAG.getConstant(2, MVT::i32));
+        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+                             resVec, hi, idxVal);
+        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                             MVT::i32, DAG.getConstant(3, MVT::i32));
+        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+                             resVec, hir16, idxVal);
+        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
+        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
+        return Res;
+      }
+    } else {
+      // There are four cases we need to worry about for bitcasts
+      // where the size of all
+      // source, intermediates and result is <= 128 bits, unlike
+      // the above case
+      // 1) Sub32bit bitcast 32bitAlign
+      // %dst = <4 x i8> bitcast i32
+      // (also <[2|4] x i16> to <[2|4] x i32>)
+      // 2) 32bitAlign bitcast Sub32bit
+      // %dst = i32 bitcast <4 x i8>
+      // 3) Sub32bit bitcast LargerSub32bit
+      // %dst = <2 x i8> bitcast i16
+      // (also <4 x i8> to <2 x i16>)
+      // 4) Sub32bit bitcast SmallerSub32bit
+      // %dst = i16 bitcast <2 x i8>
+      // (also <2 x i16> to <4 x i8>)
+      // This also only handles types that are powers of two
+      if ((ScalarDstSize & (ScalarDstSize - 1))
+          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
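+        // Non-power-of-two element sizes fall through to the generic
+        // BITCONV at the end of the function.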
+      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
+        // case 1:
+        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
+#if 0 // FIXME: LLVM does not like this for some reason, cannot SignExt vectors
+        SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
+#else
+        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+                                  DAG.getConstant(0, IntTy));
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                                    MVT::i32, DAG.getConstant(x, MVT::i32));
+          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                     SrcVT.getScalarType(), Src,
+                                     DAG.getConstant(x, MVT::i32));
+          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
+          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
+                            res, temp, idx);
+        }
+#endif
+        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+                                   DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
+        SDValue *newEle = new SDValue[SrcNumEle];
+        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                  IntTy.getScalarType(), res,
+                                  DAG.getConstant(x, MVT::i32));
+        }
+        uint32_t Ratio = SrcNumEle / DstNumEle;
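+        // Ratio source elements are packed into each destination element;
+        // shift each one into its lane before the ORs below combine them.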
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          if (x % Ratio) {
+            newEle[x] = DAG.getNode(ISD::SHL, DL,
+                                    IntTy.getScalarType(), newEle[x],
+                                    DAG.getConstant(ScalarSrcSize * (x % Ratio),
+                                                    MVT::i32));
+          }
+        }
+        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
+          newEle[x] = DAG.getNode(ISD::OR, DL,
+                                  IntTy.getScalarType(), newEle[x], newEle[x + 1]);
+        }
+        if (ScalarSrcSize == 8) {
+          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
+            newEle[x] = DAG.getNode(ISD::OR, DL,
+                                    IntTy.getScalarType(), newEle[x], newEle[x + 2]);
+          }
+          if (DstNumEle == 1) {
+            Dst = newEle[0];
+          } else {
+            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+                              newEle[0]);
+            for (uint32_t x = 1; x < DstNumEle; ++x) {
+              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                                        MVT::i32, DAG.getConstant(x, MVT::i32));
+              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+                                DstVT, Dst, newEle[x * 4], idx);
+            }
+          }
+        } else {
+          if (DstNumEle == 1) {
+            Dst = newEle[0];
+          } else {
+            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+                              newEle[0]);
+            for (uint32_t x = 1; x < DstNumEle; ++x) {
+              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                                        MVT::i32, DAG.getConstant(x, MVT::i32));
+              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+                                DstVT, Dst, newEle[x * 2], idx);
+            }
+          }
+        }
+        delete [] newEle;
+        return Dst;
+      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
+        // case 2:
+        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
+        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+                                  DAG.getConstant(0, IntTy));
+        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          for (uint32_t y = 0; y < mult; ++y) {
+            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                                      MVT::i32,
+                                      DAG.getConstant(x * mult + y, MVT::i32));
+            SDValue t;
+            if (SrcNumEle > 1) {
+              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                              DL, SrcVT.getScalarType(), Src,
+                              DAG.getConstant(x, MVT::i32));
+            } else {
+              t = Src;
+            }
+            if (y != 0) {
+              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
+                              t, DAG.getConstant(y * ScalarDstSize,
+                                                 MVT::i32));
+            }
+            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
+                              DL, IntTy, vec, t, idx);
+          }
+        }
+        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
+        return Dst;
+      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
+        // case 3:
+        SDValue *numEle = new SDValue[SrcNumEle];
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                  MVT::i8, Src, DAG.getConstant(x, MVT::i32));
+          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
+          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
+                                  DAG.getConstant(0xFF, MVT::i16));
+        }
+        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
+          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
+                                  DAG.getConstant(8, MVT::i16));
+          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
+                                      numEle[x-1], numEle[x]);
+        }
+        if (DstNumEle > 1) {
+          // If we are not a scalar i16, the only other case is v2i16,
+          // since we cannot have v8i8 at this point and v4i16 cannot
+          // be generated.
+          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
+                            numEle[0]);
+          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                                    MVT::i32, DAG.getConstant(1, MVT::i32));
+          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
+                            Dst, numEle[2], idx);
+        } else {
+          Dst = numEle[0];
+        }
+        delete [] numEle;
+        return Dst;
+      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
+        // case 4:
+        SDValue *numEle = new SDValue[DstNumEle];
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                      MVT::i16, Src, DAG.getConstant(x, MVT::i32));
+          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
+                                          numEle[x * 2], DAG.getConstant(8, MVT::i16));
+        }
+        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
+        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
+        for (uint32_t x = 1; x < DstNumEle; ++x) {
+          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                                    MVT::i32, DAG.getConstant(x, MVT::i32));
+          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
+                            Dst, numEle[x], idx);
+        }
+        delete [] numEle;
+        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
+        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
+        return Res;
+      }
+    }
+  }
+  Res = DAG.getNode(AMDILISD::BITCONV,
+                    Dst.getDebugLoc(),
+                    Dst.getValueType(), Src);
+  return Res;
+}
+
+SDValue
+AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size = Op.getOperand(1);
+  unsigned int SPReg = AMDIL::SP;
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue SP = DAG.getCopyFromReg(Chain,
+                                  DL,
+                                  SPReg, MVT::i32);
+  SDValue NewSP = DAG.getNode(ISD::ADD,
+                              DL,
+                              MVT::i32, SP, Size);
+  Chain = DAG.getCopyToReg(SP.getValue(1),
+                           DL,
+                           SPReg, NewSP);
+  SDValue Ops[2] = {NewSP, Chain};
+  Chain = DAG.getMergeValues(Ops, 2, DL);
+  return Chain;
+}
+SDValue
+AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Chain = Op.getOperand(0);
+  SDValue Cond  = Op.getOperand(1);
+  SDValue Jump  = Op.getOperand(2);
+  SDValue Result;
+  Result = DAG.getNode(
+             AMDILISD::BRANCH_COND,
+             Op.getDebugLoc(),
+             Op.getValueType(),
+             Chain, Jump, Cond);
+  return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Chain = Op.getOperand(0);
+  CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
+  SDValue LHS   = Op.getOperand(2);
+  SDValue RHS   = Op.getOperand(3);
+  SDValue JumpT  = Op.getOperand(4);
+  SDValue CmpValue;
+  ISD::CondCode CC = CCNode->get();
+  SDValue Result;
+  unsigned int cmpOpcode = CondCCodeToCC(
+                             CC,
+                             LHS.getValueType().getSimpleVT().SimpleTy);
+  CmpValue = DAG.getNode(
+               AMDILISD::CMP,
+               Op.getDebugLoc(),
+               LHS.getValueType(),
+               DAG.getConstant(cmpOpcode, MVT::i32),
+               LHS, RHS);
+  Result = DAG.getNode(
+             AMDILISD::BRANCH_COND,
+             CmpValue.getDebugLoc(),
+             MVT::Other, Chain,
+             JumpT, CmpValue);
+  return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Result = DAG.getNode(
+                     AMDILISD::DP_TO_FP,
+                     Op.getDebugLoc(),
+                     Op.getValueType(),
+                     Op.getOperand(0),
+                     Op.getOperand(1));
+  return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Result = DAG.getNode(
+                     AMDILISD::VCONCAT,
+                     Op.getDebugLoc(),
+                     Op.getValueType(),
+                     Op.getOperand(0),
+                     Op.getOperand(1));
+  return Result;
+}
+// LowerReturn - Lower an ISD::RET node.
+SDValue
+AMDILTargetLowering::LowerReturn(SDValue Chain,
+                                 CallingConv::ID CallConv, bool isVarArg,
+                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                 const SmallVectorImpl<SDValue> &OutVals,
+                                 DebugLoc dl, SelectionDAG &DAG)
+const
+{
+  //MachineFunction& MF = DAG.getMachineFunction();
+  // CCValAssign - represent the assignment of the return value
+  // to a location
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slot
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), RVLocs, *DAG.getContext());
+
+  // Analyze return values of ISD::RET
+  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
+  // If this is the first return lowered for this function, add
+  // the regs to the liveout set for the function
+  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
+      MRI.addLiveOut(RVLocs[i].getLocReg());
+    }
+  }
+  // FIXME: implement this when tail call is implemented
+  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
+  // both x86 and ppc implement this in ISelLowering
+
+  // Regular return here
+  SDValue Flag;
+  SmallVector<SDValue, 6> RetOps;
+  RetOps.push_back(Chain);
+  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
+  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+    CCValAssign &VA = RVLocs[i];
+    SDValue ValToCopy = OutVals[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    // ISD::Ret => ret chain, (regnum1, val1), ...
+    // So i * 2 + 1 indexes only the regnums
+    Chain = DAG.getCopyToReg(Chain,
+                             dl,
+                             VA.getLocReg(),
+                             ValToCopy,
+                             Flag);
+    // Glue the emitted copies together so the scheduler cannot split or
+    // reorder them.
+    Flag = Chain.getValue(1);
+  }
+  /*if (MF.getFunction()->hasStructRetAttr()) {
+    assert(0 && "Struct returns are not yet implemented!");
+  // Both MIPS and X86 have this
+  }*/
+  RetOps[0] = Chain;
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
+
+  Flag = DAG.getNode(AMDILISD::RET_FLAG,
+                     dl,
+                     MVT::Other, &RetOps[0], RetOps.size());
+  return Flag;
+}
+void
+AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
+    unsigned int opCode) const
+{
+  MachineOperand DST = MI->getOperand(0);
+  MachineOperand LHS = MI->getOperand(2);
+  MachineOperand RHS = MI->getOperand(3);
+  unsigned int opi32Code = 0, si32Code = 0;
+  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+  uint32_t REGS[12];
+  switch (simpleVT)  {
+  case AMDIL::GPRI64RegClassID:
+    simpleVT = AMDIL::GPRI32RegClassID;
+    break;
+  case AMDIL::GPRV2I64RegClassID:
+    simpleVT = AMDIL::GPRV2I32RegClassID;
+    break;
+  };
+  // All the relationals can be generated with 6 temp registers
+  for (int x = 0; x < 12; ++x) {
+    REGS[x] = genVReg(simpleVT);
+  }
+  // Pull out the high and low components of each 64 bit register
+  generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
+  generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
+  generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
+  generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
+  // Determine the correct opcode that we should use
+  switch(opCode) {
+  default:
+    assert(!"comparison case not handled!");
+    break;
+  case AMDIL::LEQ:
+    si32Code = opi32Code = AMDIL::IEQ;
+    break;
+  case AMDIL::LNE:
+    si32Code = opi32Code = AMDIL::INE;
+    break;
+  case AMDIL::LLE:
+  case AMDIL::ULLE:
+  case AMDIL::LGE:
+  case AMDIL::ULGE:
+    if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
+      std::swap(REGS[0], REGS[2]);
+    } else {
+      std::swap(REGS[1], REGS[3]);
+    }
+    if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
+      opi32Code = AMDIL::ILT;
+    } else {
+      opi32Code = AMDIL::ULT;
+    }
+    si32Code = AMDIL::UGE;
+    break;
+  case AMDIL::LGT:
+  case AMDIL::ULGT:
+    std::swap(REGS[0], REGS[2]);
+    std::swap(REGS[1], REGS[3]);
+  case AMDIL::LLT:
+  case AMDIL::ULLT:
+    if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
+      opi32Code = AMDIL::ILT;
+    } else {
+      opi32Code = AMDIL::ULT;
+    }
+    si32Code = AMDIL::ULT;
+    break;
+  };
+  // Do the initial opcode on the high and low components.
+  // This leaves the following:
+  // REGS[4] = L_HI OP R_HI
+  // REGS[5] = L_LO OP R_LO
+  generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
+  generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
+  switch(opi32Code) {
+  case AMDIL::IEQ:
+  case AMDIL::INE: {
+    // combine the results with an and or or depending on if
+    // we are eq or ne
+    uint32_t combineOp = (opi32Code == AMDIL::IEQ)
+                         ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
+    generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
+  }
+  break;
+  default:
+    // this finishes codegen for the following pattern
+    // REGS[4] || (REGS[5] && (L_HI == R_HI))
+    generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
+    generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
+                        REGS[9]);
+    generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
+                        REGS[10]);
+    break;
+  }
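+  // Splat the 32-bit predicate into both halves so the 64-bit result is
+  // all ones or all zeros.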
+  generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
+}
+
+unsigned int
+AMDILTargetLowering::getFunctionAlignment(const Function *) const
+{
+  return 0;
+}
+
+bool
+AMDILTargetLowering::isLoadBitCastBeneficial(EVT lVT, EVT bVT) const
+{
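+  // Bitcasting the load is beneficial unless it would split a load of
+  // >= 32-bit scalars into sub-32-bit elements of the same total width.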
+  return !(lVT.getSizeInBits() == bVT.getSizeInBits()
+           && lVT.getScalarType().getSizeInBits() > bVT.getScalarType().getSizeInBits()
+           && bVT.getScalarType().getSizeInBits() < 32
+           && lVT.getScalarType().getSizeInBits() >= 32);
+}
+
+void
+AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
+                                    MachineBasicBlock::iterator &BBI,
+                                    DebugLoc *DL, const TargetInstrInfo *TII) const
+{
+  mBB = BB;
+  mBBI = BBI;
+  mDL = DL;
+  mTII = TII;
+}
+uint32_t
+AMDILTargetLowering::genVReg(uint32_t regType) const
+{
+  return mBB->getParent()->getRegInfo().createVirtualRegister(
+           getRegClassFromID(regType));
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
+{
+  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+    uint32_t src1) const
+{
+  return generateMachineInst(opcode, dst).addReg(src1);
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+    uint32_t src1, uint32_t src2) const
+{
+  return generateMachineInst(opcode, dst, src1).addReg(src2);
+}
+
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+    uint32_t src1, uint32_t src2, uint32_t src3) const
+{
+  return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
+}
+
+
+SDValue
+AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  MVT INTTY;
+  MVT FLTTY;
+  if (!OVT.isVector()) {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  } else if (OVT.getVectorNumElements() == 2) {
+    INTTY = MVT::v2i32;
+    FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  }
+  unsigned bitsize = OVT.getScalarType().getSizeInBits();
+  // char|short jq = ia ^ ib;
+  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+  // jq = jq >> (bitsize - 2)
+  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+  // jq = jq | 0x1
+  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
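+  // jq is now +1 when LHS and RHS have the same sign and -1 otherwise;
+  // it is the correction added to the truncated quotient at the end.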
+
+  jq = DAG.getSExtOrTrunc(jq, DL, OVT);
+
+  // int ia = (int)LHS;
+  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // int ib = (int)RHS;
+  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+  // float fa = (float)ia;
+  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+  // float fb = (float)ib;
+  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+  // float fq = native_divide(fa, fb);
+  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+  // fq = trunc(fq);
+  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+  // float fqneg = -fq;
+  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+  // float fr = mad(fqneg, fb, fa);
+  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
+
+  // int iq = (int)fq;
+  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+  // fr = fabs(fr);
+  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+  // fb = fabs(fb);
+  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+  // int cv = fr >= fb;
+  SDValue cv = DAG.getSetCC(DL, OVT, fr, fb, ISD::SETOGE);
+  // jq = (cv ? jq : 0);
+  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, DAG.getConstant(0, OVT));
+  // dst = iq + jq;
+  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+  return iq;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerSDIV32 function generates code equivalent to the following IL.
+  // mov r0, LHS
+  // mov r1, RHS
+  // ilt r10, r0, 0
+  // ilt r11, r1, 0
+  // iadd r0, r0, r10
+  // iadd r1, r1, r11
+  // ixor r0, r0, r10
+  // ixor r1, r1, r11
+  // udiv r0, r0, r1
+  // ixor r10, r10, r11
+  // iadd r0, r0, r10
+  // ixor DST, r0, r10
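+  // In short: take absolute values with the add/xor trick, divide
+  // unsigned, then give the quotient the sign of (LHS ^ RHS).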
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+                            DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+                            r0, DAG.getConstant(0, OVT));
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+                            DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+                            r1, DAG.getConstant(0, OVT));
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r0, r0, r1
+  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+  // ixor r10, r10, r11
+  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  MVT INTTY;
+  MVT FLTTY;
+  if (!OVT.isVector()) {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  } else if (OVT.getVectorNumElements() == 2) {
+    INTTY = MVT::v2i32;
+    FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  }
+
+  // The LowerUDIV24 function implements the following CL.
+  // int ia = (int)LHS
+  // float fa = (float)ia
+  // int ib = (int)RHS
+  // float fb = (float)ib
+  // float fq = native_divide(fa, fb)
+  // fq = trunc(fq)
+  // float t = mad(fq, fb, fb)
+  // int iq = (int)fq - (t <= fa)
+  // return (type)iq
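+  // The float path is presumably safe here because 8- and 16-bit operands
+  // fit exactly in the 24-bit f32 mantissa; the mad/compare pair then
+  // corrects the truncated quotient by at most one.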
+
+  // int ia = (int)LHS
+  SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
+
+  // float fa = (float)ia
+  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+  // int ib = (int)RHS
+  SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
+
+  // float fb = (float)ib
+  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+  // float fq = native_divide(fa, fb)
+  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+  // fq = trunc(fq)
+  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+  // float t = mad(fq, fb, fb)
+  SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
+
+  // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
+  SDValue iq;
+  fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+  // The scalar and vector cases take the same path here.
+  iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
+  iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
+
+
+  // return (type)iq
+  iq = DAG.getZExtOrTrunc(iq, DL, OVT);
+  return iq;
+
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  return SDValue(Op.getNode(), 0);
+}
+SDValue
+AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i8) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i8) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+  return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i16) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i16) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+  return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerSREM32 function generates code equivalent to the following IL.
+  // mov r0, LHS
+  // mov r1, RHS
+  // ilt r10, r0, 0
+  // ilt r11, r1, 0
+  // iadd r0, r0, r10
+  // iadd r1, r1, r11
+  // ixor r0, r0, r10
+  // ixor r1, r1, r11
+  // udiv r20, r0, r1
+  // umul r20, r20, r1
+  // sub r0, r0, r20
+  // iadd r0, r0, r10
+  // ixor DST, r0, r10
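+  // In short: take absolute values, divide unsigned, multiply the quotient
+  // back and subtract to get |LHS| % |RHS|, then restore the sign of the
+  // dividend.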
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+                            DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+                            r0, DAG.getConstant(0, OVT));
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+                            DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+                            r1, DAG.getConstant(0, OVT));
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r20, r0, r1
+  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+  // umul r20, r20, r1
+  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
+
+  // sub r0, r0, r20
+  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
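
A scalar sketch of the sign-mask trick used above (hypothetical reference, not part of the commit): adding the 0/-1 compare result and xor'ing with it computes the absolute value in two's complement, the remainder is taken on the absolute values, and the dividend's sign is re-applied the same way.

    #include <cstdint>

    static int32_t srem32_ref(int32_t lhs, int32_t rhs) {
      uint32_t s0 = lhs < 0 ? 0xFFFFFFFFu : 0u;      // ilt r10, r0, 0
      uint32_t s1 = rhs < 0 ? 0xFFFFFFFFu : 0u;      // ilt r11, r1, 0
      uint32_t a = ((uint32_t)lhs + s0) ^ s0;        // iadd/ixor: |lhs|
      uint32_t b = ((uint32_t)rhs + s1) ^ s1;        // iadd/ixor: |rhs|
      uint32_t r = a - (a / b) * b;                  // udiv, umul, sub
      return (int32_t)((r + s0) ^ s0);               // re-apply the dividend's sign
    }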
+
+SDValue
+AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
+{
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i8) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i8) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerUREM8 function generates the equivalent of the following IL.
+  // mov r0, as_u32(LHS)
+  // mov r1, as_u32(RHS)
+  // and r10, r0, 0xFF
+  // and r11, r1, 0xFF
+  // cmov_logical r3, r11, r11, 0x1
+  // udiv r3, r10, r3
+  // cmov_logical r3, r11, r3, 0
+  // umul r3, r3, r11
+  // sub r3, r10, r3
+  // and as_u8(DST), r3, 0xFF
+
+  // mov r0, as_u32(LHS)
+  SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // mov r1, as_u32(RHS)
+  SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+  // and r10, r0, 0xFF
+  SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
+                            DAG.getConstant(0xFF, INTTY));
+
+  // and r11, r1, 0xFF
+  SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
+                            DAG.getConstant(0xFF, INTTY));
+
+  // cmov_logical r3, r11, r11, 0x1
+  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
+                           DAG.getConstant(0x01, INTTY));
+
+  // udiv r3, r10, r3
+  r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
+
+  // cmov_logical r3, r11, r3, 0
+  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
+                   DAG.getConstant(0, INTTY));
+
+  // umul r3, r3, r11
+  r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
+
+  // sub r3, r10, r3
+  r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
+
+  // and as_u8(DST), r3, 0xFF
+  SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
+                            DAG.getConstant(0xFF, INTTY));
+  DST = DAG.getZExtOrTrunc(DST, DL, OVT);
+  return DST;
+}
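
The cmov_logical pair above is just a divide-by-zero guard; a scalar sketch following the IL comments (hypothetical reference, not part of the commit):

    #include <cstdint>

    static uint8_t urem8_ref(uint8_t lhs, uint8_t rhs) {
      uint32_t a = lhs & 0xFF;
      uint32_t b = rhs & 0xFF;
      uint32_t d = b ? b : 1;          // cmov_logical: never divide by zero
      uint32_t q = a / d;
      q = b ? q : 0;                   // force the quotient to 0 when b == 0
      return (uint8_t)(a - q * b);     // remainder, masked back to 8 bits
    }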
+
+SDValue
+AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i16) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i16) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerUREM16 function generates the equivalent of the following IL.
+  // mov r0, LHS
+  // mov r1, RHS
+  // and r10, r0, 0xFFFF
+  // and r11, r1, 0xFFFF
+  // cmov_logical r3, r11, r11, 0x1
+  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
+  // and r3, r3, 0xFFFF
+  // cmov_logical r3, r11, r3, 0
+  // umul r3, r3, r11
+  // sub r3, r10, r3
+  // and DST, r3, 0xFFFF
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // and r10, r0, 0xFFFF
+  SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
+                            DAG.getConstant(0xFFFF, OVT));
+
+  // and r11, r1, 0xFFFF
+  SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
+                            DAG.getConstant(0xFFFF, OVT));
+
+  // cmov_logical r3, r11, r11, 0x1
+  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
+                           DAG.getConstant(0x01, OVT));
+
+  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
+  r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
+  r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
+  r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
+  r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
+  r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
+
+  // and r3, r3, 0xFFFF
+  r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
+                   DAG.getConstant(0xFFFF, OVT));
+
+  // cmov_logical r3, r11, r3, 0
+  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
+                   DAG.getConstant(0, OVT));
+  // umul r3, r3, r11
+  r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
+
+  // sub r3, r10, r3
+  r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
+
+  // and DST, r3, 0xFFFF
+  SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
+                            DAG.getConstant(0xFFFF, OVT));
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerUREM32 function generates the equivalent of the following IL.
+  // udiv r20, LHS, RHS
+  // umul r20, r20, RHS
+  // sub DST, LHS, r20
+
+  // udiv r20, LHS, RHS
+  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
+
+  // umul r20, r20, RHS
+  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
+
+  // sub DST, LHS, r20
+  SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
+  return DST;
+}
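
This is simply the identity a % b == a - (a / b) * b spelled out as three nodes; a one-line reference (hypothetical, not part of the commit):

    #include <cstdint>

    static uint32_t urem32_ref(uint32_t a, uint32_t b) {
      return a - (a / b) * b;          // udiv, umul, sub
    }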
+
+SDValue
+AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
+{
+  return SDValue(Op.getNode(), 0);
+}
+
+
+SDValue
+AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2f32) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4f32) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue DST;
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+                                &this->getTargetMachine())->getSubtargetImpl();
+  if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    // TODO: This doesn't work for vector types yet
+    // The LowerFDIV32 function generates the equivalent of the following
+    // IL:
+    // mov r20, as_int(LHS)
+    // mov r21, as_int(RHS)
+    // and r30, r20, 0x7f800000
+    // and r31, r21, 0x7f800000
+    // and r32, r20, 0x807FFFFF
+    // and r33, r21, 0x807FFFFF
+    // ieq r40, r30, 0x7F800000
+    // ieq r41, r31, 0x7F800000
+    // ieq r42, r30, 0
+    // ieq r43, r31, 0
+    // and r50, r20, 0x80000000
+    // and r51, r21, 0x80000000
+    // ior r32, r32, 0x3f800000
+    // ior r33, r33, 0x3f800000
+    // cmov_logical r32, r42, r50, r32
+    // cmov_logical r33, r43, r51, r33
+    // cmov_logical r32, r40, r20, r32
+    // cmov_logical r33, r41, r21, r33
+    // ior r50, r40, r41
+    // ior r51, r42, r43
+    // ior r50, r50, r51
+    // inegate r52, r31
+    // iadd r30, r30, r52
+    // cmov_logical r30, r50, 0, r30
+    // div_zeroop(infinity) r21, 1.0, r33
+    // mul_ieee r20, r32, r21
+    // and r22, r20, 0x7FFFFFFF
+    // and r23, r20, 0x80000000
+    // ishr r60, r22, 0x00000017
+    // ishr r61, r30, 0x00000017
+    // iadd r20, r20, r30
+    // iadd r21, r22, r30
+    // iadd r60, r60, r61
+    // ige r42, 0, R60
+    // ior r41, r23, 0x7F800000
+    // ige r40, r60, 0x000000FF
+    // cmov_logical r40, r50, 0, r40
+    // cmov_logical r20, r42, r23, r20
+    // cmov_logical DST, r40, r41, r20
+    // as_float(DST)
+
+    // mov r20, as_int(LHS)
+    SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
+
+    // mov r21, as_int(RHS)
+    SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
+
+    // and r30, r20, 0x7f800000
+    SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+                              DAG.getConstant(0x7F800000, INTTY));
+
+    // and r31, r21, 0x7f800000
+    SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+                              DAG.getConstant(0x7f800000, INTTY));
+
+    // and r32, r20, 0x807FFFFF
+    SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+                              DAG.getConstant(0x807FFFFF, INTTY));
+
+    // and r33, r21, 0x807FFFFF
+    SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+                              DAG.getConstant(0x807FFFFF, INTTY));
+
+    // ieq r40, r30, 0x7F800000
+    SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                              DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+                              R30, DAG.getConstant(0x7F800000, INTTY));
+
+    // ieq r41, r31, 0x7F800000
+    SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                              DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+                              R31, DAG.getConstant(0x7F800000, INTTY));
+
+    // ieq r42, r30, 0
+    SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                              DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+                              R30, DAG.getConstant(0, INTTY));
+
+    // ieq r43, r31, 0
+    SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                              DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+                              R31, DAG.getConstant(0, INTTY));
+
+    // and r50, r20, 0x80000000
+    SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+                              DAG.getConstant(0x80000000, INTTY));
+
+    // and r51, r21, 0x80000000
+    SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+                              DAG.getConstant(0x80000000, INTTY));
+
+    // ior r32, r32, 0x3f800000
+    R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
+                      DAG.getConstant(0x3F800000, INTTY));
+
+    // ior r33, r33, 0x3f800000
+    R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
+                      DAG.getConstant(0x3F800000, INTTY));
+
+    // cmov_logical r32, r42, r50, r32
+    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
+
+    // cmov_logical r33, r43, r51, r33
+    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
+
+    // cmov_logical r32, r40, r20, r32
+    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
+
+    // cmov_logical r33, r41, r21, r33
+    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
+
+    // ior r50, r40, r41
+    R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
+
+    // ior r51, r42, r43
+    R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
+
+    // ior r50, r50, r51
+    R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
+
+    // inegate r52, r31
+    SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
+
+    // iadd r30, r30, r52
+    R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
+
+    // cmov_logical r30, r50, 0, r30
+    R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+                      DAG.getConstant(0, INTTY), R30);
+
+    // div_zeroop(infinity) r21, 1.0, as_float(r33)
+    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
+    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+                      DAG.getConstantFP(1.0f, OVT), R33);
+
+    // mul_ieee as_int(r20), as_float(r32), r21
+    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
+    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
+    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
+
+    // and r22, r20, 0x7FFFFFFF
+    SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+                              DAG.getConstant(0x7FFFFFFF, INTTY));
+
+    // and r23, r20, 0x80000000
+    SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+                              DAG.getConstant(0x80000000, INTTY));
+
+    // ishr r60, r22, 0x00000017
+    SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
+                              DAG.getConstant(0x00000017, INTTY));
+
+    // ishr r61, r30, 0x00000017
+    SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
+                              DAG.getConstant(0x00000017, INTTY));
+
+    // iadd r20, r20, r30
+    R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
+
+    // iadd r21, r22, r30
+    R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
+
+    // iadd r60, r60, r61
+    R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
+
+    // ige r42, 0, R60
+    R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+                      DAG.getConstant(0, INTTY),
+                      R60);
+
+    // ior r41, r23, 0x7F800000
+    R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
+                      DAG.getConstant(0x7F800000, INTTY));
+
+    // ige r40, r60, 0x000000FF
+    R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+                      R60,
+                      DAG.getConstant(0x000000FF, INTTY));
+
+    // cmov_logical r40, r50, 0, r40
+    R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+                      DAG.getConstant(0, INTTY),
+                      R40);
+
+    // cmov_logical r20, r42, r23, r20
+    R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
+
+    // cmov_logical DST, r40, r41, r20
+    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
+
+    // as_float(DST)
+    DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
+  } else {
+    // The following sequence of DAG nodes produces the following IL:
+    // fabs r1, RHS
+    // lt r2, 0x1.0p+96f, r1
+    // cmov_logical r3, r2, 0x1.0p-32f, 1.0f
+    // mul_ieee r1, RHS, r3
+    // div_zeroop(infinity) r0, LHS, r1
+    // mul_ieee DST, r0, r3
+
+    // fabs r1, RHS
+    SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
+    // lt r2, 0x1.0p+96f, r1
+    SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+                             DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
+                             DAG.getConstant(0x6f800000, INTTY), r1);
+    // cmov_logical r3, r2, 0x1.0p-32f, 1.0f
+    SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
+                             DAG.getConstant(0x2f800000, INTTY),
+                             DAG.getConstant(0x3f800000, INTTY));
+    // mul_ieee r1, RHS, r3
+    r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
+    // div_zeroop(infinity) r0, LHS, r1
+    SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
+    // mul_ieee DST, r0, r3
+    DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
+  }
+  return DST;
+}
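
A sketch of the non-HD4XXX path above (hypothetical reference using C++17 hex float literals, not part of the commit): when |RHS| exceeds 2^96 the divisor is pre-scaled by 2^-32 so the approximate reciprocal stays in the normal float range, and the quotient is multiplied by the same factor afterwards to compensate.

    #include <cmath>

    static float fdiv32_ref(float lhs, float rhs) {
      float scale = std::fabs(rhs) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
      float q = lhs / (rhs * scale);   // div_zeroop(infinity) in the IL
      return q * scale;                // undo the pre-scaling
    }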
+
+SDValue
+AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  return SDValue(Op.getNode(), 0);
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,563 @@
+//===-- AMDILISelLowering.h -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_ISELLOWERING_H_
+#define AMDIL_ISELLOWERING_H_
+#include "AMDIL.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm
+{
+namespace AMDILISD
+{
+enum {
+  FIRST_NUMBER = ISD::BUILTIN_OP_END,
+  INTTOANY,    // Dummy instruction that takes an int and
+  // converts the SDNode to any type
+  DP_TO_FP,    // Conversion from 64bit FP to 32bit FP
+  FP_TO_DP,    // Conversion from 32bit FP to 64bit FP
+  BITCONV,     // instruction that converts from any type to any type
+  CMOV,        // 32bit FP Conditional move instruction
+  CMOVLOG,     // 32bit FP Conditional move logical instruction
+  SELECT,      // 32bit FP select instruction
+  SETCC,       // 32bit FP setcc instruction
+  ISGN,        // 32bit Int Sign instruction
+  INEGATE,     // 32bit Int Negation instruction
+  MAD,         // 32bit Fused Multiply Add instruction
+  ADD,         // 32/64 bit pseudo instruction
+  AND,         // 128 bit and instruction
+  OR,          // 128 bit or instruction
+  NOT,         // 128 bit not instruction
+  XOR,         // 128 bit xor instruction
+  MOVE,        // generic mov instruction
+  PHIMOVE,     // generic phi-node mov instruction
+  VBUILD,      // scalar to vector mov instruction
+  VEXTRACT,    // extract vector components
+  VINSERT,     // insert vector components
+  VCONCAT,     // concat a single vector to another vector
+  UMAD,        // 32bit UInt Fused Multiply Add instruction
+  CALL,        // Function call based on a single integer
+  RET,         // Return from a function call
+  SELECT_CC,   // Select the correct conditional instruction
+  BRCC,        // Select the correct branch instruction
+  CMPCC,       // Compare two GPR operands
+  CMPICC,      // Compare two GPR operands, set icc.
+  CMPFCC,      // Compare two FP operands, set fcc.
+  BRICC,       // Branch to dest on icc condition
+  BRFCC,       // Branch to dest on fcc condition
+  SELECT_ICC,  // Select between two values using the current ICC flags
+  SELECT_FCC,  // Select between two values using the current FCC flags
+  LCREATE,     // Create a 64bit integer from two 32 bit integers
+  LCOMPHI,     // Get the hi 32 bits from a 64 bit integer
+  LCOMPLO,     // Get the lo 32 bits from a 64 bit integer
+  DCREATE,     // Create a 64bit float from two 32 bit integers
+  DCOMPHI,     // Get the hi 32 bits from a 64 bit float
+  DCOMPLO,     // Get the lo 32 bits from a 64 bit float
+  LCREATE2,     // Create a 64bit integer from two 32 bit integers
+  LCOMPHI2,     // Get the hi 32 bits from a 64 bit integer
+  LCOMPLO2,     // Get the lo 32 bits from a 64 bit integer
+  DCREATE2,     // Create a 64bit float from two 32 bit integers
+  DCOMPHI2,     // Get the hi 32 bits from a 64 bit float
+  DCOMPLO2,     // Get the lo 32 bits from a 64 bit float
+  UMUL,        // 32bit unsigned multiplication
+  IFFB_HI,  // 32bit find first hi bit instruction
+  IFFB_LO,  // 32bit find first low bit instruction
+  DIV_INF,      // Divide with infinity returned on zero divisor
+  SMAX,        // Signed integer max
+  CMP,
+  IL_CC_I_GT,
+  IL_CC_I_LT,
+  IL_CC_I_GE,
+  IL_CC_I_LE,
+  IL_CC_I_EQ,
+  IL_CC_I_NE,
+  RET_FLAG,
+  BRANCH_COND,
+  LOOP_NZERO,
+  LOOP_ZERO,
+  LOOP_CMP,
+  ADDADDR,
+  // ATOMIC Operations
+  // Global Memory
+  ATOM_G_ADD = ISD::FIRST_TARGET_MEMORY_OPCODE,
+  ATOM_G_AND,
+  ATOM_G_CMPXCHG,
+  ATOM_G_DEC,
+  ATOM_G_INC,
+  ATOM_G_MAX,
+  ATOM_G_UMAX,
+  ATOM_G_MIN,
+  ATOM_G_UMIN,
+  ATOM_G_OR,
+  ATOM_G_SUB,
+  ATOM_G_RSUB,
+  ATOM_G_XCHG,
+  ATOM_G_XOR,
+  ATOM_G_ADD_NORET,
+  ATOM_G_AND_NORET,
+  ATOM_G_CMPXCHG_NORET,
+  ATOM_G_DEC_NORET,
+  ATOM_G_INC_NORET,
+  ATOM_G_MAX_NORET,
+  ATOM_G_UMAX_NORET,
+  ATOM_G_MIN_NORET,
+  ATOM_G_UMIN_NORET,
+  ATOM_G_OR_NORET,
+  ATOM_G_SUB_NORET,
+  ATOM_G_RSUB_NORET,
+  ATOM_G_XCHG_NORET,
+  ATOM_G_XOR_NORET,
+  // Local Memory
+  ATOM_L_ADD,
+  ATOM_L_AND,
+  ATOM_L_CMPXCHG,
+  ATOM_L_DEC,
+  ATOM_L_INC,
+  ATOM_L_MAX,
+  ATOM_L_UMAX,
+  ATOM_L_MIN,
+  ATOM_L_UMIN,
+  ATOM_L_OR,
+  ATOM_L_MSKOR,
+  ATOM_L_SUB,
+  ATOM_L_RSUB,
+  ATOM_L_XCHG,
+  ATOM_L_XOR,
+  ATOM_L_ADD_NORET,
+  ATOM_L_AND_NORET,
+  ATOM_L_CMPXCHG_NORET,
+  ATOM_L_DEC_NORET,
+  ATOM_L_INC_NORET,
+  ATOM_L_MAX_NORET,
+  ATOM_L_UMAX_NORET,
+  ATOM_L_MIN_NORET,
+  ATOM_L_UMIN_NORET,
+  ATOM_L_OR_NORET,
+  ATOM_L_MSKOR_NORET,
+  ATOM_L_SUB_NORET,
+  ATOM_L_RSUB_NORET,
+  ATOM_L_XCHG_NORET,
+  ATOM_L_XOR_NORET,
+  // Region Memory
+  ATOM_R_ADD,
+  ATOM_R_AND,
+  ATOM_R_CMPXCHG,
+  ATOM_R_DEC,
+  ATOM_R_INC,
+  ATOM_R_MAX,
+  ATOM_R_UMAX,
+  ATOM_R_MIN,
+  ATOM_R_UMIN,
+  ATOM_R_OR,
+  ATOM_R_MSKOR,
+  ATOM_R_SUB,
+  ATOM_R_RSUB,
+  ATOM_R_XCHG,
+  ATOM_R_XOR,
+  ATOM_R_ADD_NORET,
+  ATOM_R_AND_NORET,
+  ATOM_R_CMPXCHG_NORET,
+  ATOM_R_DEC_NORET,
+  ATOM_R_INC_NORET,
+  ATOM_R_MAX_NORET,
+  ATOM_R_UMAX_NORET,
+  ATOM_R_MIN_NORET,
+  ATOM_R_UMIN_NORET,
+  ATOM_R_OR_NORET,
+  ATOM_R_MSKOR_NORET,
+  ATOM_R_SUB_NORET,
+  ATOM_R_RSUB_NORET,
+  ATOM_R_XCHG_NORET,
+  ATOM_R_XOR_NORET,
+  // Append buffer
+  APPEND_ALLOC,
+  APPEND_CONSUME,
+  // 2D Images
+  IMAGE2D_READ,
+  IMAGE2D_WRITE,
+  IMAGE2D_INFO0,
+  IMAGE2D_INFO1,
+  // 3D Images
+  IMAGE3D_READ,
+  IMAGE3D_WRITE,
+  IMAGE3D_INFO0,
+  IMAGE3D_INFO1,
+  ATOM_F_ADD,
+  ATOM_F_AND,
+  ATOM_F_CMPXCHG,
+  ATOM_F_DEC,
+  ATOM_F_INC,
+  ATOM_F_MAX,
+  ATOM_F_UMAX,
+  ATOM_F_MIN,
+  ATOM_F_UMIN,
+  ATOM_F_OR,
+  ATOM_F_SUB,
+  ATOM_F_XCHG,
+  ATOM_F_XOR,
+
+  LAST_ISD_NUMBER
+};
+} // AMDILISD
+
+class MachineBasicBlock;
+class MachineInstr;
+class DebugLoc;
+class TargetInstrInfo;
+
+class AMDILTargetLowering : public TargetLowering
+{
+private:
+  int VarArgsFrameOffset;   // Frame offset to start of varargs area.
+public:
+  AMDILTargetLowering(TargetMachine &TM);
+
+  virtual MVT getShiftAmountTy(EVT LHSTy) const {
+    return MVT::i32;
+  }
+
+  virtual SDValue
+  LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+  int
+  getVarArgsFrameOffset() const;
+
+  /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+  /// in Mask are known to be either zero or one and return them in the
+  /// KnownZero/KnownOne bitsets.
+  virtual void
+  computeMaskedBitsForTargetNode(
+    const SDValue Op,
+    APInt &KnownZero,
+    APInt &KnownOne,
+    const SelectionDAG &DAG,
+    unsigned Depth = 0
+  ) const;
+
+  virtual MachineBasicBlock*
+  EmitInstrWithCustomInserter(
+    MachineInstr *MI,
+    MachineBasicBlock *MBB) const;
+
+  virtual bool
+  getTgtMemIntrinsic(IntrinsicInfo &Info,
+                     const CallInst &I, unsigned Intrinsic) const;
+  virtual const char*
+  getTargetNodeName(
+    unsigned Opcode
+  ) const;
+
+  /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+  virtual EVT getSetCCResultType(EVT VT) const;
+
+  // We want to mark f32/f64 floating point values as legal.
+  bool
+  isFPImmLegal(const APFloat &Imm, EVT VT) const;
+  // We don't want to shrink f64/f32 constants because
+  // they both take up the same amount of space and
+  // we don't want to use a f2d instruction.
+  bool ShouldShrinkFPConstant(EVT VT) const;
+
+  /// getFunctionAlignment - Return the Log2 alignment of this
+  /// function.
+  unsigned int
+  getFunctionAlignment(const Function *F) const;
+
+  /// This function returns true if the target allows unaligned memory accesses
+  /// of the specified type. This is used, for example, in situations where an
+  /// array copy/move/set is converted to a sequence of store operations. Its
+  /// use helps to ensure that such replacements don't generate code that causes
+  /// an alignment error (trap) on the target machine.
+  /// @brief Determine if the target supports unaligned memory accesses.
+  bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
+  /// Return true if the load uses larger data types than
+  /// the bitcast and false otherwise.
+  /// This should disable optimizing:
+  /// (char16)((int4*)ptr)[idx] => (char16*)ptr[idx]
+  /// but not disable:
+  /// (int4)((char16*)ptr)[idx] => (int4*)ptr[idx]
+  bool
+  isLoadBitCastBeneficial(EVT load, EVT bitcast) const;
+
+private:
+  CCAssignFn*
+  CCAssignFnForNode(unsigned int CC) const;
+
+  SDValue LowerCallResult(SDValue Chain,
+                          SDValue InFlag,
+                          CallingConv::ID CallConv,
+                          bool isVarArg,
+                          const SmallVectorImpl<ISD::InputArg> &Ins,
+                          DebugLoc dl,
+                          SelectionDAG &DAG,
+                          SmallVectorImpl<SDValue> &InVals) const;
+
+  SDValue LowerMemArgument(SDValue Chain,
+                           CallingConv::ID CallConv,
+                           const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+                           DebugLoc dl, SelectionDAG &DAG,
+                           const CCValAssign &VA,  MachineFrameInfo *MFI,
+                           unsigned i) const;
+
+  SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
+                           SDValue Arg,
+                           DebugLoc dl, SelectionDAG &DAG,
+                           const CCValAssign &VA,
+                           ISD::ArgFlagsTy Flags) const;
+
+  virtual SDValue
+  LowerFormalArguments(SDValue Chain,
+                       CallingConv::ID CallConv, bool isVarArg,
+                       const SmallVectorImpl<ISD::InputArg> &Ins,
+                       DebugLoc dl, SelectionDAG &DAG,
+                       SmallVectorImpl<SDValue> &InVals) const;
+
+  virtual SDValue
+  LowerCall(SDValue Chain, SDValue Callee,
+            CallingConv::ID CallConv, bool doesNotRet,
+            bool isVarArg, bool &isTailCall,
+            const SmallVectorImpl<ISD::OutputArg> &Outs,
+            const SmallVectorImpl<SDValue> &OutVals,
+            const SmallVectorImpl<ISD::InputArg> &Ins,
+            DebugLoc dl, SelectionDAG &DAG,
+            SmallVectorImpl<SDValue> &InVals) const;
+
+  virtual SDValue
+  LowerReturn(SDValue Chain,
+              CallingConv::ID CallConv, bool isVarArg,
+              const SmallVectorImpl<ISD::OutputArg> &Outs,
+              const SmallVectorImpl<SDValue> &OutVals,
+              DebugLoc dl, SelectionDAG &DAG) const;
+
+  //+++--- Function dealing with conversions between floating point and
+  //integer types ---+++//
+  SDValue
+  genCLZu64(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  genCLZuN(SDValue Op, SelectionDAG &DAG, uint32_t bits) const;
+  SDValue
+  genCLZu32(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  genf64toi32(SDValue Op, SelectionDAG &DAG,
+              bool includeSign) const;
+
+  SDValue
+  genf64toi64(SDValue Op, SelectionDAG &DAG,
+              bool includeSign) const;
+
+  SDValue
+  genu32tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const;
+
+  SDValue
+  genu64tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG& DAG) const;
+
+  SDValue
+  LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG& DAG) const;
+
+  SDValue
+  LowerINTRINSIC_VOID(SDValue Op, SelectionDAG& DAG) const;
+
+  SDValue
+  LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerADD(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerSUB(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerUREM(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerUREM8(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerUREM16(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerUREM32(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerUREM64(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerUDIV(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerUDIV24(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerUDIV32(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerUDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerAND(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerOR(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+  EVT
+  genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
+
+  SDValue
+  LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue
+  LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue
+  LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+  void
+  generateCMPInstr(MachineInstr*, MachineBasicBlock*,
+                   const TargetInstrInfo&) const;
+  MachineOperand
+  convertToReg(MachineOperand) const;
+
+  // Private members used by the set of instruction generation
+  // functions. They are marked mutable because they are cached so
+  // that they don't have to be looked up every time the
+  // generateMachineInst/genVReg helpers are used; this keeps the
+  // code simpler and cleaner. The object itself doesn't change, as
+  // only these functions use this cached state.
+  mutable MachineBasicBlock *mBB;
+  mutable DebugLoc *mDL;
+  mutable const TargetInstrInfo *mTII;
+  mutable MachineBasicBlock::iterator mBBI;
+  void
+  setPrivateData(MachineBasicBlock *BB,
+                 MachineBasicBlock::iterator &BBI,
+                 DebugLoc *DL,
+                 const TargetInstrInfo *TII) const;
+  uint32_t genVReg(uint32_t regType) const;
+  MachineInstrBuilder
+  generateMachineInst(uint32_t opcode,
+                      uint32_t dst) const;
+  MachineInstrBuilder
+  generateMachineInst(uint32_t opcode,
+                      uint32_t dst, uint32_t src1) const;
+  MachineInstrBuilder
+  generateMachineInst(uint32_t opcode,
+                      uint32_t dst, uint32_t src1, uint32_t src2) const;
+  MachineInstrBuilder
+  generateMachineInst(uint32_t opcode,
+                      uint32_t dst, uint32_t src1, uint32_t src2,
+                      uint32_t src3) const;
+  uint32_t
+  addExtensionInstructions(
+    uint32_t reg, bool signedShift,
+    unsigned int simpleVT) const;
+  void
+  generateLongRelational(MachineInstr *MI,
+                         unsigned int opCode) const;
+
+}; // AMDILTargetLowering
+} // end namespace llvm
+
+#endif    // AMDIL_ISELLOWERING_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,171 @@
+//===-- AMDILImageExpansion.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the image expansion class for image-capable devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDILKernelManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+AMDILImageExpansion::AMDILImageExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel)
+  : AMDIL789IOExpansion(tm, OptLevel)
+{
+}
+
+AMDILImageExpansion::~AMDILImageExpansion()
+{
+}
+void AMDILImageExpansion::expandInefficientImageLoad(
+  MachineBasicBlock *mBB, MachineInstr *MI)
+{
+#if 0
+  const llvm::StringRef &name = MI->getOperand(0).getGlobal()->getName();
+  const char *tReg1, *tReg2, *tReg3, *tReg4;
+  tReg1 = mASM->getRegisterName(MI->getOperand(1).getReg());
+  if (MI->getOperand(2).isReg()) {
+    tReg2 = mASM->getRegisterName(MI->getOperand(2).getReg());
+  } else {
+    tReg2 = mASM->getRegisterName(AMDIL::R1);
+    O << "\tmov " << tReg2 << ", l" << MI->getOperand(2).getImm() << "\n";
+  }
+  if (MI->getOperand(3).isReg()) {
+    tReg3 = mASM->getRegisterName(MI->getOperand(3).getReg());
+  } else {
+    tReg3 = mASM->getRegisterName(AMDIL::R2);
+    O << "\tmov " << tReg3 << ", l" << MI->getOperand(3).getImm() << "\n";
+  }
+  if (MI->getOperand(4).isReg()) {
+    tReg4 = mASM->getRegisterName(MI->getOperand(4).getReg());
+  } else {
+    tReg4 = mASM->getRegisterName(AMDIL::R3);
+    O << "\tmov " << tReg2 << ", l" << MI->getOperand(4).getImm() << "\n";
+  }
+  bool internalSampler = false;
+  //bool linear = true;
+  unsigned ImageCount = 3; // OPENCL_MAX_READ_IMAGES
+  unsigned SamplerCount = 3; // OPENCL_MAX_SAMPLERS
+  if (ImageCount - 1) {
+    O << "\tswitch " << mASM->getRegisterName(MI->getOperand(1).getReg())
+      << "\n";
+  }
+  for (unsigned rID = 0; rID < ImageCount; ++rID) {
+    if (ImageCount - 1)  {
+      if (!rID) {
+        O << "\tdefault\n";
+      } else {
+        O << "\tcase " << rID << "\n" ;
+      }
+      O << "\tswitch " << mASM->getRegisterName(MI->getOperand(2).getReg())
+        << "\n";
+    }
+    for (unsigned sID = 0; sID < SamplerCount; ++sID) {
+      if (SamplerCount - 1) {
+        if (!sID) {
+          O << "\tdefault\n";
+        } else {
+          O << "\tcase " << sID << "\n" ;
+        }
+      }
+      if (internalSampler) {
+        // Check if sampler has normalized setting.
+        O << "\tand r0.x, " << tReg2 << ".x, l0.y\n"
+          << "\tif_logicalz r0.x\n"
+          << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+          << "\tsample_resource(" << rID << ")_sampler("
+          << sID << ")_coordtype(unnormalized) "
+          << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n"
+          << "\telse\n"
+          << "\tiadd " << tReg1 << ".y, " << tReg1 << ".x, l0.y\n"
+          << "\titof " << tReg2 << ", cb1[" << tReg1 << ".x].xyz\n"
+          << "\tmul " << tReg3 << ", " << tReg3 << ", " << tReg2 << "\n"
+          << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+          << "\tmul " << tReg3 << ", " << tReg3 << ", cb1["
+          << tReg1 << ".y].xyz\n"
+          << "\tsample_resource(" << rID << ")_sampler("
+          << sID << ")_coordtype(normalized) "
+          << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n"
+          << "\tendif\n";
+      } else {
+        O << "\tiadd " << tReg1 << ".y, " << tReg1 << ".x, l0.y\n"
+          // Check if sampler has normalized setting.
+          << "\tand r0, " << tReg2 << ".x, l0.y\n"
+          // Convert image dimensions to float.
+          << "\titof " << tReg4 << ", cb1[" << tReg1 << ".x].xyz\n"
+          // Move into R0 1 if unnormalized or dimensions if normalized.
+          << "\tcmov_logical r0, r0, " << tReg4 << ", r1.1111\n"
+          // Make coordinates unnormalized.
+          << "\tmul " << tReg3 << ", r0, " << tReg3 << "\n"
+          // Get linear filtering if set.
+          << "\tand " << tReg4 << ", " << tReg2 << ".x, l6.x\n"
+          // Save unnormalized coordinates in R0.
+          << "\tmov r0, " << tReg3 << "\n"
+          // Floor the coordinates due to HW incompatibility with precision
+          // requirements.
+          << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+          // Get original coordinates (without floor) if linear filtering
+          << "\tcmov_logical " << tReg3 << ", " << tReg4
+          << ".xxxx, r0, " << tReg3 << "\n"
+          // Normalize the coordinates by multiplying by 1/dimensions
+          << "\tmul " << tReg3 << ", " << tReg3 << ", cb1["
+          << tReg1 << ".y].xyz\n"
+          << "\tsample_resource(" << rID << ")_sampler("
+          << sID << ")_coordtype(normalized) "
+          << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n";
+      }
+      if (SamplerCount - 1) {
+        O << "\tbreak\n";
+      }
+    }
+    if (SamplerCount - 1) {
+      O << "\tendswitch\n";
+    }
+    if (ImageCount - 1) {
+      O << "\tbreak\n";
+    }
+  }
+  if (ImageCount - 1) {
+    O << "\tendswitch\n";
+  }
+#endif
+}
+void
+AMDILImageExpansion::expandImageLoad(MachineBasicBlock *mBB, MachineInstr *MI)
+{
+  uint32_t imageID = getPointerID(MI);
+  MI->getOperand(1).ChangeToImmediate(imageID);
+  saveInst = true;
+}
+void
+AMDILImageExpansion::expandImageStore(MachineBasicBlock *mBB, MachineInstr *MI)
+{
+  uint32_t imageID = getPointerID(MI);
+  mKM->setOutputInst();
+  MI->getOperand(0).ChangeToImmediate(imageID);
+  saveInst = true;
+}
+void
+AMDILImageExpansion::expandImageParam(MachineBasicBlock *mBB, MachineInstr *MI)
+{
+  uint32_t ID = getPointerID(MI);
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CBLOAD),
+          MI->getOperand(0).getReg())
+  .addImm(ID)
+  .addImm(1);
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,275 @@
+//===-- AMDILInliner.cpp --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "amdilinline"
+#include "AMDIL.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+namespace
+{
+class LLVM_LIBRARY_VISIBILITY AMDILInlinePass: public FunctionPass
+
+{
+public:
+  TargetMachine &TM;
+  static char ID;
+  AMDILInlinePass(TargetMachine &tm, CodeGenOpt::Level OL);
+  ~AMDILInlinePass();
+  virtual const char* getPassName() const;
+  virtual bool runOnFunction(Function &F);
+  bool doInitialization(Module &M);
+  bool doFinalization(Module &M);
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+private:
+  typedef DenseMap<const ArrayType*, SmallVector<AllocaInst*,
+          DEFAULT_VEC_SLOTS> > InlinedArrayAllocasTy;
+  bool
+  AMDILInlineCallIfPossible(CallSite CS,
+                            const TargetData *TD,
+                            InlinedArrayAllocasTy &InlinedArrayAllocas);
+  CodeGenOpt::Level OptLevel;
+};
+char AMDILInlinePass::ID = 0;
+} // anonymous namespace
+
+
+namespace llvm
+{
+FunctionPass*
+createAMDILInlinePass(TargetMachine &tm, CodeGenOpt::Level OL)
+{
+  return new AMDILInlinePass(tm, OL);
+}
+} // llvm namespace
+
+AMDILInlinePass::AMDILInlinePass(TargetMachine &tm, CodeGenOpt::Level OL)
+  : FunctionPass(ID), TM(tm)
+{
+  OptLevel = OL;
+}
+AMDILInlinePass::~AMDILInlinePass()
+{
+}
+
+
+bool
+AMDILInlinePass::AMDILInlineCallIfPossible(CallSite CS,
+    const TargetData *TD, InlinedArrayAllocasTy &InlinedArrayAllocas)
+{
+  Function *Callee = CS.getCalledFunction();
+  Function *Caller = CS.getCaller();
+
+  // Try to inline the function.  Get the list of static allocas that were
+  // inlined.
+  SmallVector<AllocaInst*, 16> StaticAllocas;
+  InlineFunctionInfo IFI;
+  if (!InlineFunction(CS, IFI))
+    return false;
+  DEBUG(errs() << "<amdilinline> function " << Caller->getName()
+        << ": inlined call to "<< Callee->getName() << "\n");
+
+  // If the inlined function had a higher stack protection level than the
+  // calling function, then bump up the caller's stack protection level.
+  if (Callee->hasFnAttr(Attribute::StackProtectReq))
+    Caller->addFnAttr(Attribute::StackProtectReq);
+  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
+           !Caller->hasFnAttr(Attribute::StackProtectReq))
+    Caller->addFnAttr(Attribute::StackProtect);
+
+
+  // Look at all of the allocas that we inlined through this call site.  If we
+  // have already inlined other allocas through other calls into this function,
+  // then we know that they have disjoint lifetimes and that we can merge them.
+  //
+  // There are many heuristics possible for merging these allocas, and the
+  // different options have different tradeoffs.  One thing that we *really*
+  // don't want to hurt is SRoA: once inlining happens, often allocas are no
+  // longer address taken and so they can be promoted.
+  //
+  // Our "solution" for that is to only merge allocas whose outermost type is an
+  // array type.  These are usually not promoted because someone is using a
+  // variable index into them.  These are also often the most important ones to
+  // merge.
+  //
+  // A better solution would be to have real memory lifetime markers in the IR
+  // and not have the inliner do any merging of allocas at all.  This would
+  // allow the backend to do proper stack slot coloring of all allocas that
+  // *actually make it to the backend*, which is really what we want.
+  //
+  // Because we don't have this information, we do this simple and useful hack.
+  //
+  SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+  // Loop over all the allocas we have so far and see if they can be merged with
+  // a previously inlined alloca.  If not, remember that we had it.
+
+  for (unsigned AllocaNo = 0,
+       e = IFI.StaticAllocas.size();
+       AllocaNo != e; ++AllocaNo) {
+
+    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+    // Don't bother trying to merge array allocations (they will usually be
+    // canonicalized to be an allocation *of* an array), or allocations whose
+    // type is not itself an array (because we're afraid of pessimizing SRoA).
+    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+    if (ATy == 0 || AI->isArrayAllocation())
+      continue;
+
+    // Get the list of all available allocas for this array type.
+    SmallVector<AllocaInst*, DEFAULT_VEC_SLOTS> &AllocasForType
+    = InlinedArrayAllocas[ATy];
+
+    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
+    // that we have to be careful not to reuse the same "available" alloca for
+    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
+    // set to keep track of which "available" allocas are being used by this
+    // function.  Also, AllocasForType can be empty of course!
+    bool MergedAwayAlloca = false;
+    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+      AllocaInst *AvailableAlloca = AllocasForType[i];
+
+      // The available alloca has to be in the right function, not in some other
+      // function in this SCC.
+      if (AvailableAlloca->getParent() != AI->getParent())
+        continue;
+
+      // If the inlined function already uses this alloca then we can't reuse
+      // it.
+      if (!UsedAllocas.insert(AvailableAlloca))
+        continue;
+
+      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+      // success!
+      DEBUG(errs() << "    ***MERGED ALLOCA: " << *AI);
+
+      AI->replaceAllUsesWith(AvailableAlloca);
+      AI->eraseFromParent();
+      MergedAwayAlloca = true;
+      break;
+    }
+
+    // If we already nuked the alloca, we're done with it.
+    if (MergedAwayAlloca)
+      continue;
+
+    // If we were unable to merge away the alloca either because there are no
+    // allocas of the right type available or because we reused them all
+    // already, remember that this alloca came from an inlined function and mark
+    // it used so we don't reuse it for other allocas from this inline
+    // operation.
+    AllocasForType.push_back(AI);
+    UsedAllocas.insert(AI);
+  }
+
+  return true;
+}
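
To illustrate what the merging buys (hypothetical source, not part of the commit): once both calls below are inlined, the two array temporaries have disjoint lifetimes and an outermost array type, so the pass lets the second one reuse the first one's stack slot instead of growing the frame.

    static float sum64(const float *in) {
      float tmp[64];                   // becomes a static alloca after inlining
      float s = 0.0f;
      for (int i = 0; i < 64; ++i) {
        tmp[i] = in[i];
        s += tmp[i];
      }
      return s;
    }

    float twoSums(const float *a, const float *b) {
      return sum64(a) + sum64(b);      // both tmp arrays can share one slot
    }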
+
+bool
+AMDILInlinePass::runOnFunction(Function &MF)
+{
+  Function *F = &MF;
+  const AMDILSubtarget &STM = TM.getSubtarget<AMDILSubtarget>();
+  if (STM.device()->isSupported(AMDILDeviceInfo::NoInline)) {
+    return false;
+  }
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  SmallVector<CallSite, 16> CallSites;
+  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      CallSite CS = CallSite(cast<Value>(I));
+      // If this isn't a call, or it is a call to an intrinsic, it can
+      // never be inlined.
+      if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
+        continue;
+
+      // If this is a direct call to an external function, we can never inline
+      // it.  If it is an indirect call, inlining may resolve it to be a
+      // direct call, so we keep it.
+      if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
+        continue;
+
+      // We don't want to inline if we are recursive.
+      if (CS.getCalledFunction() && CS.getCalledFunction()->getName() == MF.getName()) {
+        AMDILMachineFunctionInfo *MFI =
+          getAnalysis<MachineFunctionAnalysis>().getMF()
+          .getInfo<AMDILMachineFunctionInfo>();
+        MFI->addErrorMsg(amd::CompilerErrorMessage[RECURSIVE_FUNCTION]);
+        continue;
+      }
+
+      CallSites.push_back(CS);
+    }
+  }
+
+  InlinedArrayAllocasTy InlinedArrayAllocas;
+  bool Changed = false;
+  for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+    CallSite CS = CallSites[CSi];
+
+    Function *Callee = CS.getCalledFunction();
+
+    // We can only inline direct calls to non-declarations.
+    if (Callee == 0 || Callee->isDeclaration()) continue;
+
+    // Attempt to inline the function...
+    if (!AMDILInlineCallIfPossible(CS, TD, InlinedArrayAllocas))
+      continue;
+    Changed = true;
+  }
+  return Changed;
+}
+
+const char*
+AMDILInlinePass::getPassName() const
+{
+  return "AMDIL Inline Function Pass";
+}
+bool
+AMDILInlinePass::doInitialization(Module &M)
+{
+  return false;
+}
+
+bool
+AMDILInlinePass::doFinalization(Module &M)
+{
+  return false;
+}
+
+void
+AMDILInlinePass::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  AU.addRequired<MachineFunctionAnalysis>();
+  FunctionPass::getAnalysisUsage(AU);
+  AU.setPreservesAll();
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,33 @@
+//===-- AMDILInstPrinter.cpp ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+AMDILInstPrinter::AMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                                   const MCRegisterInfo &MRI)
+  : MCInstPrinter(MAI, MII, MRI)
+{
+}
+void
+AMDILInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef annot)
+{
+  llvm_unreachable("unsupported");
+}
+
+AMDILInstPrinter::~AMDILInstPrinter()
+{
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,37 @@
+//===-- AMDILInstPrinter.h ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILMINSTPRINTER_H_
+#define AMDILMINSTPRINTER_H_
+#include "AMDILLLVMVersion.h"
+#include "AMDILLLVMPC.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm
+{
+class MCAsmInfo;
+class MCInst;
+class raw_ostream;
+// FIXME: We will need to implement this class when we transition to use
+//        MCStreamer.
+class AMDILInstPrinter : public MCInstPrinter
+{
+public:
+  virtual ~AMDILInstPrinter();
+  AMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI);
+  virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef annot);
+};
+
+} // namespace llvm
+
+#endif // AMDILMINSTPRINTER_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros Tue Aug 14 16:38:58 2012
@@ -0,0 +1,274 @@
+
+
+#define _32BIT_EXTENSION(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(IL_AS##TYPEINSTR##_i32 newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV)))>; newline; newline;
+
+#define _32BIT_EXTENSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(IL_AS##TYPEINSTR##_##VTYPE##i32 newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV))))>; newline; newline;
+
+#define _64BIT_ZEXTENSION(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, MASK) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(LCREATE newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV)), newline; \
+(LOADCONST_i32 0))>; newline; newline;
+
+#define _64BIT_ZEXTENSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(LCREATE_##VTYPE##i64 newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 0)))>; newline; newline;
+
+#define _64BIT_SEXTENSION(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, MASK) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(LCREATE newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV)), newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline;  \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 31)))>; newline; newline;
+
+#define _64BIT_SEXTENSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(LCREATE_##VTYPE##i64 newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV))), newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline;  \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 31))))>; newline; newline;
+
+#define _32BIT_F2I_CONVERSION(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, SHRINSTR, LSV, RSV, MASK) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline;  \
+(TO  newline; \
+(FPINSTR newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV))))>; newline; newline; 
+
+#define _32BIT_F2I_CONVERSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, SHRINSTR, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline;  \
+(TO  newline; \
+(FPINSTR newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV)))))>; newline; newline; 
+
+#define _32BIT_I2F_CONVERSION(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, MASK, TYPEINSTR) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(IL_AS##TYPEINSTR##_i32 newline; \
+(BINARY_AND_i32 newline; \
+(FPINSTR REGTYPE:$src), newline; \
+(LOADCONST_i32 MASK))))>; newline; newline; 
+
+#define _32BIT_I2F_CONVERSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, MASK, TYPEINSTR, VTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(IL_AS##TYPEINSTR##_##VTYPE##i32 newline; \
+(BINARY_AND_##VTYPE##i32 newline; \
+(FPINSTR REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 MASK)))))>; newline; newline; 
+
+#define _32BIT_D2I_CONVERSION(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, SHRINSTR, LSV, RSV, MASK) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline;  \
+(TO  (FTOD newline; \
+(FPINSTR newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV)))))>; newline; newline; 
+
+#define _32BIT_D2I_CONVERSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, SHRINSTR, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline;  \
+(TO  (FTOD_##VTYPE##f64 newline; \
+(FPINSTR newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV))))))>; newline; newline; 
+
+#define _32BIT_I2D_CONVERSION(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, MASK, TYPEINSTR) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(IL_AS##TYPEINSTR##_i32 newline; \
+(BINARY_AND_i32 newline; \
+(FPINSTR (DTOF REGTYPE:$src)), newline; \
+(LOADCONST_i32 MASK))))>; newline; newline; 
+
+#define _32BIT_I2D_CONVERSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, MASK, TYPEINSTR, VTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(IL_AS##TYPEINSTR##_##VTYPE##i32 newline; \
+(BINARY_AND_##VTYPE##i32 newline; \
+(FPINSTR (DTOF_##VTYPE##f32 REGTYPE:$src)), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 MASK)))))>; newline; newline; 
+
+#define _32BIT_TRUNCATE(FI, TI, FROM, TO, REGTYPE, ASTYPE, MASK, BITSHIFT) \
+def FI##to##TI##_##TO : Pat<(TO (trunc REGTYPE:$src)), newline; \
+(IL_AS##ASTYPE##_i32 newline; \
+(IL_ASINT_##FROM newline; \
+(BINARY_AND_##FROM REGTYPE:$src, newline; \
+(LOADCONST_##FROM MASK))) newline;\
+)>; newline; newline;
+
+#define _32BIT_TRUNCATE_VEC(FI, TI, FROM, TO, REGTYPE, ASTYPE, MASK, VTYPE, SCALARFROM, BITSHIFT, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (trunc REGTYPE:$src)), newline; \
+(IL_AS##ASTYPE##_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM newline; \
+(BINARY_AND_##FROM REGTYPE:$src, newline; \
+(VCREATE_##VTYPE##SCALARFROM (LOADCONST_##SCALARFROM MASK)))) newline; \
+)>; newline; newline;
+
+#define _64BIT_TRUNCATE(TI, TO, ASTYPE, MASK, BITSHIFT) \
+def lto##TI##_##TO : Pat<(TO (trunc GPRI64:$src)), newline; \
+(IL_AS##ASTYPE##_i32 newline; \
+(BINARY_AND_i32 newline; \
+(LLO GPRI64:$src), newline; \
+(LOADCONST_i32 MASK)) newline; \
+)>; newline; newline;
+
+#define _64BIT_TRUNCATE_VEC(TI, TO, ASTYPE, MASK, VTYPE, ASVTYPE, BITSHIFT) \
+def lto##TI##_##TO : Pat<(TO (trunc GPR##ASVTYPE##I64:$src)), newline; \
+(IL_AS##ASTYPE##_##VTYPE##i32 newline; \
+(BINARY_AND_##VTYPE##i32 newline; \
+(LLO_##VTYPE##i64 GPR##ASVTYPE##I64:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 MASK))) newline; \
+)>; newline; newline;
+
+
+#define GENERATE_32BIT_CONVERT(FI, TI, FROM, TO, FREGTYPE, ASTYPE, LSV, RSV) \
+_32BIT_EXTENSION(a##FI, TI, FROM, TO, FREGTYPE, anyext, ASTYPE, USHR, LSV, RSV) \
+_32BIT_EXTENSION(u##FI, TI, FROM, TO, FREGTYPE,   zext, ASTYPE, USHR, LSV, RSV) \
+_32BIT_EXTENSION(s##FI, TI, FROM, TO, FREGTYPE,   sext, ASTYPE,  SHR, LSV, RSV) \
+
+#define GENERATE_64BIT_CONVERT(FI, TI, FROM, TO, FREGTYPE, ASTYPE, LSV, RSV, MASK) \
+_64BIT_ZEXTENSION(a##FI, TI, FROM, TO, FREGTYPE, anyext, ASTYPE, USHR, LSV, RSV, MASK) \
+_64BIT_ZEXTENSION(u##FI, TI, FROM, TO, FREGTYPE,   zext, ASTYPE, USHR, LSV, RSV, MASK) \
+_64BIT_SEXTENSION(s##FI, TI, FROM, TO, FREGTYPE,   sext, ASTYPE,  SHR, LSV, RSV, MASK) \
+
+#define GENERATE_32BIT_FP_CONVERSION(FI, FROM, IREGTYPE, ASTYPE, LSV, RSV, MASK) \
+_32BIT_F2I_CONVERSION(s##FI, f, FROM, f32, IREGTYPE, sint_to_fp, ITOF,  SHR, LSV, RSV, MASK) \
+_32BIT_F2I_CONVERSION(u##FI, f, FROM, f32, IREGTYPE, uint_to_fp, UTOF, USHR, LSV, RSV, MASK) \
+_32BIT_I2F_CONVERSION(f, s##FI, f32, FROM,   GPRF32, fp_to_sint, FTOI, MASK, ASTYPE) \
+_32BIT_I2F_CONVERSION(f, u##FI, f32, FROM,   GPRF32, fp_to_uint, FTOU, MASK, ASTYPE) \
+_32BIT_D2I_CONVERSION(s##FI, d, FROM, f64, IREGTYPE, sint_to_fp, ITOF,  SHR, LSV, RSV, MASK) \
+_32BIT_D2I_CONVERSION(u##FI, d, FROM, f64, IREGTYPE, uint_to_fp, UTOF, USHR, LSV, RSV, MASK) \
+_32BIT_I2D_CONVERSION(d, s##FI, f64, FROM,   GPRF64, fp_to_sint, FTOI, MASK, ASTYPE) \
+_32BIT_I2D_CONVERSION(d, u##FI, f64, FROM,   GPRF64, fp_to_uint, FTOU, MASK, ASTYPE)
+
+// Generate the conversion routines
+GENERATE_32BIT_CONVERT(c, s,  i8,  i16,  GPRI8, SHORT, 24, 24)
+GENERATE_32BIT_CONVERT(c, i,  i8,  i32,  GPRI8,   INT, 24, 24)
+GENERATE_64BIT_CONVERT(c, l,  i8,  i64,  GPRI8,  LONG, 24, 24, 0x000000FF)
+GENERATE_32BIT_CONVERT(s, i, i16,  i32, GPRI16,   INT, 16, 16)
+GENERATE_64BIT_CONVERT(s, l, i16,  i64, GPRI16,  LONG, 16, 16, 0x0000FFFF)
+GENERATE_64BIT_CONVERT(i, l, i32,  i64, GPRI32,  LONG,  0,  0, 0xFFFFFFFF)
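+// As an illustration (not generated output), the zext arm of
+// GENERATE_32BIT_CONVERT(c, s, i8, i16, GPRI8, SHORT, 24, 24) instantiates
+// _32BIT_EXTENSION(uc, s, i8, i16, GPRI8, zext, SHORT, USHR, 24, 24), which is
+// expected to expand to roughly:
+//   def uctos_i16 : Pat<(i16 (zext GPRI8:$src)),
+//     (IL_ASSHORT_i32
+//       (USHR_i32
+//         (SHL_i32 (IL_ASINT_i8 GPRI8:$src), (LOADCONST_i32 24)),
+//         (LOADCONST_i32 24)))>;
+// i.e. the value is shifted left and then logically right by 24 bits within a
+// 32-bit temporary, zero-extending the 8-bit source.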
+
+GENERATE_32BIT_FP_CONVERSION(c,  i8,  GPRI8, CHAR, 24, 24, 0x000000FF)
+GENERATE_32BIT_FP_CONVERSION(s, i16, GPRI16, SHORT, 16, 16, 0x0000FFFF)
+
+
+// Truncation Routines
+_32BIT_TRUNCATE(s, c, i16,  i8, GPRI16,  CHAR, 0x000000FF, 24)
+_32BIT_TRUNCATE(i, c, i32,  i8, GPRI32,  CHAR, 0x000000FF, 24)
+_32BIT_TRUNCATE(i, s, i32, i16, GPRI32, SHORT, 0x0000FFFF, 16)
+_64BIT_TRUNCATE(c,  i8,  CHAR, 0x000000FF, 24)
+_64BIT_TRUNCATE(s, i16, SHORT, 0x0000FFFF, 16)
+_64BIT_TRUNCATE(i, i32,   INT, 0xFFFFFFFF, 0)
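+// For example, _64BIT_TRUNCATE(c, i8, CHAR, 0x000000FF, 24) above is expected
+// to expand (once the "newline;" tokens become line breaks) to roughly:
+//   def ltoc_i8 : Pat<(i8 (trunc GPRI64:$src)),
+//     (IL_ASCHAR_i32
+//       (BINARY_AND_i32 (LLO GPRI64:$src), (LOADCONST_i32 0x000000FF)))>;
+// i.e. the low dword of the i64 is masked down to 8 bits.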
+
+// Vector Routines
+#define GENERATE_32BIT_CONVERT_VEC(FI, TI, FROM, TO, FREGTYPE, ASTYPE, LSV, RSV, VTYPE, ASVTYPE) \
+_32BIT_EXTENSION_VEC(a##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE, anyext, ASTYPE, USHRVEC, LSV, RSV, VTYPE, ASVTYPE) \
+_32BIT_EXTENSION_VEC(u##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE,   zext, ASTYPE, USHRVEC, LSV, RSV, VTYPE, ASVTYPE) \
+_32BIT_EXTENSION_VEC(s##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE,   sext, ASTYPE,  SHRVEC, LSV, RSV, VTYPE, ASVTYPE) \
+
+#define GENERATE_64BIT_CONVERT_VEC(FI, TI, FROM, TO, FREGTYPE, ASTYPE, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_64BIT_ZEXTENSION_VEC(a##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE, anyext, ASTYPE, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_64BIT_ZEXTENSION_VEC(u##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE,   zext, ASTYPE, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_64BIT_SEXTENSION_VEC(s##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE,   sext, ASTYPE,  SHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+
+#define GENERATE_32BIT_FP_CONVERSION_VEC2(FI, FROM, IREGTYPE, ASTYPE, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_F2I_CONVERSION_VEC(s##FI, f, VTYPE##FROM, VTYPE##f32, IREGTYPE, sint_to_fp, ITOF_##VTYPE##f32,  SHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_F2I_CONVERSION_VEC(u##FI, f, VTYPE##FROM, VTYPE##f32, IREGTYPE, uint_to_fp, UTOF_##VTYPE##f32, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_I2F_CONVERSION_VEC(f, s##FI, VTYPE##f32, VTYPE##FROM,   GPRV2F32, fp_to_sint, FTOI_##VTYPE##i32, MASK, ASTYPE, VTYPE) \
+_32BIT_I2F_CONVERSION_VEC(f, u##FI, VTYPE##f32, VTYPE##FROM,   GPRV2F32, fp_to_uint, FTOU_##VTYPE##i32, MASK, ASTYPE, VTYPE) \
+_32BIT_D2I_CONVERSION_VEC(s##FI, d, VTYPE##FROM, VTYPE##f64, IREGTYPE, sint_to_fp, ITOF_##VTYPE##f32,  SHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_D2I_CONVERSION_VEC(u##FI, d, VTYPE##FROM, VTYPE##f64, IREGTYPE, uint_to_fp, UTOF_##VTYPE##f32, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_I2D_CONVERSION_VEC(d, s##FI, VTYPE##f64, VTYPE##FROM,   GPRV2F64, fp_to_sint, FTOI_##VTYPE##i32, MASK, ASTYPE, VTYPE) \
+_32BIT_I2D_CONVERSION_VEC(d, u##FI, VTYPE##f64, VTYPE##FROM,   GPRV2F64, fp_to_uint, FTOU_##VTYPE##i32, MASK, ASTYPE, VTYPE)
+
+#define GENERATE_32BIT_FP_CONVERSION_VEC4(FI, FROM, IREGTYPE, ASTYPE, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_F2I_CONVERSION_VEC(s##FI, f, VTYPE##FROM, VTYPE##f32, IREGTYPE, sint_to_fp, ITOF_##VTYPE##f32,  SHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_F2I_CONVERSION_VEC(u##FI, f, VTYPE##FROM, VTYPE##f32, IREGTYPE, uint_to_fp, UTOF_##VTYPE##f32, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_I2F_CONVERSION_VEC(f, s##FI, VTYPE##f32, VTYPE##FROM,   GPRV4F32, fp_to_sint, FTOI_##VTYPE##i32, MASK, ASTYPE, VTYPE) \
+_32BIT_I2F_CONVERSION_VEC(f, u##FI, VTYPE##f32, VTYPE##FROM,   GPRV4F32, fp_to_uint, FTOU_##VTYPE##i32, MASK, ASTYPE, VTYPE) 
+
+// Generate the conversion routines for 2-component vectors
+GENERATE_32BIT_CONVERT_VEC(c, s,  i8,  i16,  GPRV2I8, V2SHORT, 24, 24, v2, V2)
+GENERATE_32BIT_CONVERT_VEC(c, i,  i8,  i32,  GPRV2I8,   V2INT, 24, 24, v2, V2)
+GENERATE_64BIT_CONVERT_VEC(c, l,  i8,  i64,  GPRV2I8,  V2LONG, 24, 24, 0x000000FF, v2, V2)
+GENERATE_32BIT_CONVERT_VEC(s, i, i16,  i32, GPRV2I16,   V2INT, 16, 16, v2, V2)
+GENERATE_64BIT_CONVERT_VEC(s, l, i16,  i64, GPRV2I16,  V2LONG, 16, 16, 0x0000FFFF, v2, V2)
+GENERATE_64BIT_CONVERT_VEC(i, l, i32,  i64, GPRV2I32,  V2LONG,  0,  0, 0xFFFFFFFF, v2, V2)
+
+GENERATE_32BIT_FP_CONVERSION_VEC2(c,  i8,  GPRV2I8,  V2CHAR, 24, 24, 0x000000FF, v2, V2)
+GENERATE_32BIT_FP_CONVERSION_VEC2(s, i16, GPRV2I16, V2SHORT, 16, 16, 0x0000FFFF, v2, V2)
+
+
+// Truncation Routines
+_32BIT_TRUNCATE_VEC(s, c, v2i16,  v2i8, GPRV2I16,  V2CHAR, 0x000000FF, v2, i16, 24, V2)
+_32BIT_TRUNCATE_VEC(i, c, v2i32,  v2i8, GPRV2I32,  V2CHAR, 0x000000FF, v2, i32, 24, V2)
+_32BIT_TRUNCATE_VEC(i, s, v2i32, v2i16, GPRV2I32, V2SHORT, 0x0000FFFF, v2, i32, 16, V2)
+_64BIT_TRUNCATE_VEC(c,  v2i8,  V2CHAR, 0x000000FF, v2, V2, 24)
+_64BIT_TRUNCATE_VEC(s, v2i16, V2SHORT, 0x0000FFFF, v2, V2, 16)
+_64BIT_TRUNCATE_VEC(i, v2i32,   V2INT, 0xFFFFFFFF, v2, V2, 0)
+
+// Generate the conversion routines for 4-component vectors
+GENERATE_32BIT_CONVERT_VEC(c, s,  i8,  i16,  GPRV4I8, V4SHORT, 24, 24, v4, V4)
+GENERATE_32BIT_CONVERT_VEC(c, i,  i8,  i32,  GPRV4I8,   V4INT, 24, 24, v4, V4)
+GENERATE_32BIT_CONVERT_VEC(s, i, i16,  i32, GPRV4I16,   V4INT, 16, 16, v4, V4)
+
+GENERATE_32BIT_FP_CONVERSION_VEC4(c,  i8,  GPRV4I8,  V4CHAR, 24, 24, 0x000000FF, v4, V4)
+GENERATE_32BIT_FP_CONVERSION_VEC4(s, i16, GPRV4I16, V4SHORT, 16, 16, 0x0000FFFF, v4, V4)
+
+
+// Truncation Routines
+_32BIT_TRUNCATE_VEC(s, c, v4i16,  v4i8, GPRV4I16,  V4CHAR, 0x000000FF, v4, i16, 24, V4)
+_32BIT_TRUNCATE_VEC(i, c, v4i32,  v4i8, GPRV4I32,  V4CHAR, 0x000000FF, v4, i32, 24, V4)
+_32BIT_TRUNCATE_VEC(i, s, v4i32, v4i16, GPRV4I32, V4SHORT, 0x0000FFFF, v4, i32, 16, V4)
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,923 @@
+//===-- AMDILInstrInfo.cpp ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "AMDILUtilityFunctions.h"
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_MC_DESC
+#include "AMDILGenInstrInfo.inc"
+#include "AMDILMachineFunctionInfo.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm)
+  : AMDILGenInstrInfo(AMDIL::ADJCALLSTACKDOWN, AMDIL::ADJCALLSTACKUP),
+    RI(tm, *this),
+    TM(tm)
+{
+}
+
+const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const
+{
+  return RI;
+}
+
+/// Return true if the instruction is a register to register move and leave the
+/// source and dest operands in the passed parameters.
+bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
+                                 unsigned int &DstReg, unsigned int &SrcSubIdx,
+                                 unsigned int &DstSubIdx) const
+{
+  // FIXME: we should look for:
+  //    add with 0
+  //assert(0 && "is Move Instruction has not been implemented yet!");
+  //return true;
+  if (!isMove(MI.getOpcode())) {
+    return false;
+  }
+  if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg()) {
+    return false;
+  }
+  SrcReg = MI.getOperand(1).getReg();
+  DstReg = MI.getOperand(0).getReg();
+  DstSubIdx = 0;
+  SrcSubIdx = 0;
+  return true;
+}
+
+bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+    unsigned &SrcReg, unsigned &DstReg,
+    unsigned &SubIdx) const
+{
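+  // NOTE: this analysis is currently disabled by the unconditional return
+  // below; the remaining code in this function is unreachable.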
+  return false;
+  unsigned opc = MI.getOpcode();
+  SubIdx = llvm::NoSubRegister;
+  switch (opc) {
+  default:
+    return false;
+  case AMDIL::DHI:
+  case AMDIL::LHI:
+    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+      // Be conservative.
+      return false;
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    SubIdx = llvm::sub_y_comp;
+    break;
+  case AMDIL::DLO:
+  case AMDIL::LLO:
+    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+      // Be conservative.
+      return false;
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    SubIdx = llvm::sub_x_comp;
+    break;
+  case AMDIL::VEXTRACT_v2f64:
+  case AMDIL::VEXTRACT_v2i64:
+    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+      // Be conservative.
+      return false;
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    assert(MI.getOperand(2).isImm()
+           && "Operand 2 must be an immediate value!");
+    switch (MI.getOperand(2).getImm()) {
+    case 0:
+      SubIdx = llvm::sub_xy_comp;
+      break;
+    case 1:
+      SubIdx = llvm::sub_zw_comp;
+      break;
+    default:
+      return false;
+    };
+  case AMDIL::VEXTRACT_v2f32:
+  case AMDIL::VEXTRACT_v2i32:
+  case AMDIL::VEXTRACT_v2i16:
+  case AMDIL::VEXTRACT_v2i8:
+  case AMDIL::VEXTRACT_v4f32:
+  case AMDIL::VEXTRACT_v4i32:
+  case AMDIL::VEXTRACT_v4i16:
+  case AMDIL::VEXTRACT_v4i8:
+    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+      // Be conservative.
+      return false;
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    assert(MI.getOperand(2).isImm()
+           && "Operand 2 must be an immediate value!");
+    switch (MI.getOperand(2).getImm()) {
+    case 0:
+      SubIdx = llvm::sub_x_comp;
+      break;
+    case 1:
+      SubIdx = llvm::sub_y_comp;
+      break;
+    case 2:
+      SubIdx = llvm::sub_z_comp;
+      break;
+    case 3:
+      SubIdx = llvm::sub_w_comp;
+      break;
+    default:
+      return false;
+    };
+  };
+  return SubIdx != llvm::NoSubRegister;
+}
+
+unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+    int &FrameIndex) const
+{
+  if (isPrivateInst(TM, MI) && isLoadInst(TM, MI) && MI->getOperand(1).isFI()) {
+    FrameIndex = MI->getOperand(1).getIndex();
+    return MI->getOperand(0).getReg();
+  }
+  return 0;
+}
+
+unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+    int &FrameIndex) const
+{
+  if (isPrivateInst(TM, MI) && isLoadInst(TM, MI) && MI->getOperand(1).isFI()) {
+    FrameIndex = MI->getOperand(1).getIndex();
+    return MI->getOperand(0).getReg();
+  }
+  return 0;
+}
+
+bool AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+    const MachineMemOperand *&MMO,
+    int &FrameIndex) const
+{
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+       oe = MI->memoperands_end();
+       o != oe;
+       ++o) {
+    if ((*o)->isLoad() && (*o)->getValue())
+      if (const FixedStackPseudoSourceValue *Value =
+            dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+        FrameIndex = Value->getFrameIndex();
+        MMO = *o;
+        return true;
+      }
+  }
+  return false;
+}
+unsigned AMDILInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+    int &FrameIndex) const
+{
+  if (isPrivateInst(TM, MI) && isStoreInst(TM, MI) && MI->getOperand(1).isFI()) {
+    FrameIndex = MI->getOperand(1).getIndex();
+    return MI->getOperand(0).getReg();
+  }
+  return 0;
+}
+unsigned AMDILInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+    int &FrameIndex) const
+{
+  if (isPrivateInst(TM, MI) && isStoreInst(TM, MI) && MI->getOperand(1).isFI()) {
+    unsigned Reg;
+    if ((Reg = isStoreToStackSlot(MI, FrameIndex))) {
+      return Reg;
+    }
+    const MachineMemOperand *Dummy = NULL;
+    return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+  }
+  return 0;
+}
+bool AMDILInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+    const MachineMemOperand *&MMO,
+    int &FrameIndex) const
+{
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+       oe = MI->memoperands_end();
+       o != oe;
+       ++o) {
+    if ((*o)->isStore() && (*o)->getValue())
+      if (const FixedStackPseudoSourceValue *Value =
+            dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+        FrameIndex = Value->getFrameIndex();
+        MMO = *o;
+        return true;
+      }
+  }
+  return false;
+}
+
+void
+AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MI,
+                              unsigned DestReg, unsigned SubIdx,
+                              const MachineInstr *Orig,
+                              const TargetRegisterInfo &TRI) const
+{
+  // TODO: Implement this function
+}
+
+MachineInstr*
+AMDILInstrInfo::duplicate(MachineInstr *Orig,
+                          MachineFunction &MF) const
+{
+  // TODO: Implement this function
+  return MF.CloneMachineInstr(Orig);
+}
+
+MachineInstr *
+AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+                                      MachineBasicBlock::iterator &MBBI,
+                                      LiveVariables *LV) const
+{
+  // TODO: Implement this function
+  return NULL;
+}
+
+MachineInstr*
+AMDILInstrInfo::commuteInstruction(MachineInstr *MI,
+                                   bool NewMI) const
+{
+  // TODO: Implement this function
+  return NULL;
+}
+bool
+AMDILInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+                                      unsigned &SrcOpIdx2) const
+{
+  // TODO: Implement this function
+  return false;
+}
+bool
+AMDILInstrInfo::produceSameValue(const MachineInstr *MI0,
+                                 const MachineInstr *MI1,
+                                 const MachineRegisterInfo *MRI) const
+{
+  // TODO: Implement this function
+  return false;
+}
+
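+// Advance 'iter' to the next branch instruction in MBB, returning true if one
+// was found before the end of the block.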
+bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
+                                        MachineBasicBlock &MBB) const
+{
+  while (iter != MBB.end()) {
+    switch (iter->getOpcode()) {
+    default:
+      break;
+      ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+    case AMDIL::BRANCH:
+      return true;
+    };
+    ++iter;
+  }
+  return false;
+}
+
+bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *&TBB,
+                                   MachineBasicBlock *&FBB,
+                                   SmallVectorImpl<MachineOperand> &Cond,
+                                   bool AllowModify) const
+{
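+  // NOTE: branch analysis is currently disabled; we immediately report that
+  // the branch cannot be analyzed, so the code after the early return is
+  // unreachable.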
+  bool retVal = true;
+  return retVal;
+  MachineBasicBlock::iterator iter = MBB.begin();
+  if (!getNextBranchInstr(iter, MBB)) {
+    retVal = false;
+  } else {
+    MachineInstr *firstBranch = iter;
+    if (!getNextBranchInstr(++iter, MBB)) {
+      if (firstBranch->getOpcode() == AMDIL::BRANCH) {
+        TBB = firstBranch->getOperand(0).getMBB();
+        firstBranch->eraseFromParent();
+        retVal = false;
+      } else {
+        TBB = firstBranch->getOperand(0).getMBB();
+        FBB = *(MBB.succ_begin()+1);
+        if (FBB == TBB) {
+          FBB = *(MBB.succ_begin());
+        }
+        Cond.push_back(firstBranch->getOperand(1));
+        retVal = false;
+      }
+    } else {
+      MachineInstr *secondBranch = iter;
+      if (!getNextBranchInstr(++iter, MBB)) {
+        if (secondBranch->getOpcode() == AMDIL::BRANCH) {
+          TBB = firstBranch->getOperand(0).getMBB();
+          Cond.push_back(firstBranch->getOperand(1));
+          FBB = secondBranch->getOperand(0).getMBB();
+          secondBranch->eraseFromParent();
+          retVal = false;
+        } else {
+          assert(0 && "Should not have two consecutive conditional branches");
+        }
+      } else {
+        MBB.getParent()->viewCFG();
+        assert(0 && "Should not have three branch instructions in"
+               " a single basic block");
+        retVal = false;
+      }
+    }
+  }
+  return retVal;
+}
+
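+// Return the conditional branch opcode that matches the register class of the
+// branch condition operand.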
+unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const
+{
+  const MachineInstr *MI = op.getParent();
+
+  switch (MI->getDesc().OpInfo->RegClass) {
+  default: // FIXME: fallthrough??
+  case AMDIL::GPRI8RegClassID:
+    return AMDIL::BRANCH_COND_i8;
+  case AMDIL::GPRI16RegClassID:
+    return AMDIL::BRANCH_COND_i16;
+  case AMDIL::GPRI32RegClassID:
+    return AMDIL::BRANCH_COND_i32;
+  case AMDIL::GPRI64RegClassID:
+    return AMDIL::BRANCH_COND_i64;
+  case AMDIL::GPRF32RegClassID:
+    return AMDIL::BRANCH_COND_f32;
+  case AMDIL::GPRF64RegClassID:
+    return AMDIL::BRANCH_COND_f64;
+  };
+}
+
+unsigned int
+AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+                             MachineBasicBlock *TBB,
+                             MachineBasicBlock *FBB,
+                             const SmallVectorImpl<MachineOperand> &Cond,
+                             DebugLoc DL) const
+{
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  for (unsigned int x = 0; x < Cond.size(); ++x) {
+    Cond[x].getParent()->dump();
+  }
+  if (FBB == 0) {
+    if (Cond.empty()) {
+      BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(TBB);
+    } else {
+      BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
+      .addMBB(TBB).addReg(Cond[0].getReg());
+    }
+    return 1;
+  } else {
+    BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
+    .addMBB(TBB).addReg(Cond[0].getReg());
+    BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(FBB);
+  }
+  assert(0 && "Inserting two branches not supported");
+  return 0;
+}
+
+unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin()) {
+    return 0;
+  }
+  --I;
+  switch (I->getOpcode()) {
+  default:
+    return 0;
+    ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+  case AMDIL::BRANCH:
+    I->eraseFromParent();
+    break;
+  }
+  I = MBB.end();
+
+  if (I == MBB.begin()) {
+    return 1;
+  }
+  --I;
+  switch (I->getOpcode()) {
+    // FIXME: only one case??
+  default:
+    return 1;
+    ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+    I->eraseFromParent();
+    break;
+  }
+  return 2;
+}
+
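+// Walk backwards from the end of the block and return an iterator just past
+// the trailing flow-control pseudo instructions (ENDLOOP/ENDIF/ELSE), so that
+// new instructions can be inserted before them.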
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB)
+{
+  MachineBasicBlock::iterator tmp = MBB->end();
+  if (!MBB->size()) {
+    return MBB->end();
+  }
+  while (--tmp) {
+    if (tmp->getOpcode() == AMDIL::ENDLOOP
+        || tmp->getOpcode() == AMDIL::ENDIF
+        || tmp->getOpcode() == AMDIL::ELSE) {
+      if (tmp == MBB->begin()) {
+        return tmp;
+      } else {
+        continue;
+      }
+    }  else {
+      return ++tmp;
+    }
+  }
+  return MBB->end();
+}
+
+bool
+AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator I,
+                             unsigned DestReg, unsigned SrcReg,
+                             const TargetRegisterClass *DestRC,
+                             const TargetRegisterClass *SrcRC,
+                             DebugLoc DL) const
+{
+  // If we are adding to the end of a basic block we can safely assume that
+  // the move is caused by a PHI node, since all non-PHI move instructions
+  // have already been inserted into the basic blocks. Therefore we call
+  // skipFlowControl to move the iterator before the flow control instructions
+  // and put the move instruction there.
+  bool phi = (DestReg < 1025) || (SrcReg < 1025);
+  int movInst = phi ? getMoveInstFromID(DestRC->getID())
+                : getPHIMoveInstFromID(DestRC->getID());
+
+  MachineBasicBlock::iterator iTemp = (I == MBB.end()) ? skipFlowControl(&MBB)
+                                      : I;
+  if (DestRC != SrcRC) {
+    //int convInst;
+    size_t dSize = DestRC->getSize();
+    size_t sSize = SrcRC->getSize();
+    if (dSize > sSize) {
+      // Elements are going to get duplicated.
+      BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+    } else if (dSize == sSize) {
+      // Direct copy, conversions are not handled.
+      BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+    } else if (dSize < sSize) {
+      // Elements are going to get dropped.
+      BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+    }
+  } else {
+    BuildMI( MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+  }
+  return true;
+}
+void
+AMDILInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI, DebugLoc DL,
+                            unsigned DestReg, unsigned SrcReg,
+                            bool KillSrc) const
+{
+  BuildMI(MBB, MI, DL, get(AMDIL::MOVE_v4i32), DestReg)
+  .addReg(SrcReg, getKillRegState(KillSrc));
+  return;
+#if 0
+  DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+        << " to " << RI.getName(DestReg) << '\n');
+  llvm_unreachable("Cannot emit physreg copy instruction");
+#endif
+}
+void
+AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    unsigned SrcReg, bool isKill,
+                                    int FrameIndex,
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const
+{
+  unsigned int Opc = 0;
+  MachineFunction &MF = *(MBB.getParent());
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+  DebugLoc DL;
+  switch (RC->getID()) {
+  default:
+    Opc = AMDIL::PRIVATESTORE_v4i32;
+    break;
+  case AMDIL::GPRF32RegClassID:
+    Opc = AMDIL::PRIVATESTORE_f32;
+    break;
+  case AMDIL::GPRF64RegClassID:
+    Opc = AMDIL::PRIVATESTORE_f64;
+    break;
+  case AMDIL::GPRI16RegClassID:
+    Opc = AMDIL::PRIVATESTORE_i16;
+    break;
+  case AMDIL::GPRI32RegClassID:
+    Opc = AMDIL::PRIVATESTORE_i32;
+    break;
+  case AMDIL::GPRI8RegClassID:
+    Opc = AMDIL::PRIVATESTORE_i8;
+    break;
+  case AMDIL::GPRI64RegClassID:
+    Opc = AMDIL::PRIVATESTORE_i64;
+    break;
+  case AMDIL::GPRV2F32RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v2f32;
+    break;
+  case AMDIL::GPRV2F64RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v2f64;
+    break;
+  case AMDIL::GPRV2I16RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v2i16;
+    break;
+  case AMDIL::GPRV2I32RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v2i32;
+    break;
+  case AMDIL::GPRV2I8RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v2i8;
+    break;
+  case AMDIL::GPRV2I64RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v2i64;
+    break;
+  case AMDIL::GPRV4F32RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v4f32;
+    break;
+  case AMDIL::GPRV4I16RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v4i16;
+    break;
+  case AMDIL::GPRV4I32RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v4i32;
+    break;
+  case AMDIL::GPRV4I8RegClassID:
+    Opc = AMDIL::PRIVATESTORE_v4i8;
+    break;
+  }
+  if (MI != MBB.end()) {
+    DL = MI->getDebugLoc();
+  }
+  // This is a store to the stack slot, so the memory operand is a store.
+  MachineMemOperand *MMO =
+    new MachineMemOperand(
+    MachinePointerInfo::getFixedStack(FrameIndex),
+    MachineMemOperand::MOStore,
+    MFI.getObjectSize(FrameIndex),
+    MFI.getObjectAlignment(FrameIndex));
+  MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc))
+                      .addReg(SrcReg, getKillRegState(isKill))
+                      .addFrameIndex(FrameIndex)
+                      .addMemOperand(MMO)
+                      .addImm(0);
+  AMDILMachineFunctionInfo *mfinfo = MF.getInfo<AMDILMachineFunctionInfo>();
+  mfinfo->setUsesScratch();
+  AMDILAS::InstrResEnc curRes;
+  curRes.bits.ResourceID
+  = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+  setAsmPrinterFlags(nMI, curRes);
+}
+
+void
+AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MI,
+                                     unsigned DestReg, int FrameIndex,
+                                     const TargetRegisterClass *RC,
+                                     const TargetRegisterInfo *TRI) const
+{
+  unsigned int Opc = 0;
+  MachineFunction &MF = *(MBB.getParent());
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  DebugLoc DL;
+  switch (RC->getID()) {
+  default:
+    Opc = AMDIL::PRIVATELOAD_v4i32;
+    break;
+  case AMDIL::GPRF32RegClassID:
+    Opc = AMDIL::PRIVATELOAD_f32;
+    break;
+  case AMDIL::GPRF64RegClassID:
+    Opc = AMDIL::PRIVATELOAD_f64;
+    break;
+  case AMDIL::GPRI16RegClassID:
+    Opc = AMDIL::PRIVATELOAD_i16;
+    break;
+  case AMDIL::GPRI32RegClassID:
+    Opc = AMDIL::PRIVATELOAD_i32;
+    break;
+  case AMDIL::GPRI8RegClassID:
+    Opc = AMDIL::PRIVATELOAD_i8;
+    break;
+  case AMDIL::GPRI64RegClassID:
+    Opc = AMDIL::PRIVATELOAD_i64;
+    break;
+  case AMDIL::GPRV2F32RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v2f32;
+    break;
+  case AMDIL::GPRV2F64RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v2f64;
+    break;
+  case AMDIL::GPRV2I16RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v2i16;
+    break;
+  case AMDIL::GPRV2I32RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v2i32;
+    break;
+  case AMDIL::GPRV2I8RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v2i8;
+    break;
+  case AMDIL::GPRV2I64RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v2i64;
+    break;
+  case AMDIL::GPRV4F32RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v4f32;
+    break;
+  case AMDIL::GPRV4I16RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v4i16;
+    break;
+  case AMDIL::GPRV4I32RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v4i32;
+    break;
+  case AMDIL::GPRV4I8RegClassID:
+    Opc = AMDIL::PRIVATELOAD_v4i8;
+    break;
+  }
+
+  MachineMemOperand *MMO =
+    new MachineMemOperand(
+    MachinePointerInfo::getFixedStack(FrameIndex),
+    MachineMemOperand::MOLoad,
+    MFI.getObjectSize(FrameIndex),
+    MFI.getObjectAlignment(FrameIndex));
+  if (MI != MBB.end()) {
+    DL = MI->getDebugLoc();
+  }
+  AMDILMachineFunctionInfo *mfinfo = MF.getInfo<AMDILMachineFunctionInfo>();
+  mfinfo->setUsesScratch();
+  MachineInstr* nMI = BuildMI(MBB, MI, DL, get(Opc))
+                      .addReg(DestReg, RegState::Define)
+                      .addFrameIndex(FrameIndex)
+                      .addMemOperand(MMO)
+                      .addImm(0);
+  AMDILAS::InstrResEnc curRes;
+  curRes.bits.ResourceID
+  = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+  setAsmPrinterFlags(nMI, curRes);
+
+}
+#if 0
+MachineInstr *
+AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr *MI,
+                                      const SmallVectorImpl<unsigned> &Ops,
+                                      int FrameIndex) const
+{
+  // TODO: Implement this function
+  return 0;
+}
+MachineInstr*
+AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr *MI,
+                                      const SmallVectorImpl<unsigned> &Ops,
+                                      MachineInstr *LoadMI) const
+{
+  // TODO: Implement this function
+  return 0;
+}
+#endif
+
+#if 0
+bool
+AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+                                     const SmallVectorImpl<unsigned> &Ops) const
+{
+  // TODO: Implement this function
+  return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
+}
+bool
+AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+                                    unsigned Reg, bool UnfoldLoad,
+                                    bool UnfoldStore,
+                                    SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+  // TODO: Implement this function
+  return false;
+}
+
+bool
+AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+                                    SmallVectorImpl<SDNode*> &NewNodes) const
+{
+  // TODO: Implement this function
+  return false;
+}
+
+unsigned
+AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+    bool UnfoldLoad, bool UnfoldStore,
+    unsigned *LoadRegIndex) const
+{
+  // TODO: Implement this function
+  return 0;
+}
+#endif
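+// Two loads are considered to share a base pointer if they come from the same
+// GEP where every operand except the final constant index is identical; the
+// final constant indices become the returned offsets.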
+bool
+AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                                        int64_t &Offset1,
+                                        int64_t &Offset2) const
+{
+  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) {
+    return false;
+  }
+  const MachineSDNode *mload1 = dyn_cast<MachineSDNode>(Load1);
+  const MachineSDNode *mload2 = dyn_cast<MachineSDNode>(Load2);
+  if (!mload1 || !mload2) {
+    return false;
+  }
+  if (mload1->memoperands_empty() ||
+      mload2->memoperands_empty()) {
+    return false;
+  }
+  MachineMemOperand *memOp1 = (*mload1->memoperands_begin());
+  MachineMemOperand *memOp2 = (*mload2->memoperands_begin());
+  const Value *mv1 = memOp1->getValue();
+  const Value *mv2 = memOp2->getValue();
+  if (!memOp1->isLoad() || !memOp2->isLoad()) {
+    return false;
+  }
+  if (getBasePointerValue(mv1) == getBasePointerValue(mv2)) {
+    if (isa<GetElementPtrInst>(mv1) && isa<GetElementPtrInst>(mv2)) {
+      const GetElementPtrInst *gep1 = dyn_cast<GetElementPtrInst>(mv1);
+      const GetElementPtrInst *gep2 = dyn_cast<GetElementPtrInst>(mv2);
+      if (!gep1 || !gep2) {
+        return false;
+      }
+      if (gep1->getNumOperands() != gep2->getNumOperands()) {
+        return false;
+      }
+      for (unsigned i = 0, e = gep1->getNumOperands() - 1; i < e; ++i) {
+        const Value *op1 = gep1->getOperand(i);
+        const Value *op2 = gep2->getOperand(i);
+        if (op1 != op2) {
+          // If any value except the last one is different, return false.
+          return false;
+        }
+      }
+      unsigned size = gep1->getNumOperands()-1;
+      if (!isa<ConstantInt>(gep1->getOperand(size))
+          || !isa<ConstantInt>(gep2->getOperand(size))) {
+        return false;
+      }
+      Offset1 = dyn_cast<ConstantInt>(gep1->getOperand(size))->getSExtValue();
+      Offset2 = dyn_cast<ConstantInt>(gep2->getOperand(size))->getSExtValue();
+      return true;
+    } else if (isa<Argument>(mv1) && isa<Argument>(mv2)) {
+      return false;
+    } else if (isa<GlobalValue>(mv1) && isa<GlobalValue>(mv2)) {
+      return false;
+    }
+  }
+  return false;
+}
+
+bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+    int64_t Offset1, int64_t Offset2,
+    unsigned NumLoads) const
+{
+  LoadSDNode *LoadSD1 = dyn_cast<LoadSDNode>(Load1);
+  LoadSDNode *LoadSD2 = dyn_cast<LoadSDNode>(Load2);
+  if (!LoadSD1 || !LoadSD2) {
+    return false;
+  }
+  // We only care about scheduling loads near for global address space.
+  if (dyn_cast<PointerType>(LoadSD1->getSrcValue()->getType())
+      ->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) {
+    return false;
+  }
+  // We only care about scheduling loads near for global address space.
+  if (dyn_cast<PointerType>(LoadSD2->getSrcValue()->getType())
+      ->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) {
+    return false;
+  }
+  assert(Offset2 > Offset1
+         && "Second offset should be larger than first offset!");
+  // If we have less than 16 loads in a row, and the offsets are within 16,
+  // then schedule together.
+  // TODO: Make the loads schedule near if it fits in a cacheline
+  return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+}
+
+bool AMDILInstrInfo::shouldScheduleWithNormalPriority(SDNode* instruction) const
+{
+  if (instruction->isMachineOpcode()) {
+    unsigned int Opc = instruction->getMachineOpcode();
+    switch(Opc) {
+    case AMDIL::BARRIER_7XX:
+    case AMDIL::BARRIER_EGNI:
+    case AMDIL::BARRIER_LOCAL:
+    case AMDIL::BARRIER_GLOBAL:
+    case AMDIL::BARRIER_REGION:
+    case AMDIL::FENCE:
+    case AMDIL::FENCE_LOCAL:
+    case AMDIL::FENCE_GLOBAL:
+    case AMDIL::FENCE_REGION:
+    case AMDIL::FENCE_READ_ONLY:
+    case AMDIL::FENCE_READ_ONLY_LOCAL:
+    case AMDIL::FENCE_READ_ONLY_GLOBAL:
+    case AMDIL::FENCE_READ_ONLY_REGION:
+    case AMDIL::FENCE_WRITE_ONLY:
+    case AMDIL::FENCE_WRITE_ONLY_LOCAL:
+    case AMDIL::FENCE_WRITE_ONLY_GLOBAL:
+    case AMDIL::FENCE_WRITE_ONLY_REGION:
+      return true;  // Maybe other instructions will need to be added to this?
+    default:
+      return false;
+    }
+  }
+  return false;
+}
+
+bool
+AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+const
+{
+  // TODO: Implement this function
+  return true;
+}
+void AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI) const
+{
+  // TODO: Implement this function
+}
+
+bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const
+{
+  // TODO: Implement this function
+  return false;
+}
+bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const
+{
+  // TODO: Implement this function
+  return false;
+}
+
+bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI,
+    const SmallVectorImpl<MachineOperand> &Pred) const
+{
+  // TODO: Implement this function
+  return false;
+}
+
+bool
+AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                                  const SmallVectorImpl<MachineOperand> &Pred2)
+const
+{
+  // TODO: Implement this function
+  return false;
+}
+
+bool AMDILInstrInfo::DefinesPredicate(MachineInstr *MI,
+                                      std::vector<MachineOperand> &Pred) const
+{
+  // TODO: Implement this function
+  return false;
+}
+
+bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const
+{
+  // TODO: Implement this function
+  return MI->getDesc().isPredicable();
+}
+
+bool
+AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const
+{
+  // TODO: Implement this function
+  return true;
+}
+
+unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const
+{
+  // TODO: Implement this function
+  return 0;
+}
+
+
+unsigned
+AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const
+{
+  // TODO: Implement this function
+  return 0;
+}
+
+unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str,
+    const MCAsmInfo &MAI) const
+{
+  // TODO: Implement this function
+  return 0;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,195 @@
+//===-- AMDILInstrInfo.h --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILINSTRUCTIONINFO_H_
+#define AMDILINSTRUCTIONINFO_H_
+
+#include "AMDIL.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include "AMDILRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AMDILGenInstrInfo.inc"
+
+namespace llvm
+{
+// AMDIL - This namespace holds all of the target specific flags that
+// instruction info tracks.
+//
+//class AMDILTargetMachine;
+class AMDILInstrInfo : public AMDILGenInstrInfo
+{
+private:
+  const AMDILRegisterInfo RI;
+  AMDILTargetMachine &TM;
+  bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
+                          MachineBasicBlock &MBB) const;
+  unsigned int getBranchInstr(const MachineOperand &op) const;
+public:
+  explicit AMDILInstrInfo(AMDILTargetMachine &tm);
+
+  // getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
+  // such, whenever a client has an instance of instruction info, it should
+  // always be able to get register info as well (through this method).
+  const AMDILRegisterInfo &getRegisterInfo() const;
+
+  // Return true if the instruction is a register to register move and leave the
+  // source and dest operands in the passed parameters.
+  bool isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
+                   unsigned int &DstReg, unsigned int &SrcSubIdx,
+                   unsigned int &DstSubIdx) const;
+
+  bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+                             unsigned &DstReg, unsigned &SubIdx) const;
+
+  unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+  unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+                                     int &FrameIndex) const;
+  bool hasLoadFromStackSlot(const MachineInstr *MI,
+                            const MachineMemOperand *&MMO,
+                            int &FrameIndex) const;
+  unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+  unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+                                    int &FrameIndex) const;
+  bool hasStoreToStackSlot(const MachineInstr *MI,
+                           const MachineMemOperand *&MMO,
+                           int &FrameIndex) const;
+
+
+
+  void reMaterialize(MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator MI,
+                     unsigned DestReg, unsigned SubIdx,
+                     const MachineInstr *Orig,
+                     const TargetRegisterInfo &TRI) const;
+
+  MachineInstr *duplicate(MachineInstr *Orig,
+                          MachineFunction &MF) const;
+
+  MachineInstr *
+  convertToThreeAddress(MachineFunction::iterator &MFI,
+                        MachineBasicBlock::iterator &MBBI,
+                        LiveVariables *LV) const;
+
+  MachineInstr *commuteInstruction(MachineInstr *MI,
+                                   bool NewMI = false) const;
+  bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+                             unsigned &SrcOpIdx2) const;
+  bool produceSameValue(const MachineInstr *MI0,
+                        const MachineInstr *MI1,
+                        const MachineRegisterInfo *MRI = 0) const;
+
+
+
+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify) const;
+
+  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+  unsigned
+  InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+               MachineBasicBlock *FBB,
+               const SmallVectorImpl<MachineOperand> &Cond,
+               DebugLoc DL) const;
+
+  bool copyRegToReg(MachineBasicBlock &MBB,
+                    MachineBasicBlock::iterator I,
+                    unsigned DestReg, unsigned SrcReg,
+                    const TargetRegisterClass *DestRC,
+                    const TargetRegisterClass *SrcRC,
+                    DebugLoc DL) const;
+  void copyPhysReg(MachineBasicBlock &MBB,
+                   MachineBasicBlock::iterator MI, DebugLoc DL,
+                   unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const;
+
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI,
+                           unsigned SrcReg, bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const;
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            unsigned DestReg, int FrameIndex,
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const;
+
+protected:
+#if 0
+  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr *MI,
+                                      const SmallVectorImpl<unsigned> &Ops,
+                                      int FrameIndex) const;
+  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr *MI,
+                                      const SmallVectorImpl<unsigned> &Ops,
+                                      MachineInstr *LoadMI) const;
+#endif
+public:
+#if 0
+  bool canFoldMemoryOperand(const MachineInstr *MI,
+                            const SmallVectorImpl<unsigned> &Ops) const;
+  bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+                           unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+                           SmallVectorImpl<MachineInstr *> &NewMIs) const;
+  bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+                           SmallVectorImpl<SDNode *> &NewNodes) const;
+  unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+                                      bool UnfoldLoad, bool UnfoldStore,
+                                      unsigned *LoadRegIndex = 0) const;
+#endif
+  bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                               int64_t &Offset1, int64_t &Offset2) const;
+  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+                               int64_t Offset1, int64_t Offset2,
+                               unsigned NumLoads) const;
+
+  /// Schedule BARRIER instructions differently.
+  /// Schedule this instruction based entirely on its Sethi-Ullman number,
+  /// without raising or lowering its priority based on use or def numbers.
+  /// What this really says is that the instruction has some effect on execution
+  /// that is not modeled in the DAG. (For instance, a multi-thread execution
+  /// barrier.) On the GPU AMDIL backend, moving these instructions too far up
+  /// or down in the execution can artificially constrain the scheduling in the
+  /// shared compiler.
+  bool shouldScheduleWithNormalPriority(SDNode* instruction) const;
+
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+  void insertNoop(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator MI) const;
+  bool isPredicated(const MachineInstr *MI) const;
+
+  bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+  bool PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const;
+
+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
+  bool DefinesPredicate(MachineInstr *MI,
+                        std::vector<MachineOperand> &Pred) const;
+  bool isPredicable(MachineInstr *MI) const;
+  bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+  unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+  unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
+  unsigned getInlineAsmLength(const char *Str,
+                              const MCAsmInfo &MAI) const;
+
+};
+
+}
+
+#endif // AMDILINSTRINFO_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,132 @@
+//===-- AMDILInstrInfo.td -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the AMDIL instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
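+// Predicate that is set to true if double precision divide is supported in
+// hardware.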
+def HasHWDDiv                 : Predicate<"Subtarget->device()"
+                           "->getGeneration() > AMDILDeviceInfo::HD4XXX && "
+              "Subtarget->device()->usesHardware(AMDILDeviceInfo::DoubleOps) && "
+            "(Subtarget->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX ||"
+              "Subtarget->calVersion() < CAL_VERSION_SC_155)">;
+
+// Predicate that is set to true if the hardware supports double precision, but
+// not double precision divide in hardware
+def HasSWDDiv             : Predicate<"Subtarget->device()"
+                           "->getGeneration() == AMDILDeviceInfo::HD4XXX &&"
+              "Subtarget->device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports 24-bit signed
+// math ops. Otherwise a software expansion to 32-bit math ops is used.
+def HasHWSign24Bit          : Predicate<"Subtarget->device()"
+                            "->getGeneration() > AMDILDeviceInfo::HD5XXX">;
+
+// Predicates that indicate whether 64-bit operations are supported in hardware
+// or must be emulated in software.
+def HasHW64Bit              : Predicate<"Subtarget->device()"
+                            "->usesHardware(AMDILDeviceInfo::LongOps)">;
+def HasSW64Bit              : Predicate<"Subtarget->device()"
+                            "->usesSoftware(AMDILDeviceInfo::LongOps)">;
+
+// Predicate that is set to true if the timer register is supported
+def HasTmrRegister          : Predicate<"Subtarget->device()"
+                            "->isSupported(AMDILDeviceInfo::TmrReg)">;
+
+// Predicate that is true if we have region address space.
+def hasRegionAS             : Predicate<"Subtarget->device()"
+                            "->usesHardware(AMDILDeviceInfo::RegionMem)">;
+
+// Predicate that is true if we do not have region address space.
+def noRegionAS             : Predicate<"!Subtarget->device()"
+                            "->isSupported(AMDILDeviceInfo::RegionMem)">;
+
+
+// Predicates that indicate whether 64-bit multiply is supported natively in
+// the IL or must be expanded in software.
+def HasHW64Mul              : Predicate<"Subtarget->calVersion()" 
+                                          ">= CAL_VERSION_SC_139"
+                                          "&& Subtarget->device()"
+                                          "->getGeneration() >="
+                                          "AMDILDeviceInfo::HD5XXX">;
+def HasSW64Mul              : Predicate<"Subtarget->calVersion()" 
+                                          "< CAL_VERSION_SC_139">;
+// Predicates that indicate whether 64-bit Div/Mod is supported in the IL or
+// must be implemented in software.
+def HasHW64DivMod           : Predicate<"Subtarget->device()"
+                            "->usesHardware(AMDILDeviceInfo::HW64BitDivMod)">;
+def HasSW64DivMod           : Predicate<"Subtarget->device()"
+                            "->usesSoftware(AMDILDeviceInfo::HW64BitDivMod)">;
+
+
+// Predicate that is set to true if BFI/BFM are supported.
+def HasHWBitFieldInst : Predicate<"Subtarget->calVersion()"
+                                  ">= CAL_VERSION_SC_151"
+                                  "&& Subtarget->device()"
+                                  "->getGeneration() >="
+                                  "AMDILDeviceInfo::HD5XXX">;
+
+def HasHWDoubleAbs : Predicate<"Subtarget->calVersion()"
+                                   ">= CAL_VERSION_SC_153">;
+def HasSWDoubleAbs : Predicate<"Subtarget->calVersion()"
+                                   "< CAL_VERSION_SC_153">;
+def HasHWDoubleConv : Predicate<"Subtarget->calVersion()"
+                                   ">= CAL_VERSION_SC_155">;
+
+def IsEGOrLaterDevice : Predicate<"Subtarget->device()->getGeneration()"
+                          " >= AMDILDeviceInfo::HD5XXX">;
+def HasByteShortUAV         : Predicate<"Subtarget->device()"
+                            "->getGeneration() >= AMDILDeviceInfo::HD7XXX">;
+
+def Has64BitPtr             : Predicate<"Subtarget->is64bit()">;
+def Has32BitPtr             : Predicate<"!Subtarget->is64bit()">;
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+include "AMDILOperands.td"
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Type Profiles
+//===--------------------------------------------------------------------===//
+include "AMDILProfiles.td"
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Nodes
+//===--------------------------------------------------------------------===//
+include "AMDILNodes.td"
+
+//===--------------------------------------------------------------------===//
+// Custom Pattern DAG Nodes
+//===--------------------------------------------------------------------===//
+include "AMDILPatterns.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction format classes
+//===----------------------------------------------------------------------===//
+include "AMDILFormats.td"
+
+//===--------------------------------------------------------------------===//
+// Multiclass Instruction formats
+//===--------------------------------------------------------------------===//
+include "AMDILMultiClass.td"
+
+//===--------------------------------------------------------------------===//
+// Intrinsics support
+//===--------------------------------------------------------------------===//
+include "AMDILIntrinsics.td"
+
+//===--------------------------------------------------------------------===//
+// Instructions support
+//===--------------------------------------------------------------------===//
+include "AMDILInstructions.td"
+include "AMDILMem64.td"
+include "AMDILMem32.td"
+//===--------------------------------------------------------------------===//
+// Instruction Pattern support - This must be the last include in the file
+// as it requires items defined in other files
+//===--------------------------------------------------------------------===//
+include "AMDILInstrPatterns.td"
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,401 @@
+//===-- AMDILInstrPatterns.td ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+def : Pat<(i32 (or GPRI32:$src0, GPRI32:$src1)),
+          (i32 (BINARY_OR_i32 GPRI32:$src0, GPRI32:$src1))>;
+
+// integer subtraction
+// a - b ==> a + (-b)
+def SUB_i8 : Pat<(sub GPRI8:$src0, GPRI8:$src1),
+    (ADD_i8 GPRI8:$src0, (NEGATE_i8 GPRI8:$src1))>;
+def SUB_v2i8 : Pat<(sub GPRV2I8:$src0, GPRV2I8:$src1),
+    (ADD_v2i8 GPRV2I8:$src0, (NEGATE_v2i8 GPRV2I8:$src1))>;
+def SUB_v4i8 : Pat<(sub GPRV4I8:$src0, GPRV4I8:$src1),
+    (ADD_v4i8 GPRV4I8:$src0, (NEGATE_v4i8 GPRV4I8:$src1))>;
+def SUB_i16 : Pat<(sub GPRI16:$src0, GPRI16:$src1),
+    (ADD_i16 GPRI16:$src0, (NEGATE_i16 GPRI16:$src1))>;
+def SUB_v2i16 : Pat<(sub GPRV2I16:$src0, GPRV2I16:$src1),
+    (ADD_v2i16 GPRV2I16:$src0, (NEGATE_v2i16 GPRV2I16:$src1))>;
+def SUB_v4i16 : Pat<(sub GPRV4I16:$src0, GPRV4I16:$src1),
+    (ADD_v4i16 GPRV4I16:$src0, (NEGATE_v4i16 GPRV4I16:$src1))>;
+def SUB_i32 : Pat<(sub GPRI32:$src0, GPRI32:$src1),
+    (ADD_i32 GPRI32:$src0, (NEGATE_i32 GPRI32:$src1))>;
+def SUB_v2i32 : Pat<(sub GPRV2I32:$src0, GPRV2I32:$src1),
+    (ADD_v2i32 GPRV2I32:$src0, (NEGATE_v2i32 GPRV2I32:$src1))>;
+def SUB_v4i32 : Pat<(sub GPRV4I32:$src0, GPRV4I32:$src1),
+    (ADD_v4i32 GPRV4I32:$src0, (NEGATE_v4i32 GPRV4I32:$src1))>;
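+
+// A minimal C sketch of the rewrite above, assuming two's-complement i32
+// (the helper name sub_ref is illustrative, not part of this backend):
+//
+// #include <stdint.h>
+// static int32_t sub_ref(int32_t a, int32_t b) {
+//   // a - b == a + (-b); negate via wrap-around to avoid signed overflow.
+//   return (int32_t)((uint32_t)a + (0u - (uint32_t)b));
+// }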
+
+// Convert between float -> ulong efficiently
+// static ulong
+// cf2ul(float f)
+// {
+//     float fh = f * 0x1.0p-32f;
+//     uint uh = (uint)fh;
+//     float fuh = (float)uh;
+//     float fl = mad(-0x1.0p+32f, fuh, f);
+//     uint ul = (uint)fl;
+//     return as_ulong((uint2)(ul, uh));
+// }
+def FTOUL_i64 : Pat<(i64 (fp_to_uint GPRF32:$src0)),
+    (LCREATE
+     (FTOU 
+      (FMAD_f32 
+       (IL_ASFLOAT_i32 (LOADCONST_i32 0xcf800000)),
+       (UTOF 
+        (FTOU 
+         (MUL_IEEE_f32 GPRF32:$src0, 
+          (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000))
+         )
+        )
+       ),
+       GPRF32:$src0)
+     ),
+     (FTOU 
+      (MUL_IEEE_f32 GPRF32:$src0, 
+       (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000))
+      )
+     )
+    )>;
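+
+// The LOADCONST_i32 values above are the raw IEEE-754 encodings of the two
+// scale factors from the pseudocode.  A minimal C sketch showing the bit
+// patterns (f2bits is an illustrative helper, not part of this backend):
+//
+// #include <stdint.h>
+// #include <string.h>
+// static uint32_t f2bits(float f) {
+//   uint32_t u;
+//   memcpy(&u, &f, sizeof u);      // bit-cast, no value conversion
+//   return u;
+// }
+// // f2bits(0x1.0p-32f)  == 0x2f800000   (multiplier in the pattern)
+// // f2bits(-0x1.0p+32f) == 0xcf800000   (mad constant in the pattern)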
+
+// static ulong2
+// cf22ul2(float2 f)
+// {
+//     float2 fh = f * 0x1.0p-32f;
+//     uint2 uh = convert_uint2(fh);
+//     float2 fuh = convert_float2(uh);
+//     float2 fl = mad(-0x1.0p+32f, fuh, f);
+//     uint2 ul = convert_uint2(fl);
+//     return as_ulong2((uint4)(ul, uh));
+// }
+def FTOUL_v2i64 : Pat<(v2i64 (fp_to_uint GPRV2F32:$src0)),
+    (LCREATE_v2i64
+     (FTOU_v2i32
+      (FMAD_v2f32
+       (VCREATE_v2f32 
+        (IL_ASFLOAT_i32 (LOADCONST_i32 0xcf800000))),
+       (UTOF_v2f32 (FTOU_v2i32 
+                    (MUL_IEEE_v2f32 GPRV2F32:$src0, 
+                     (VCREATE_v2f32 
+                      (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000)))))),
+       GPRV2F32:$src0)),
+     (FTOU_v2i32 (MUL_IEEE_v2f32 GPRV2F32:$src0, 
+                  (VCREATE_v2f32 
+                   (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000))))))>;
+
+// LLVM isn't lowering this correctly, so write a pattern that
+// matches it instead.
+def : Pat<(build_vector (f32 fpimm:$src)),
+    (VCREATE_v4f32 (LOADCONST_f32 fpimm:$src))>;
+def : Pat<(build_vector (i32 imm:$src)),
+    (VCREATE_v4i32 (LOADCONST_i32 imm:$src))>;
+def : Pat<(build_vector (i16 imm:$src)),
+    (VCREATE_v4i16 (LOADCONST_i16 imm:$src))>;
+def : Pat<(build_vector (i8 imm:$src)),
+    (VCREATE_v4i8 (LOADCONST_i8 imm:$src))>;
+def : Pat<(build_vector (f64 fpimm:$src)),
+    (VCREATE_v2f64 (LOADCONST_f64 fpimm:$src))>;
+def : Pat<(build_vector (f32 fpimm:$src)),
+    (VCREATE_v2f32 (LOADCONST_f32 fpimm:$src))>;
+def : Pat<(build_vector (i64 imm:$src)),
+    (VCREATE_v2i64 (LOADCONST_i64 imm:$src))>;
+def : Pat<(build_vector (i32 imm:$src)),
+    (VCREATE_v2i32 (LOADCONST_i32 imm:$src))>;
+def : Pat<(build_vector (i16 imm:$src)),
+    (VCREATE_v2i16 (LOADCONST_i16 imm:$src))>;
+def : Pat<(build_vector (i8 imm:$src)),
+    (VCREATE_v2i8 (LOADCONST_i8 imm:$src))>;
+
+// Correctly lower 32-bit shl/sra/srl when the shift amount is a 64-bit value
+// by using only the low 32 bits of the shift amount.
+def : Pat<(i32 (shl imm:$src, GPRI64:$shift)),
+    (SHL_i32 (LOADCONST_i32 imm:$src), (LLO GPRI64:$shift))>;
+def : Pat<(i32 (sra imm:$src, GPRI64:$shift)),
+    (SHR_i32 (LOADCONST_i32 imm:$src), (LLO GPRI64:$shift))>;
+def : Pat<(i32 (srl imm:$src, GPRI64:$shift)),
+    (USHR_i32 (LOADCONST_i32 imm:$src), (LLO GPRI64:$shift))>;
+def : Pat<(i32 (shl GPRI32:$src, GPRI64:$shift)),
+    (SHL_i32 GPRI32:$src, (LLO GPRI64:$shift))>;
+def : Pat<(i32 (sra GPRI32:$src, GPRI64:$shift)),
+    (SHR_i32 GPRI32:$src, (LLO GPRI64:$shift))>;
+def : Pat<(i32 (srl GPRI32:$src, GPRI64:$shift)),
+    (USHR_i32 GPRI32:$src, (LLO GPRI64:$shift))>;
+// Correctly lower 32-bit shl/sra/srl with a 64-bit immediate shift amount.
+def : Pat<(i32 (shl GPRI32:$src, (i64 imm:$shift))),
+    (SHL_i32 GPRI32:$src, (LLO (LOADCONST_i64 imm:$shift)))>;
+def : Pat<(i32 (sra GPRI32:$src, (i64 imm:$shift))),
+    (SHR_i32 GPRI32:$src, (LLO (LOADCONST_i64 imm:$shift)))>;
+def : Pat<(i32 (srl GPRI32:$src, (i64 imm:$shift))),
+    (USHR_i32 GPRI32:$src, (LLO (LOADCONST_i64 imm:$shift)))>;
+
+// Calls:
+def : Pat<(IL_call tglobaladdr:$dst),
+    (CALL tglobaladdr:$dst)>;
+def : Pat<(IL_call texternalsym:$dst),
+    (CALL texternalsym:$dst)>;
+def : Pat<(IL_call tconstpool:$dst),
+  (CALL tconstpool:$dst)>;
+
+include "AMDILConversions.td"
+
+/// Bitfield Insert pattern fragments
+def isLoadConstantAllOnes : PatLeaf<(timm),
+    [{
+      return N->isAllOnesValue();
+    }]>;
+
+/// Pattern 1: (lhs & bitpat) | (rhs & ~bitpat)
+def bfi_pat1 : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+    (or 
+     (and node:$lhs, node:$bitpat), 
+     (and node:$rhs, (not node:$bitpat)))>;
+
+/// Pattern 1b: (lhs & bitpat) | (rhs & ~bitpat)
+/// FIXME: This pattern needs to be removed, but requires cleanup of IL_or
+def bfi_pat1b : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+    (IL_or 
+     (and node:$lhs, node:$bitpat), 
+     (and node:$rhs, (not node:$bitpat)))>;
+
+/// Pattern 2: (lhs & bitpat) | (rhs & (bitpat ^ -1))
+def bfi_pat2 : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+    (or 
+     (and node:$lhs, node:$bitpat),
+     (and node:$rhs, 
+      (xor node:$bitpat, isLoadConstantAllOnes) ))>;
+
+/// Pattern 2b: (lhs & bitpat) | (rhs & (bitpat ^ -1))
+/// FIXME: This pattern needs to be removed, but requires cleanup of IL_or
+def bfi_pat2b : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+    (IL_or 
+     (and node:$lhs, node:$bitpat),
+     (and node:$rhs, 
+      (xor node:$bitpat, isLoadConstantAllOnes) ))>;
+
+/// Pattern 3: (rhs ^ ((rhs ^ lhs) & bitpat))
+def bfi_pat3 : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+    (xor node:$rhs, 
+     (and (xor node:$rhs, node:$lhs),
+       node:$bitpat))>;
+
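+/// The fragments above all compute a bitfield insert: take the bits of lhs
+/// where bitpat is 1 and the bits of rhs where bitpat is 0.  A minimal C
+/// sketch of the equivalence (bfi_ref/bfi_alt are illustrative names only):
+///
+///   #include <stdint.h>
+///   static uint32_t bfi_ref(uint32_t lhs, uint32_t rhs, uint32_t bitpat) {
+///     return (lhs & bitpat) | (rhs & ~bitpat);     // patterns 1/1b
+///   }
+///   static uint32_t bfi_alt(uint32_t lhs, uint32_t rhs, uint32_t bitpat) {
+///     return rhs ^ ((rhs ^ lhs) & bitpat);         // pattern 3
+///   }
+///
+/// bfi_ref and bfi_alt agree for all inputs, and (bitpat ^ 0xFFFFFFFFu) is
+/// just ~bitpat, which gives patterns 2/2b.
+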
+/// Constant pattern leaves for matching specific immediate values
+def isLoadConstantOne : PatLeaf<(timm),
+    [{
+      return N->isOne();
+    }]>;
+
+def is0x1FConstant : PatLeaf<(timm),
+    [{
+      return N->getZExtValue() == 0x1F;
+    }]>;
+
+def is0x3EConstant : PatLeaf<(timm),
+    [{
+      return N->getZExtValue() == 0x3E;
+    }]>;
+
+def is0x1FConstantOrLess : PatLeaf<(timm),
+    [{
+      return N->getZExtValue() <= 0x1F;
+    }]>;
+
+def bitmask_5bits : PatFrag<(ops node:$mask),
+  (and node:$mask, (i32 0x1f))>;
+
+/// Bitfield mask instruction patterns.
+/// Pattern 1: ((1 << (width & 0x1F)) + 0xFFFFFFFF) << (offset & 0x1F)
+def bfm_pat1 : PatFrag<(ops node:$width, node:$offset),
+    (shl (add (shl (i32 1), (bitmask_5bits node:$width)), (i32 0xFFFFFFFF)),
+     (bitmask_5bits node:$offset))>;
+
+/// Pattern 1b: ((1 << (width & 0x1F)) + 0xFFFFFFFF) << (offset & 0x1F)
+/// FIXME: Need to remove this pattern, but requires clean up of IL_add pattern.
+def bfm_pat1b : PatFrag<(ops node:$width, node:$offset),
+    (shl (IL_add (shl (i32 1), (bitmask_5bits node:$width)), (i32 0xFFFFFFFF)),
+     (bitmask_5bits node:$offset))>;
+
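+/// The expression above builds a run of (width & 0x1F) one-bits starting at
+/// bit (offset & 0x1F); adding 0xFFFFFFFF is the 32-bit wrap-around form of
+/// subtracting 1.  A minimal C sketch (bfm_ref is an illustrative name only):
+///
+///   #include <stdint.h>
+///   static uint32_t bfm_ref(uint32_t width, uint32_t offset) {
+///     return (((uint32_t)1 << (width & 0x1F)) - 1u) << (offset & 0x1F);
+///   }
+///
+/// For example, bfm_ref(8, 16) == 0x00FF0000: eight one-bits at offset 16.
+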
+let Predicates = [HasHWBitFieldInst] in {
+defm BFI_PAT1A : TernaryPatFragI32<IL_OP_BFI, bfi_pat1>;
+defm BFI_PAT1B : TernaryPatFragI32<IL_OP_BFI, bfi_pat1b>;
+defm BFI_PAT2A : TernaryPatFragI32<IL_OP_BFI, bfi_pat2>;
+defm BFI_PAT2B : TernaryPatFragI32<IL_OP_BFI, bfi_pat2b>;
+defm BFI_PAT3  : TernaryPatFragI32<IL_OP_BFI, bfi_pat3>;
+defm BFM_PAT1A  : BinaryPatFragI32<IL_OP_BFM, bfm_pat1>;
+defm BFM_PAT1B  : BinaryPatFragI32<IL_OP_BFM, bfm_pat1b>;
+}
+
+//
+// bitalign
+// dst = (src0 << (32 - src2[4:0])) | (src1 >> src2[4:0])
+
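+// A minimal C sketch of the operation (bitalign_ref is an illustrative name;
+// the s == 0 guard avoids the undefined 32-bit shift in C):
+//
+// #include <stdint.h>
+// static uint32_t bitalign_ref(uint32_t src0, uint32_t src1, uint32_t src2) {
+//   uint32_t s = src2 & 0x1F;
+//   if (s == 0) return src1;
+//   return (src0 << (32u - s)) | (src1 >> s);   // funnel shift of src0:src1
+// }
+// // With src0 == src1 this is a 32-bit rotate right by s.
+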
+// A.  src2 is constant
+def bitalign_1 : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
+    (or (shl  node:$src0, node:$src3), (srl  node:$src1, node:$src2)),
+    [{
+      SDNode *N_or1 = N->getOperand(1).getNode();
+      SDNode *N_src2 = N_or1->getOperand(1).getNode();
+      ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
+      if (!CN_src2) {
+        return false;
+      }
+
+      SDNode *N_or0 = N->getOperand(0).getNode();
+      SDNode *N_src3 = N_or0->getOperand(1).getNode();
+      ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
+      if (!CN_src3) {
+        return false;
+      }
+
+      uint32_t csrc2 = CN_src2->getZExtValue();
+      uint32_t csrc3 = CN_src3->getZExtValue();
+      return (csrc3 == (32 - csrc2));
+    }]>;
+
+def bitalign_1b : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
+    (IL_or (shl  node:$src0, node:$src3), (srl  node:$src1, node:$src2)),
+    [{
+      SDNode *N_or1 = N->getOperand(1).getNode();
+      SDNode *N_src2 = N_or1->getOperand(1).getNode();
+      ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
+      if (!CN_src2) {
+        return false;
+      }
+
+      SDNode *N_or0 = N->getOperand(0).getNode();
+      SDNode *N_src3 = N_or0->getOperand(1).getNode();
+      ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
+      if (!CN_src3) {
+        return false;
+      }
+
+      uint32_t csrc2 = CN_src2->getZExtValue();
+      uint32_t csrc3 = CN_src3->getZExtValue();
+      return (csrc3 == (32 - csrc2));
+    }]>;
+
+def bitalign_2 : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
+    (or (srl  node:$src1, node:$src2), (shl  node:$src0, node:$src3)),
+    [{
+      SDNode *N_or0 = N->getOperand(0).getNode();
+      SDNode *N_src2 = N_or0->getOperand(1).getNode();
+      ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
+      if (!CN_src2) {
+        return false;
+      }
+
+      SDNode *N_or1 = N->getOperand(1).getNode();
+      SDNode *N_src3 = N_or1->getOperand(1).getNode();
+      ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
+      if (!CN_src3) {
+        return false;
+      }
+
+      uint32_t csrc2 = CN_src2->getZExtValue();
+      uint32_t csrc3 = CN_src3->getZExtValue();
+      return (csrc3 == (32 - csrc2));
+    }]>;
+
+def bitalign_2b : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
+    (IL_or (srl  node:$src1, node:$src2), (shl  node:$src0, node:$src3)),
+    [{
+      SDNode *N_or0 = N->getOperand(0).getNode();
+      SDNode *N_src2 = N_or0->getOperand(1).getNode();
+      ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
+      if (!CN_src2) {
+        return false;
+      }
+        
+      SDNode *N_or1 = N->getOperand(1).getNode();
+      SDNode *N_src3 = N_or1->getOperand(1).getNode();
+      ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
+      if (!CN_src3) {
+        return false;
+      }
+
+      uint32_t csrc2 = CN_src2->getZExtValue();
+      uint32_t csrc3 = CN_src3->getZExtValue();
+      return (csrc3 == (32 - csrc2));
+    }]>;
+
+// B.  src2 is a variable
+
+def bitalign_3 : PatFrag<(ops node:$src0, node:$src1, node:$src2),
+    (or (shl  node:$src0,
+                (bitmask_5bits (sub (i32 0), node:$src2))),
+        (srl  node:$src1, (bitmask_5bits node:$src2)))>;
+
+def bitalign_3b : PatFrag<(ops node:$src0, node:$src1, node:$src2),
+    (IL_or (shl  node:$src0,
+                 (bitmask_5bits (sub (i32 0), node:$src2))),
+           (srl  node:$src1, (bitmask_5bits node:$src2)))>;
+
+// TODO: Using FourInOneOut requires four inputs, but bitalign actually takes
+// only three... Need to improve this.
+multiclass BitAlignPatFragCI32<ILOpCode opc, PatFrag node> {
+  def _i32 : FourInOneOut<opc, (outs GPRI32:$dst),
+      (ins GPRI32:$src0, GPRI32:$src1, GPRI32:$src2, GPRI32:$src3),
+      !strconcat(opc.Text, " $dst, $src0, $src1, $src2"),
+      [(set GPRI32:$dst, (node GPRI32:$src0, GPRI32:$src1, GPRI32:$src2, GPRI32:$src3))]>;
+}
+
+multiclass BitAlignPatFragSI32<ILOpCode opc, PatFrag node> {
+  def _i32 : ThreeInOneOut<opc, (outs GPRI32:$dst),
+      (ins GPRI32:$src0, GPRI32:$src1, GPRI32:$src2),
+      !strconcat(opc.Text, " $dst, $src0, $src1, $src2"),
+      [(set GPRI32:$dst, (node GPRI32:$src0, GPRI32:$src1, GPRI32:$src2))]>;
+}
+
+// Do bitalign pattern recognition if the device is EG or later.
+let Predicates = [IsEGOrLaterDevice] in {
+defm BITALIGN_PAT_1  : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_1>;
+defm BITALIGN_PAT_1B : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_1b>;
+defm BITALIGN_PAT_2  : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_2>;
+defm BITALIGN_PAT_2B : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_2b>;
+defm BITALIGN_PAT_3  : BitAlignPatFragSI32<IL_OP_BIT_ALIGN, bitalign_3>;
+defm BITALIGN_PAT_3B : BitAlignPatFragSI32<IL_OP_BIT_ALIGN, bitalign_3b>;
+}
+
+// unpack[0-3] dst, src
+
+def unpack0 : PatFrag<(ops node:$src),
+    (uint_to_fp (and node:$src, (i32 0xFF)))>;
+def unpack0_1 : PatFrag<(ops node:$src),
+    (uint_to_fp (i32 (int_AMDIL_bit_extract_u32 (i32 8), (i32 0), node:$src)))>;
+def unpack1 : PatFrag<(ops node:$src),
+    (uint_to_fp (and (srl node:$src, (i32 8)), (i32 0xFF)))>;
+def unpack1_1 : PatFrag<(ops node:$src),
+    (uint_to_fp (i32 (int_AMDIL_bit_extract_u32 (i32 8), (i32 8), node:$src)))>;
+def unpack2 : PatFrag<(ops node:$src),
+    (uint_to_fp (and (srl node:$src, (i32 16)), (i32 0xFF)))>;
+def unpack2_1 : PatFrag<(ops node:$src),
+    (uint_to_fp (i32 (int_AMDIL_bit_extract_u32 (i32 8), (i32 16), node:$src)))>;
+def unpack3 : PatFrag<(ops node:$src), (uint_to_fp (srl node:$src, (i32 24)))>;
+def unpack3_1 : PatFrag<(ops node:$src),
+    (uint_to_fp (i32 (int_AMDIL_bit_extract_u32 (i32 8), (i32 24), node:$src)))>;
+
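+// Each unpack[n] fragment above extracts byte n of a 32-bit value and
+// converts it to float.  A minimal C sketch (unpack_ref is an illustrative
+// name, not part of this backend):
+//
+// #include <stdint.h>
+// static float unpack_ref(uint32_t src, unsigned n) {   // n in 0..3
+//   return (float)((src >> (8u * n)) & 0xFFu);
+// }
+// // e.g. unpack_ref(0x80FF4001u, 2) == 255.0f
+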
+multiclass UnpackPatFrag<ILOpCode opc, PatFrag node> {
+  def _i32 : OneInOneOut<opc, (outs GPRF32:$dst),
+      (ins GPRI32:$src),
+      !strconcat(opc.Text, " $dst, $src"),
+      [(set GPRF32:$dst, (node GPRI32:$src))]>;
+}
+
+let Predicates = [IsEGOrLaterDevice] in {
+defm UNPACK_PAT0   : UnpackPatFrag<IL_OP_UNPACK_0, unpack0>;
+defm UNPACK_PAT0_1 : UnpackPatFrag<IL_OP_UNPACK_0, unpack0_1>;
+defm UNPACK_PAT1   : UnpackPatFrag<IL_OP_UNPACK_1, unpack1>;
+defm UNPACK_PAT1_1 : UnpackPatFrag<IL_OP_UNPACK_1, unpack1_1>;
+defm UNPACK_PAT2   : UnpackPatFrag<IL_OP_UNPACK_2, unpack2>;
+defm UNPACK_PAT2_1 : UnpackPatFrag<IL_OP_UNPACK_2, unpack2_1>;
+defm UNPACK_PAT3   : UnpackPatFrag<IL_OP_UNPACK_3, unpack3>;
+defm UNPACK_PAT3_1 : UnpackPatFrag<IL_OP_UNPACK_3, unpack3_1>;
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1310 @@
+//===-- AMDILInstructions.td ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+let isReMaterializable = 0, isAsCheapAsAMove = 1 in {
+  defm LOADCONST  : ILConstant<"mov $dst, $val">;
+  defm MOVE       : UnaryOpMC<IL_OP_MOV, IL_mov>;
+  defm PHIMOVE    : UnaryOpMC<IL_OP_MOV, IL_phimov>;
+}
+defm BINARY_NOT : UnaryOpMC<IL_OP_I_NOT, IL_not>;
+defm BINARY_OR  : BinaryOpMC<IL_OP_I_OR, IL_or>;
+defm BINARY_AND : BinaryOpMC<IL_OP_AND, IL_and>;
+defm BINARY_XOR : BinaryOpMC<IL_OP_I_XOR, IL_xor>;
+defm AND        : BinaryOpMCInt<IL_OP_AND, and>;
+defm CMOV       : BinaryOpMC<IL_OP_CMOV, IL_cmov>;
+defm DIV_INF    : BinaryOpMC<IL_OP_DIV_INF, IL_div_inf>;
+defm SMAX       : BinaryOpMCInt<IL_OP_I_MAX, IL_smax>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder for 64bit
+// instructions
+defm CMOVLOG    : TernaryOpMC<IL_OP_CMOV_LOGICAL, IL_cmov_logical>;
+defm SELECTBIN  : TernaryOpMCScalar<IL_OP_CMOV_LOGICAL, select>;
+//===---------------------------------------------------------------------===//
+// Signed 8bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+def INTTOANY_i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins GPRI32:$src0),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+    [(set GPRI8:$dst, (IL_inttoany GPRI32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// Signed 16bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+def INTTOANY_i16: OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins GPRI32:$src0),
+    !strconcat(IL_OP_MOV.Text," $dst, $src0"),
+    [(set GPRI16:$dst, (IL_inttoany GPRI32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// Signed 32bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+defm NEGATE     : UnaryOpMCi32<IL_OP_I_NEGATE, IL_inegate>;
+defm SMUL       : BinaryOpMCi32<IL_OP_I_MUL, mul>;
+defm SMULHI     : BinaryOpMCi32<IL_OP_I_MUL_HIGH, mulhs>;
+defm SHL        : BinaryOpMCi32Const<IL_OP_I_SHL, shl>;
+defm SHR        : BinaryOpMCi32Const<IL_OP_I_SHR, sra>;
+let Predicates = [Has64BitPtr] in {
+defm SHL        : BinaryOpMCi64Const<IL_OP_I_SHL, shl>;
+defm SHR        : BinaryOpMCi64Const<IL_OP_I_SHR, sra>;
+defm USHR       : BinaryOpMCi64Const<IL_OP_U_SHR, srl>;
+}
+defm SHLVEC     : BinaryOpMCi32<IL_OP_I_SHL, shl>;
+defm SHRVEC     : BinaryOpMCi32<IL_OP_I_SHR, sra>;
+defm ADD        : BinaryOpMCi32<IL_OP_I_ADD, add>;
+defm CUSTOM_XOR : BinaryOpMCInt<IL_OP_I_XOR, xor>;
+// Get rid of the custom-lowered addri by handling it via tablegen instead.
+defm CUSTOM_ADD : BinaryOpMCi32<IL_OP_I_ADD, IL_add>;
+defm EADD   : BinaryOpMCi32<IL_OP_I_ADD, adde>;
+def INTTOANY_i32: OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins GPRI32:$src0),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+    [(set GPRI32:$dst, (IL_inttoany GPRI32:$src0))]>;
+// Integer offsets for addressing
+def ADDir       : TwoInOneOut<IL_OP_I_ADD, (outs GPRI32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$offset),
+          !strconcat(IL_OP_I_ADD.Text, " $dst, $ptr, $offset"),
+          [(set GPRI32:$dst,
+        (IL_addaddrri ADDR:$ptr,
+          (i32 GPRI32:$offset)))]>;
+def ADDri       : TwoInOneOut<IL_OP_I_ADD, (outs GPRI32:$dst),
+      (ins GPRI32:$offset,  MEM3232:$ptr),
+          !strconcat(IL_OP_I_ADD.Text, " $dst, $offset, $ptr"),
+          [(set GPRI32:$dst,
+        (IL_addaddrir
+          (i32 GPRI32:$offset), ADDR:$ptr))]>;
+
+defm IFFB_HI    : UnaryOpMCi32<IL_OP_I_FFB_HI, IL_ffb_hi>;
+defm IFFB_LO    : UnaryOpMCi32<IL_OP_I_FFB_LO, IL_ffb_lo>;
+let mayLoad = 0, mayStore = 0 in {
+defm ABS : UnaryIntrinsicInt<IL_OP_ABS, int_AMDIL_abs>;
+defm BITCOUNT : UnaryIntrinsicInt<IL_OP_IBIT_COUNT, int_AMDIL_bit_count_i32>;
+defm FFB_LO : UnaryIntrinsicInt<IL_OP_I_FFB_LO, int_AMDIL_bit_find_first_lo>;
+defm FFB_HI : UnaryIntrinsicInt<IL_OP_I_FFB_HI, int_AMDIL_bit_find_first_hi>;
+defm FFB_SGN : UnaryIntrinsicInt<IL_OP_I_FFB_SGN,
+        int_AMDIL_bit_find_first_sgn>;
+defm IMULHI  : BinaryIntrinsicInt<IL_OP_I_MUL_HIGH, int_AMDIL_mulhi_i32>;
+let Predicates = [HasHWSign24Bit] in {
+defm IMUL24 : BinaryIntrinsicInt<IL_OP_I_MUL24, int_AMDIL_mul24_i32>;
+defm IMULHI24 : BinaryIntrinsicInt<IL_OP_I_MULHI24, int_AMDIL_mulhi24_i32>;
+defm IMAD24  : TernaryIntrinsicInt<IL_OP_I_MAD24, int_AMDIL_mad24_i32>;
+}
+defm CARRY  : BinaryIntrinsicInt<IL_OP_I_CARRY, int_AMDIL_carry_i32>;
+defm BORROW  : BinaryIntrinsicInt<IL_OP_I_BORROW, int_AMDIL_borrow_i32>;
+defm IMIN  : BinaryIntrinsicInt<IL_OP_I_MIN, int_AMDIL_min_i32>;
+defm IMAX  : BinaryIntrinsicInt<IL_OP_I_MAX, int_AMDIL_max_i32>;
+defm CMOV_LOG  : TernaryIntrinsicInt<IL_OP_CMOV_LOGICAL,
+          int_AMDIL_cmov_logical>;
+defm IBIT_EXTRACT : TernaryIntrinsicInt<IL_OP_IBIT_EXTRACT,
+          int_AMDIL_bit_extract_i32>;
+defm IMAD  : TernaryIntrinsicInt<IL_OP_I_MAD, int_AMDIL_mad_i32>;
+defm SAD  : TernaryIntrinsicInt<IL_OP_SAD, int_AMDIL_media_sad>;
+defm SADHI  : TernaryIntrinsicInt<IL_OP_SAD_HI,
+          int_AMDIL_media_sad_hi>;
+}
+def SAD4_i32  : ThreeInOneOut<IL_OP_SAD4, (outs GPRI32:$dst),
+      (ins GPRV4I32:$src, GPRV4I32:$src1, GPRI32:$src2),
+      !strconcat(IL_OP_SAD4.Text, " $dst, $src, $src1, $src2"),
+      [(set GPRI32:$dst,
+      (int_AMDIL_media_sad4 GPRV4I32:$src, GPRV4I32:$src1,
+      GPRI32:$src2))]>;
+def FTOV4U8_i32 : OneInOneOut<IL_OP_F2U4, (outs GPRI32:$dst),
+      (ins GPRV4F32:$src),
+      !strconcat(IL_OP_F2U4.Text, " $dst, $src"),
+      [(set GPRI32:$dst,
+      (int_AMDIL_media_convert_f2v4u8 GPRV4F32:$src))]>;
+//===---------------------------------------------------------------------===//
+// Unsigned 32bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+defm UMUL       : BinaryOpMCi32<IL_OP_U_MUL, IL_umul>;
+defm UMULHI     : BinaryOpMCi32<IL_OP_U_MUL_HIGH, mulhu>;
+defm USHR       : BinaryOpMCi32Const<IL_OP_U_SHR, srl>;
+defm USHRVEC    : BinaryOpMCi32<IL_OP_U_SHR, srl>;
+defm UDIV       : BinaryOpMCi32<IL_OP_U_DIV, udiv>;
+defm NATIVE_UDIV  : BinaryIntrinsicInt<IL_OP_U_DIV, int_AMDIL_udiv>;
+let mayLoad=0, mayStore=0 in {
+defm UBIT_REVERSE : UnaryIntrinsicInt<IL_OP_UBIT_REVERSE,
+        int_AMDIL_bit_reverse_u32>;
+defm UMULHI_INT : BinaryIntrinsicInt<IL_OP_U_MUL_HIGH, int_AMDIL_mulhi_u32>;
+defm UMULHI24   : BinaryIntrinsicInt<IL_OP_U_MULHI24, int_AMDIL_mulhi24_u32>;
+defm UMUL24     : BinaryIntrinsicInt<IL_OP_U_MUL24, int_AMDIL_mul24_u32>;
+defm UMIN  : BinaryIntrinsicInt<IL_OP_U_MIN, int_AMDIL_min_u32>;
+defm UMAX  : BinaryIntrinsicInt<IL_OP_U_MAX, int_AMDIL_max_u32>;
+defm UBIT_EXTRACT : TernaryIntrinsicInt<IL_OP_UBIT_EXTRACT,
+          int_AMDIL_bit_extract_u32>;
+defm UBIT_INSERT : QuaternaryIntrinsicInt<IL_OP_UBIT_INSERT,
+          int_AMDIL_bit_insert_u32>;
+defm BFI : TernaryIntrinsicInt<IL_OP_BFI, int_AMDIL_bfi>;
+defm BFM : BinaryIntrinsicInt<IL_OP_BFM, int_AMDIL_bfm>;
+defm UMAD  : TernaryIntrinsicInt<IL_OP_U_MAD, int_AMDIL_mad_u32>;
+defm UMAD24  : TernaryIntrinsicInt<IL_OP_U_MAD24, int_AMDIL_mad24_u32>;
+defm U4LERP  : TernaryIntrinsicInt<IL_OP_U4_LERP,
+          int_AMDIL_media_lerp_u4>;
+defm BITALIGN : TernaryIntrinsicInt<IL_OP_BIT_ALIGN, int_AMDIL_media_bitalign>;
+defm BYTEALIGN : TernaryIntrinsicInt<IL_OP_BYTE_ALIGN, int_AMDIL_media_bytealign>;
+}
+//===---------------------------------------------------------------------===//
+// Signed 64bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+def LNEGATE     : OneInOneOut<IL_OP_I64_NEGATE,  (outs GPRI64:$dst), (ins GPRI64:$src),
+                !strconcat(IL_OP_I64_NEGATE.Text, " $dst, $src"),
+                [(set GPRI64:$dst, (IL_inegate GPRI64:$src))]>;
+def LNEGATE_v2i64: OneInOneOut<IL_OP_I64_NEGATE,  (outs GPRV2I64:$dst),
+                (ins GPRV2I64:$src),
+                !strconcat(IL_OP_I64_NEGATE.Text, " $dst, $src"),
+                [(set GPRV2I64:$dst, (IL_inegate GPRV2I64:$src))]>;
+let Predicates = [HasHW64Bit] in {
+def LADD_i64        : TwoInOneOut<IL_OP_I64_ADD, (outs GPRI64:$dst),
+                  (ins GPRI64:$src1, GPRI64:$src2),
+                  !strconcat(IL_OP_I64_ADD.Text, " $dst, $src1, $src2"),
+                [(set GPRI64:$dst, (IL_add GPRI64:$src1, GPRI64:$src2))]>;
+def LADD_v2i64        : TwoInOneOut<IL_OP_I64_ADD, (outs GPRV2I64:$dst),
+                  (ins GPRV2I64:$src1, GPRV2I64:$src2),
+                  !strconcat(IL_OP_I64_ADD.Text, " $dst, $src1, $src2"),
+                [(set GPRV2I64:$dst, (IL_add GPRV2I64:$src1, GPRV2I64:$src2))]>;
+defm IMIN64 : BinaryIntrinsicLong<IL_OP_I64_MIN, int_AMDIL_min_i32>;
+defm UMIN64 : BinaryIntrinsicLong<IL_OP_U64_MIN, int_AMDIL_min_u32>;
+defm IMAX64 : BinaryIntrinsicLong<IL_OP_I64_MAX, int_AMDIL_max_i32>;
+defm UMAX64 : BinaryIntrinsicLong<IL_OP_U64_MAX, int_AMDIL_max_u32>;
+}
+let Predicates = [HasHW64Bit] in {
+def LSHR        : TwoInOneOut<IL_OP_I64_SHR, (outs GPRI64:$dst),
+                  (ins GPRI64:$src1, GPRI32:$src2),
+                  !strconcat(IL_OP_I64_SHR.Text, " $dst, $src1, $src2"),
+                [(set GPRI64:$dst, (sra GPRI64:$src1, GPRI32:$src2))]>;
+def LSHL       : TwoInOneOut<IL_OP_I64_SHL, (outs GPRI64:$dst),
+                  (ins GPRI64:$src1, GPRI32:$src2),
+                  !strconcat(IL_OP_I64_SHL.Text, " $dst, $src1, $src2"),
+                [(set GPRI64:$dst, (shl GPRI64:$src1, GPRI32:$src2))]>;
+// Apple requires a pattern since they pass down the shift operand as
+// a 64bit value, although the lower 6 bits are all that are used.
+def LSHR_APPLE        : TwoInOneOut<IL_OP_I64_SHR, (outs GPRI64:$dst),
+                  (ins GPRI64:$src1, GPRI64:$src2),
+                  !strconcat(IL_OP_I64_SHR.Text, " $dst, $src1, $src2"),
+                [(set GPRI64:$dst, (sra GPRI64:$src1, GPRI64:$src2))]>;
+def LSHL_APPLE       : TwoInOneOut<IL_OP_I64_SHL, (outs GPRI64:$dst),
+                  (ins GPRI64:$src1, GPRI64:$src2),
+                  !strconcat(IL_OP_I64_SHL.Text, " $dst, $src1, $src2"),
+                [(set GPRI64:$dst, (shl GPRI64:$src1, GPRI64:$src2))]>;
+}
+
+
+//===---------------------------------------------------------------------===//
+// Unsigned 64bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+let Predicates = [HasTmrRegister] in {
+  def Tmr : ILFormat<IL_OP_MOV, (outs GPRXYI64:$tmr),
+      (ins), !strconcat(IL_OP_MOV.Text, " $tmr, Tmr.xyxy"),
+      [(set GPRXYI64:$tmr, (int_AMDIL_get_cycle_count))]>;
+}
+let Predicates = [IsEGOrLaterDevice] in {
+def CU_ID : ILFormat<IL_OP_CU_ID, (outs GPRI32:$id), (ins),
+    !strconcat(IL_OP_CU_ID.Text, " $id"),
+    [(set GPRI32:$id, (int_AMDIL_compute_unit_id))]>;
+def WAVE_ID : ILFormat<IL_OP_WAVE_ID, (outs GPRI32:$id), (ins),
+    !strconcat(IL_OP_WAVE_ID.Text, " $id"),
+    [(set GPRI32:$id, (int_AMDIL_wavefront_id))]>;
+}
+let Predicates = [HasHW64Bit] in {
+def LUSHR        : TwoInOneOut<IL_OP_U64_SHR, (outs GPRI64:$dst),
+                  (ins GPRI64:$src1, GPRI32:$src2),
+                  !strconcat(IL_OP_U64_SHR.Text, " $dst, $src1, $src2"),
+                [(set GPRI64:$dst, (srl GPRI64:$src1, GPRI32:$src2))]>;
+// Apple requires a pattern since they pass down the shift operand as
+// a 64bit value, although the lower 6 bits are all that are used.
+def LUSHR_APPLE        : TwoInOneOut<IL_OP_U64_SHR, (outs GPRI64:$dst),
+                  (ins GPRI64:$src1, GPRI64:$src2),
+                  !strconcat(IL_OP_U64_SHR.Text, " $dst, $src1, $src2"),
+                [(set GPRI64:$dst, (srl GPRI64:$src1, GPRI64:$src2))]>;
+}
+
+
+//===---------------------------------------------------------------------===//
+// Generic Float Instructions
+//===---------------------------------------------------------------------===//
+let hasIEEEFlag = 1 in {
+defm MUL_IEEE  : BinaryOpMCFloat<IL_OP_MUL_IEEE, IL_OP_D_MUL, fmul>;
+}
+defm ADD  : BinaryOpMCFloat<IL_OP_ADD, IL_OP_D_ADD, fadd>;
+//===---------------------------------------------------------------------===//
+// float math instructions start here
+//===---------------------------------------------------------------------===//
+let mayLoad=0, mayStore=0 in {
+defm ABS : UnaryIntrinsicFloat<IL_OP_ABS, int_AMDIL_fabs>;
+defm FRAC : UnaryIntrinsicFloat<IL_OP_FRC, int_AMDIL_fraction>;
+defm PIREDUCE : UnaryIntrinsicFloat<IL_OP_PI_REDUCE, int_AMDIL_pireduce>;
+defm ROUND_NEAREST : UnaryIntrinsicFloat<IL_OP_ROUND_NEAR,
+          int_AMDIL_round_nearest>;
+defm ROUND_NEGINF : UnaryIntrinsicFloat<IL_OP_ROUND_NEG_INF,
+          int_AMDIL_round_neginf>;
+defm ROUND_POSINF : UnaryIntrinsicFloat<IL_OP_ROUND_POS_INF,
+          int_AMDIL_round_posinf>;
+defm ROUND_ZERO : UnaryIntrinsicFloat<IL_OP_ROUND_ZERO,
+          int_AMDIL_round_zero>;
+defm ACOS : UnaryIntrinsicFloatScalar<IL_OP_ACOS, int_AMDIL_acos>;
+defm ATAN : UnaryIntrinsicFloatScalar<IL_OP_ATAN, int_AMDIL_atan>;
+defm ASIN : UnaryIntrinsicFloatScalar<IL_OP_ASIN, int_AMDIL_asin>;
+defm TAN : UnaryIntrinsicFloatScalar<IL_OP_TAN, int_AMDIL_tan>;
+defm SIN : UnaryIntrinsicFloatScalar<IL_OP_SIN, int_AMDIL_sin>;
+defm COS : UnaryIntrinsicFloatScalar<IL_OP_COS, int_AMDIL_cos>;
+defm SQRT : UnaryIntrinsicFloatScalar<IL_OP_SQRT, int_AMDIL_sqrt>;
+defm EXP : UnaryIntrinsicFloatScalar<IL_OP_EXP, int_AMDIL_exp>;
+defm EXPVEC : UnaryIntrinsicFloat<IL_OP_EXP_VEC, int_AMDIL_exp_vec>;
+defm SQRTVEC : UnaryIntrinsicFloat<IL_OP_SQRT_VEC, int_AMDIL_sqrt_vec>;
+defm COSVEC : UnaryIntrinsicFloat<IL_OP_COS_VEC, int_AMDIL_cos_vec>;
+defm SINVEC : UnaryIntrinsicFloat<IL_OP_SIN_VEC, int_AMDIL_sin_vec>;
+defm LOGVEC : UnaryIntrinsicFloat<IL_OP_LOG_VEC, int_AMDIL_log_vec>;
+defm RSQVEC : UnaryIntrinsicFloat<IL_OP_RSQ_VEC, int_AMDIL_rsq_vec>;
+defm EXN : UnaryIntrinsicFloatScalar<IL_OP_EXN, int_AMDIL_exn>;
+defm SIGN : UnaryIntrinsicFloat<IL_OP_SGN, int_AMDIL_sign>;
+defm LENGTH : UnaryIntrinsicFloat<IL_OP_LEN, int_AMDIL_length>;
+defm POW : BinaryIntrinsicFloat<IL_OP_POW, int_AMDIL_pow>;
+}
+
+let hasIEEEFlag = 1 in {
+  let mayLoad = 0, mayStore=0 in {
+defm MIN  : BinaryIntrinsicFloat<IL_OP_MIN, int_AMDIL_min>;
+defm MAX  : BinaryIntrinsicFloat<IL_OP_MAX, int_AMDIL_max>;
+defm MAD  : TernaryIntrinsicFloat<IL_OP_MAD, int_AMDIL_mad>;
+  }
+defm MOD  : BinaryOpMCf32<IL_OP_MOD, frem>;
+}
+let hasZeroOpFlag = 1 in {
+  let mayLoad = 0, mayStore=0 in {
+defm LN  : UnaryIntrinsicFloatScalar<IL_OP_LN, int_AMDIL_ln>;
+defm LOG : UnaryIntrinsicFloatScalar<IL_OP_LOG, int_AMDIL_log>;
+defm RSQ : UnaryIntrinsicFloatScalar<IL_OP_RSQ, int_AMDIL_rsq>;
+defm DIV_INT  : BinaryIntrinsicFloat<IL_OP_DIV, int_AMDIL_div>;
+defm DIV  : BinaryOpMCf32<IL_OP_DIV, fdiv>;
+defm DIV_PRECISE  : BinaryIntrinsicFloat<IL_OP_DIV_PRECISE, int_AMDIL_div_precise>;
+  }
+}
+  let mayLoad = 0, mayStore=0 in {
+defm CLAMP : TernaryIntrinsicFloat<IL_OP_CLAMP, int_AMDIL_clamp>;
+defm FMA  : TernaryIntrinsicFloat<IL_OP_FMA, int_AMDIL_fma>;
+defm LERP  : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>;
+  }
+defm SUB   : BinaryOpMCf32<IL_OP_SUB, fsub>;
+defm FABS  : UnaryOpMCf32<IL_OP_ABS, fabs>;
+defm FMAD  : TernaryOpMCf32<IL_OP_MAD, IL_mad>;
+defm NEARBY : UnaryOpMCf32<IL_OP_ROUND_NEAR, fnearbyint>;
+defm TRUNC  : UnaryOpMCf32<IL_OP_ROUND_ZERO, ftrunc>;
+defm CEIL   : UnaryOpMCf32<IL_OP_ROUND_POS_INF, fceil>;
+defm FLOOR  : UnaryOpMCf32<IL_OP_ROUND_NEG_INF, ffloor>;
+
+def NEG_f32         : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst),
+  (ins GPRF32:$src0),
+    !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(xyzw)"),
+    [(set GPRF32:$dst, (fneg GPRF32:$src0))]>;
+def INTTOANY_f32    : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst),
+  (ins GPRI32:$src0),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+    [(set GPRF32:$dst, (IL_inttoany GPRI32:$src0))]>;
+let hasIEEEFlag = 1 in {
+def DP2ADD_f32 : ThreeInOneOut<IL_OP_DP2_ADD, (outs GPRF32:$dst),
+    (ins GPRV2F32:$src0, GPRV2F32:$src1, GPRF32:$src2),
+    !strconcat(IL_OP_DP2_ADD.Text, " $dst, $src0, $src1, $src2"),
+    [(set GPRF32:$dst,
+    (int_AMDIL_dp2_add GPRV2F32:$src0,
+    GPRV2F32:$src1, GPRF32:$src2))]>;
+def DP2_f32 : TwoInOneOut<IL_OP_DP2, (outs GPRF32:$dst),
+    (ins GPRV2F32:$src0, GPRV2F32:$src1),
+    !strconcat(IL_OP_DP2.Text, " $dst, $src0, $src1"),
+    [(set GPRF32:$dst,
+    (int_AMDIL_dp2 GPRV2F32:$src0, GPRV2F32:$src1))]>;
+def DP3_f32 : TwoInOneOut<IL_OP_DP3, (outs GPRF32:$dst),
+    (ins GPRV4F32:$src0, GPRV4F32:$src1),
+    !strconcat(IL_OP_DP3.Text, " $dst, $src0, $src1"),
+    [(set GPRF32:$dst,
+    (int_AMDIL_dp3 GPRV4F32:$src0, GPRV4F32:$src1))]>;
+def DP4_f32 : TwoInOneOut<IL_OP_DP4, (outs GPRF32:$dst),
+    (ins GPRV4F32:$src0, GPRV4F32:$src1),
+    !strconcat(IL_OP_DP4.Text, " $dst, $src0, $src1"),
+    [(set GPRF32:$dst,
+    (int_AMDIL_dp4 GPRV4F32:$src0, GPRV4F32:$src1))]>;
+def FTZ_f32 : OneInOneOut<IL_OP_MUL_IEEE, (outs GPRF32:$dst),
+    (ins GPRF32:$src), !strconcat(IL_OP_MUL_IEEE.Text, " $dst, $src, r0.1"),
+    [(set GPRF32:$dst,
+        (int_AMDIL_ftz GPRF32:$src))]>;
+def FTZ_v2f32 : OneInOneOut<IL_OP_MUL_IEEE, (outs GPRV2F32:$dst),
+    (ins GPRV2F32:$src), !strconcat(IL_OP_MUL_IEEE.Text, " $dst, $src, r0.1"),
+    [(set GPRV2F32:$dst,
+        (int_AMDIL_ftz GPRV2F32:$src))]>;
+def FTZ_v4f32 : OneInOneOut<IL_OP_MUL_IEEE, (outs GPRV4F32:$dst),
+    (ins GPRV4F32:$src), !strconcat(IL_OP_MUL_IEEE.Text, " $dst, $src, r0.1"),
+    [(set GPRV4F32:$dst,
+        (int_AMDIL_ftz GPRV4F32:$src))]>;
+}
+defm UNPACK_B0 : IntrConvertI32TOF32<IL_OP_UNPACK_0, int_AMDIL_media_unpack_byte_0>;
+defm UNPACK_B1 : IntrConvertI32TOF32<IL_OP_UNPACK_1, int_AMDIL_media_unpack_byte_1>;
+defm UNPACK_B2 : IntrConvertI32TOF32<IL_OP_UNPACK_2, int_AMDIL_media_unpack_byte_2>;
+defm UNPACK_B3 : IntrConvertI32TOF32<IL_OP_UNPACK_3, int_AMDIL_media_unpack_byte_3>;
+defm FTOI_FLR  : IntrConvertF32TOI32<IL_OP_FTOI_FLR, int_AMDIL_convert_f32_i32_flr>;
+defm FTOI_RPI  : IntrConvertF32TOI32<IL_OP_FTOI_RPI, int_AMDIL_convert_f32_i32_rpi>;
+defm HTOF      : IntrConvertF16TOF32<IL_OP_F16_TO_F32, int_AMDIL_convert_f16_f32>;
+defm FTOH      : IntrConvertF32TOF16<IL_OP_F32_TO_F16, int_AMDIL_convert_f32_f16>;
+defm FTOH_NEAR     : IntrConvertF32TOF16<IL_OP_F32_TO_F16_NEAR, int_AMDIL_convert_f32_f16_near>;
+defm FTOH_NEG_INF  : IntrConvertF32TOF16<IL_OP_F32_TO_F16_NEG_INF, int_AMDIL_convert_f32_f16_neg_inf>;
+defm FTOH_PLUS_INF : IntrConvertF32TOF16<IL_OP_F32_TO_F16_PLUS_INF, int_AMDIL_convert_f32_f16_plus_inf>;
+//===---------------------------------------------------------------------===//
+// float math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// float2 math instructions start here
+//===---------------------------------------------------------------------===//
+def NEG_v2f32       : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst),
+  (ins GPRV2F32:$src0),
+    !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(xyzw)"),
+    [(set GPRV2F32:$dst, (fneg GPRV2F32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// float2 math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// float4 math instructions start here
+//===---------------------------------------------------------------------===//
+def NEG_v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst),
+  (ins GPRV4F32:$src0),
+    !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(xyzw)"),
+    [(set GPRV4F32:$dst, (fneg GPRV4F32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// float4 math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// double math instructions start here
+//===---------------------------------------------------------------------===//
+def  SUB_f64       : TwoInOneOut<IL_OP_D_ADD, (outs GPRF64:$dst),
+  (ins GPRF64:$src0, GPRF64:$src1),
+     !strconcat(IL_OP_D_ADD.Text, " $dst, $src0, ${src1}_neg(yw)"),
+     [(set GPRF64:$dst, (fsub GPRF64:$src0, GPRF64:$src1))]>;
+def  SUB_v2f64      : TwoInOneOut<IL_OP_D_ADD, (outs GPRV2F64:$dst),
+  (ins GPRV2F64:$src0, GPRV2F64:$src1),
+     !strconcat(IL_OP_D_ADD.Text, " $dst, $src0, ${src1}_neg(yw)"),
+     [(set GPRV2F64:$dst, (fsub GPRV2F64:$src0, GPRV2F64:$src1))]>;
+def NEG_f64       : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst),
+  (ins GPRF64:$src0),
+    !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(yw)"),
+    [(set GPRF64:$dst, (fneg GPRF64:$src0))]>;
+def NEG_v2f64       : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst),
+  (ins GPRV2F64:$src0),
+    !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(yw)"),
+    [(set GPRV2F64:$dst, (fneg GPRV2F64:$src0))]>;
+  let mayLoad = 0, mayStore=0 in {
+defm MIN  : BinaryIntrinsicDouble<IL_OP_D_MIN, int_AMDIL_min>;
+defm MAX  : BinaryIntrinsicDouble<IL_OP_D_MAX, int_AMDIL_max>;
+defm DIV  : BinaryIntrinsicDouble<IL_OP_D_DIV, int_AMDIL_div>;
+defm MAD  : TernaryIntrinsicDouble<IL_OP_D_MAD, int_AMDIL_mad>;
+defm DFMA : TernaryIntrinsicDouble<IL_OP_D_MAD, int_AMDIL_fma>;
+defm FRAC : UnaryIntrinsicDouble<IL_OP_D_FRC, int_AMDIL_fraction>;
+defm SQRT : UnaryIntrinsicDouble<IL_OP_D_SQRT, int_AMDIL_sqrt>;
+defm RSQ  : UnaryIntrinsicDoubleScalar<IL_OP_D_RSQ, int_AMDIL_rsq>;
+defm RCP  : UnaryIntrinsicDoubleScalar<IL_OP_D_RCP, int_AMDIL_drcp>;
+defm DMAD : TernaryOpMCf64<IL_OP_D_MAD, IL_mad>;
+  }
+let Predicates = [HasHWDoubleAbs] in {
+defm DABS : UnaryOpMCf64<IL_OP_D_ABS, fabs>;
+  let mayLoad = 0, mayStore=0 in {
+defm ABS  : UnaryIntrinsicDouble<IL_OP_D_ABS, int_AMDIL_fabs>;
+  }
+} 
+let Predicates = [HasSWDoubleAbs] in {
+def SWDABS_f64 : OneInOneOut<IL_OP_D_ABS, (outs GPRF64:$dst),
+      (ins GPRF64:$src),
+      !strconcat(IL_OP_D_FREXP.Text," $dst, ${src}_abs(yw)"),
+      [(set GPRF64:$dst, (fabs GPRF64:$src))]>;
+  let mayLoad = 0, mayStore=0 in {
+def SWABS_f64 : OneInOneOut<IL_OP_D_ABS, (outs GPRF64:$dst),
+      (ins GPRF64:$src),
+      !strconcat(IL_OP_D_FREXP.Text," $dst, ${src}_abs(yw)"),
+      [(set GPRF64:$dst, (int_AMDIL_fabs GPRF64:$src))]>;
+  }
+}
+def FREXP_f64 : OneInOneOut<IL_OP_D_FREXP, (outs GPRV2I64:$dst),
+      (ins GPRF64:$src),
+      !strconcat(IL_OP_D_FREXP.Text," $dst, $src"),
+      [(set GPRV2I64:$dst,
+      (int_AMDIL_frexp_f64 GPRF64:$src))]>;
+def LDEXP_f64 : TwoInOneOut<IL_OP_D_LDEXP, (outs GPRF64:$dst),
+      (ins GPRF64:$src, GPRI32:$src1),
+      !strconcat(IL_OP_D_LDEXP.Text, " $dst, $src, $src1"),
+      [(set GPRF64:$dst,
+      (int_AMDIL_ldexp GPRF64:$src, GPRI32:$src1))]>;
+def LDEXP_v2f64 : TwoInOneOut<IL_OP_D_LDEXP, (outs GPRV2F64:$dst),
+      (ins GPRV2F64:$src, GPRV2I32:$src1),
+      !strconcat(IL_OP_D_LDEXP.Text, " $dst, $src, $src1"),
+      [(set GPRV2F64:$dst,
+      (int_AMDIL_ldexp GPRV2F64:$src, GPRV2I32:$src1))]>;
+//===---------------------------------------------------------------------===//
+// double math instructions end here
+//===---------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
+// Various Macros
+//===---------------------------------------------------------------------===//
+def MACRO__sdiv_i8   : BinaryMacro< GPRI8, GPRI8, GPRI8, sdiv>;
+def MACRO__sdiv_i16  : BinaryMacro<GPRI16, GPRI16, GPRI16, sdiv>;
+def MACRO__sdiv_i32  : BinaryMacro<GPRI32, GPRI32, GPRI32, sdiv>;
+def MACRO__udiv_i8   : BinaryMacro< GPRI8, GPRI8, GPRI8, udiv>;
+def MACRO__udiv_i16  : BinaryMacro<GPRI16, GPRI16, GPRI16, udiv>;
+def MACRO__udiv_i32  : BinaryMacro<GPRI32, GPRI32, GPRI32, udiv>;
+def MACRO__smod_i8   : BinaryMacro< GPRI8, GPRI8, GPRI8, srem>;
+def MACRO__smod_i16  : BinaryMacro<GPRI16, GPRI16, GPRI16, srem>;
+def MACRO__smod_i32  : BinaryMacro<GPRI32, GPRI32, GPRI32, srem>;
+def MACRO__umod_i8   : BinaryMacro< GPRI8, GPRI8, GPRI8, urem>;
+def MACRO__umod_i16  : BinaryMacro<GPRI16, GPRI16, GPRI16, urem>;
+def MACRO__umod_i32  : BinaryMacro<GPRI32, GPRI32, GPRI32, urem>;
+let Predicates = [HasSWDDiv] in {
+  def MACRO__ddiv_f64: BinaryMacro<GPRF64, GPRF64, GPRF64, fdiv>;
+}
+let Predicates = [HasHWDDiv] in {
+  def MACRO__ddiv_f64_fma: BinaryMacro<GPRF64, GPRF64, GPRF64, fdiv>;
+}
+def MACRO__ftol_i64  : UnaryMacro<GPRI64, GPRF32, fp_to_sint>;
+def MACRO__ultof_f32 : UnaryMacro<GPRF32, GPRI64, uint_to_fp>;
+def MACRO__ltof_f32  : UnaryMacro<GPRF32, GPRI64, sint_to_fp>;
+let Predicates = [HasSW64Mul] in {
+def MACRO__mul_i64   : BinaryMacro<GPRI64, GPRI64, GPRI64, mul>;
+def MACRO__mul_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, mul>;
+}
+let Predicates = [HasSW64DivMod] in {
+def MACRO__sdiv_i64  : BinaryMacro<GPRI64, GPRI64, GPRI64, sdiv>;
+def MACRO__udiv_i64  : BinaryMacro<GPRI64, GPRI64, GPRI64, udiv>;
+def MACRO__smod_i64  : BinaryMacro<GPRI64, GPRI64, GPRI64, srem>;
+def MACRO__umod_i64  : BinaryMacro<GPRI64, GPRI64, GPRI64, urem>;
+}
+
+let Predicates = [HasHW64DivMod] in {
+  defm SDIV : BinaryOpMCi64<IL_OP_I64_DIV, sdiv>;
+  defm UDIV : BinaryOpMCi64<IL_OP_U64_DIV, udiv>;
+  defm SMOD : BinaryOpMCi64<IL_OP_I64_MOD, srem>;
+  defm UMOD : BinaryOpMCi64<IL_OP_U64_MOD, urem>;
+}
+let Predicates = [HasHW64Mul] in {
+  defm SMUL       : BinaryOpMCi64<IL_OP_I64_MUL, mul>;
+  defm UMUL       : BinaryOpMCi64<IL_OP_U64_MUL, IL_umul>;
+}
+// Apple requires a pattern since they pass down the shift operand as
+// a 64bit value, although the lower 6 bits are all that are used.
+// The vector-2 variants use the software-emulated mode since SC only supports
+// scalar 64bit ops.
+def MACRO__shr_apple_v2i64: BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, srl>;
+def MACRO__shl_apple_v2i64: BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, shl>;
+def MACRO__sra_apple_v2i64: BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, sra>;
+def MACRO__shr_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, srl>;
+def MACRO__shl_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, shl>;
+def MACRO__sra_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, sra>;
+
+let Predicates = [HasSW64Bit] in {
+def MACRO__shr_apple_i64: BinaryMacro<GPRI64, GPRI64, GPRI64, srl>;
+def MACRO__shl_apple_i64: BinaryMacro<GPRI64, GPRI64, GPRI64, shl>;
+def MACRO__sra_apple_i64: BinaryMacro<GPRI64, GPRI64, GPRI64, sra>;
+def MACRO__shr_i64   : BinaryMacro<GPRI64, GPRI64, GPRI32, srl>;
+def MACRO__shl_i64   : BinaryMacro<GPRI64, GPRI64, GPRI32, shl>;
+def MACRO__sra_i64   : BinaryMacro<GPRI64, GPRI64, GPRI32, sra>;
+}
+
+//===---------------------------------------------------------------------===//
+// Comparison Instructions
+//===---------------------------------------------------------------------===//
+let usesCustomInserter = 1 in {
+    defm CMP : Compare<"Pseudo comparison instr">;
+}
+//===---------------------------------------------------------------------===//
+// 32-bit floating point operations
+//===---------------------------------------------------------------------===//
+def FEQ         : TwoInOneOut<IL_OP_EQ, (outs GPRF32:$dst),
+        (ins GPRF32:$lhs, GPRF32:$rhs),
+        !strconcat(IL_OP_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def FGE         : TwoInOneOut<IL_OP_GE, (outs GPRF32:$dst),
+        (ins GPRF32:$lhs, GPRF32:$rhs),
+        !strconcat(IL_OP_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def FLT         : TwoInOneOut<IL_OP_LT, (outs GPRF32:$dst),
+        (ins GPRF32:$lhs, GPRF32:$rhs),
+        !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def FLT_v2f32 : TwoInOneOut<IL_OP_LT, (outs GPRV2F32:$dst),
+        (ins GPRV2F32:$lhs, GPRV2F32:$rhs),
+        !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def FLT_v4f32 : TwoInOneOut<IL_OP_LT, (outs GPRV4F32:$dst),
+        (ins GPRV4F32:$lhs, GPRV4F32:$rhs),
+        !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def FNE         : TwoInOneOut<IL_OP_NE, (outs GPRF32:$dst),
+        (ins GPRF32:$lhs, GPRF32:$rhs),
+        !strconcat(IL_OP_NE.Text, " $dst, $lhs, $rhs")
+        , []>;
+
+//===---------------------------------------------------------------------===//
+// TODO: need to correctly define comparison instructions
+//===---------------------------------------------------------------------===//
+def DEQ        : TwoInOneOut<IL_OP_D_EQ, (outs GPRF64:$dst),
+        (ins GPRF64:$lhs, GPRF64:$rhs),
+        !strconcat(IL_OP_D_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def DEQ_v2f64        : TwoInOneOut<IL_OP_D_EQ, (outs GPRV2F64:$dst),
+        (ins GPRV2F64:$lhs, GPRV2F64:$rhs),
+        !strconcat(IL_OP_D_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def DGE        : TwoInOneOut<IL_OP_D_GE, (outs GPRF64:$dst),
+        (ins GPRF64:$lhs, GPRF64:$rhs),
+        !strconcat(IL_OP_D_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def DLT        : TwoInOneOut<IL_OP_D_LT, (outs GPRF64:$dst),
+        (ins GPRF64:$lhs, GPRF64:$rhs),
+        !strconcat(IL_OP_D_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def DNE        : TwoInOneOut<IL_OP_D_NE, (outs GPRF64:$dst),
+        (ins GPRF64:$lhs, GPRF64:$rhs),
+        !strconcat(IL_OP_D_NE.Text, " $dst, $lhs, $rhs")
+        , []>;
+
+//===---------------------------------------------------------------------===//
+// TODO: need to correctly define comparison instructions
+//===---------------------------------------------------------------------===//
+def IEQ        : TwoInOneOut<IL_OP_I_EQ, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def IEQ_v2i32        : TwoInOneOut<IL_OP_I_EQ, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def IEQ_v4i32        : TwoInOneOut<IL_OP_I_EQ, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def IGE        : TwoInOneOut<IL_OP_I_GE, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def IGE_v2i32        : TwoInOneOut<IL_OP_I_GE, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def IGE_v4i32        : TwoInOneOut<IL_OP_I_GE, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ILT        : TwoInOneOut<IL_OP_I_LT, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ILT_v2i32        : TwoInOneOut<IL_OP_I_LT, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ILT_v4i32        : TwoInOneOut<IL_OP_I_LT, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def INE        : TwoInOneOut<IL_OP_I_NE, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def INE_v2i32        : TwoInOneOut<IL_OP_I_NE, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def INE_v4i32        : TwoInOneOut<IL_OP_I_NE, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+        , []>;
+let Predicates = [HasHW64Bit] in {
+def LEQ        : TwoInOneOut<IL_OP_I64_EQ, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_I64_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def LGE        : TwoInOneOut<IL_OP_I64_GE, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_I64_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def LLE        : TwoInOneOut<IL_OP_I64_GE, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_I64_GE.Text, " $dst, $rhs, $lhs")
+        , []>;
+def LGT        : TwoInOneOut<IL_OP_I64_LT, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_I64_LT.Text, " $dst, $rhs, $lhs")
+        , []>;
+def LLT        : TwoInOneOut<IL_OP_I64_LT, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_I64_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def LNE        : TwoInOneOut<IL_OP_I64_NE, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_I64_NE.Text, " $dst, $lhs, $rhs")
+        , []>;
+}
+
+//===---------------------------------------------------------------------===//
+// Unsigned Integer Operations
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// TODO: need to correctly define comparison instructions
+//===---------------------------------------------------------------------===//
+def UEQ        : TwoInOneOut<IL_OP_I_EQ, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UEQ_v2i32        : TwoInOneOut<IL_OP_I_EQ, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UEQ_v4i32        : TwoInOneOut<IL_OP_I_EQ, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ULE        : TwoInOneOut<IL_OP_U_GE, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ULE_v2i32        : TwoInOneOut<IL_OP_U_GE, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ULE_v4i32        : TwoInOneOut<IL_OP_U_GE, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UGT        : TwoInOneOut<IL_OP_U_LT, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UGT_v2i32        : TwoInOneOut<IL_OP_U_LT, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UGT_v4i32        : TwoInOneOut<IL_OP_U_LT, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UGE        : TwoInOneOut<IL_OP_U_GE, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UGE_v2i32        : TwoInOneOut<IL_OP_U_GE, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UGE_v4i32        : TwoInOneOut<IL_OP_U_GE, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ULT        : TwoInOneOut<IL_OP_U_LT, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ULT_v2i32        : TwoInOneOut<IL_OP_U_LT, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ULT_v4i32        : TwoInOneOut<IL_OP_U_LT, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UNE        : TwoInOneOut<IL_OP_I_NE, (outs GPRI32:$dst),
+        (ins GPRI32:$lhs, GPRI32:$rhs),
+        !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UNE_v2i32        : TwoInOneOut<IL_OP_I_NE, (outs GPRV2I32:$dst),
+        (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+        !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def UNE_v4i32        : TwoInOneOut<IL_OP_I_NE, (outs GPRV4I32:$dst),
+        (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+        !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+        , []>;
+let Predicates = [HasHW64Bit] in {
+def ULLE        : TwoInOneOut<IL_OP_U64_GE, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_U64_GE.Text, " $dst, $rhs, $lhs")
+        , []>;
+def ULGT        : TwoInOneOut<IL_OP_U64_LT, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_U64_LT.Text, " $dst, $rhs, $lhs")
+        , []>;
+def ULGE        : TwoInOneOut<IL_OP_U64_GE, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_U64_GE.Text, " $dst, $lhs, $rhs")
+        , []>;
+def ULLT        : TwoInOneOut<IL_OP_U64_LT, (outs GPRI64:$dst),
+        (ins GPRI64:$lhs, GPRI64:$rhs),
+        !strconcat(IL_OP_U64_LT.Text, " $dst, $lhs, $rhs")
+        , []>;
+}
+//===---------------------------------------------------------------------===//
+// Scalar ==> Scalar conversion functions
+//===---------------------------------------------------------------------===//
+// f32 ==> f64
+def FTOD        : UnaryOp<IL_OP_F_2_D,         fextend,     GPRF64, GPRF32>;
+// f64 ==> f32
+def DTOF        : UnaryOp<IL_OP_D_2_F,         IL_d2f,     GPRF32, GPRF64>;
+// f32 ==> i32 signed
+def FTOI        : UnaryOp<IL_OP_FTOI,          fp_to_sint, GPRI32, GPRF32>;
+def FTOI_v2i32  : UnaryOp<IL_OP_FTOI,          fp_to_sint, GPRV2I32, GPRV2F32>;
+def FTOI_v4i32  : UnaryOp<IL_OP_FTOI,          fp_to_sint, GPRV4I32, GPRV4F32>;
+// i32 ==> f32 signed
+def ITOF        : UnaryOp<IL_OP_ITOF,          sint_to_fp, GPRF32, GPRI32>;
+def ITOF_v2f32  : UnaryOp<IL_OP_ITOF,          sint_to_fp, GPRV2F32, GPRV2I32>;
+def ITOF_v4f32  : UnaryOp<IL_OP_ITOF,          sint_to_fp, GPRV4F32, GPRV4I32>;
+// f32 ==> i32 unsigned
+def FTOU        : UnaryOp<IL_OP_FTOU,          fp_to_uint, GPRI32, GPRF32>;
+def FTOU_v2i32  : UnaryOp<IL_OP_FTOU,          fp_to_uint, GPRV2I32, GPRV2F32>;
+def FTOU_v4i32  : UnaryOp<IL_OP_FTOU,          fp_to_uint, GPRV4I32, GPRV4F32>;
+// i32 ==> f32 unsigned
+def UTOF        : UnaryOp<IL_OP_UTOF,          uint_to_fp, GPRF32, GPRI32>;
+def UTOF_v2f32  : UnaryOp<IL_OP_UTOF,          uint_to_fp, GPRV2F32, GPRV2I32>;
+def UTOF_v4f32  : UnaryOp<IL_OP_UTOF,          uint_to_fp, GPRV4F32, GPRV4I32>;
+let Predicates = [HasHWDoubleConv] in {
+  // f64 ==> i32 signed
+  def DTOI        : UnaryOp<IL_OP_DTOI,          fp_to_sint, GPRI32, GPRF64>;
+  // i32 ==> f64 signed
+  def ITOD        : UnaryOp<IL_OP_ITOD,          sint_to_fp, GPRF64, GPRI32>;
+  // f64 ==> i32 unsigned
+  def DTOU        : UnaryOp<IL_OP_DTOU,          fp_to_uint, GPRI32, GPRF64>;
+  // i32 ==> f64 unsigned
+  def UTOD        : UnaryOp<IL_OP_UTOD,          uint_to_fp, GPRF64, GPRI32>;
+}
+// Get upper 32 bits of f64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DHI         : OneInOneOut<IL_OP_MOV,  (outs GPRI32:$dst),
+                (ins GPRF64:$src),
+                !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+                [(set GPRI32:$dst, (IL_dcomphi GPRF64:$src))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DHI_v2f64   : OneInOneOut<IL_OP_MOV,  (outs GPRV2I32:$dst),
+                (ins GPRV2F64:$src),
+                !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+                [(set GPRV2I32:$dst, (IL_dcomphi2 GPRV2F64:$src))]>;
+// Get lower 32 bits of f64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DLO         : OneInOneOut<IL_OP_MOV,  (outs GPRI32:$dst),
+                (ins GPRF64:$src),
+                !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+                [(set GPRI32:$dst, (IL_dcomplo GPRF64:$src))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DLO_v2f64   : OneInOneOut<IL_OP_MOV,  (outs GPRV2I32:$dst),
+                (ins GPRV2F64:$src),
+                !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+                [(set GPRV2I32:$dst, (IL_dcomplo2 GPRV2F64:$src))]>;
+// Convert two 32-bit integers into an f64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DCREATE     : TwoInOneOut<IL_OP_I_ADD, (outs GPRF64:$dst),
+                (ins GPRI32:$src0, GPRI32:$src1),
+                !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+                [(set GPRF64:$dst, (IL_dcreate GPRI32:$src0, GPRI32:$src1))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DCREATE_v2f64 : TwoInOneOut<IL_OP_I_ADD, (outs GPRV2F64:$dst),
+                (ins GPRV2I32:$src0, GPRV2I32:$src1),
+                !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+                [(set GPRV2F64:$dst,
+                    (IL_dcreate2 GPRV2I32:$src0, GPRV2I32:$src1))]>;
+// Get upper 32 bits of i64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LHI         : OneInOneOut<IL_OP_MOV,  (outs GPRI32:$dst),
+                (ins GPRI64:$src),
+                !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+                [(set GPRI32:$dst, (IL_lcomphi GPRI64:$src))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LHI_v2i64         : OneInOneOut<IL_OP_MOV,  (outs GPRV2I32:$dst),
+                (ins GPRV2I64:$src),
+                !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+                [(set GPRV2I32:$dst, (IL_lcomphi2 GPRV2I64:$src))]>;
+// Get lower 32 bits of i64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LLO         : OneInOneOut<IL_OP_MOV,  (outs GPRI32:$dst),
+                (ins GPRI64:$src),
+                !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+                [(set GPRI32:$dst, (IL_lcomplo GPRI64:$src))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LLO_v2i64         : OneInOneOut<IL_OP_MOV,  (outs GPRV2I32:$dst),
+                (ins GPRV2I64:$src),
+                !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+                [(set GPRV2I32:$dst, (IL_lcomplo2 GPRV2I64:$src))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def HILO_BITOR_v4i16 : TwoInOneOut<IL_OP_I_OR, (outs GPRI32:$dst),
+                (ins GPRI32:$src, GPRI32:$src2),
+                !strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def HILO_BITOR_v2i32 : TwoInOneOut<IL_OP_I_OR, (outs GPRI32:$dst),
+                (ins GPRI32:$src, GPRI32:$src2),
+                !strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def HILO_BITOR_v2i64 : TwoInOneOut<IL_OP_I_OR, (outs GPRI64:$dst),
+                (ins GPRI64:$src, GPRI64:$src2),
+                !strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
+// Convert two 32-bit integers into an i64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LCREATE     : TwoInOneOut<IL_OP_I_ADD, (outs GPRI64:$dst),
+                (ins GPRI32:$src0, GPRI32:$src1),
+                !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+                [(set GPRI64:$dst, (IL_lcreate GPRI32:$src0, GPRI32:$src1))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LCREATE_v2i64     : TwoInOneOut<IL_OP_I_ADD, (outs GPRV2I64:$dst),
+                (ins GPRV2I32:$src0, GPRV2I32:$src1),
+                !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+                [(set GPRV2I64:$dst,
+                    (IL_lcreate2 GPRV2I32:$src0, GPRV2I32:$src1))]>;
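+// Note: LCREATE/DCREATE (and their v2 forms) above carry IL_OP_I_ADD only as
+// the printed mnemonic; the actual packing of the two 32-bit halves is
+// presumably done by the custom swizzle pattern mentioned in the comments,
+// not by an integer add.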
+//===---------------------------------------------------------------------===//
+// Scalar ==> Vector conversion functions
+//===---------------------------------------------------------------------===//
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VCREATE          : UnaryOpMCVec<IL_OP_MOV, IL_vbuild>;
+
+//===---------------------------------------------------------------------===//
+// Vector ==> Scalar conversion functions
+//===---------------------------------------------------------------------===//
+
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VEXTRACT         : VectorExtract<IL_vextract>;
+
+//===---------------------------------------------------------------------===//
+// Vector ==> Vector conversion functions
+//===---------------------------------------------------------------------===//
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VINSERT          : VectorInsert<IL_vinsert>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VCONCAT      : VectorConcat<IL_vconcat>;
+
+//===---------------------------------------------------------------------===//
+// Bit conversion functions
+//===---------------------------------------------------------------------===//
+defm IL_ASCHAR  : BitConversion<IL_OP_MOV, GPRI8, IL_bitconv>;
+defm IL_ASSHORT : BitConversion<IL_OP_MOV, GPRI16, IL_bitconv>;
+defm IL_ASINT   : BitConversion<IL_OP_MOV, GPRI32, IL_bitconv>;
+defm IL_ASFLOAT : BitConversion<IL_OP_MOV, GPRF32, IL_bitconv>;
+defm IL_ASDOUBLE : BitConversion<IL_OP_MOV, GPRF64, IL_bitconv>;
+defm IL_ASLONG  : BitConversion<IL_OP_MOV, GPRI64, IL_bitconv>;
+defm IL_ASV2CHAR  : BitConversion<IL_OP_MOV, GPRV2I8, IL_bitconv>;
+defm IL_ASV2SHORT : BitConversion<IL_OP_MOV, GPRV2I16, IL_bitconv>;
+defm IL_ASV2INT   : BitConversion<IL_OP_MOV, GPRV2I32, IL_bitconv>;
+defm IL_ASV2FLOAT : BitConversion<IL_OP_MOV, GPRV2F32, IL_bitconv>;
+defm IL_ASV2DOUBLE : BitConversion<IL_OP_MOV, GPRV2F64, IL_bitconv>;
+defm IL_ASV2LONG  : BitConversion<IL_OP_MOV, GPRV2I64, IL_bitconv>;
+defm IL_ASV4CHAR  : BitConversion<IL_OP_MOV, GPRV4I8, IL_bitconv>;
+defm IL_ASV4SHORT : BitConversion<IL_OP_MOV, GPRV4I16, IL_bitconv>;
+defm IL_ASV4INT   : BitConversion<IL_OP_MOV, GPRV4I32, IL_bitconv>;
+defm IL_ASV4FLOAT : BitConversion<IL_OP_MOV, GPRV4F32, IL_bitconv>;
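+// These defms presumably expand (via the BitConversion multiclass defined
+// elsewhere in this patch) to plain MOVs that reinterpret the register bits,
+// matching the IL_bitconv node for each supported source type.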
+
+//===---------------------------------------------------------------------===//
+// Custom inserter for branches and returns; this will eventually be a
+// separate pass
+//===---------------------------------------------------------------------===//
+let isTerminator = 1 in {
+  def BRANCH : ILFormat<IL_PSEUDO_INST, (outs), (ins brtarget:$target),
+      "; Pseudo unconditional branch instruction",
+      [(br bb:$target)]>;
+  defm BRANCH_COND : BranchConditional<IL_brcond>;
+}
+//===---------------------------------------------------------------------===//
+// Return instructions
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
+  def RETURN          : ILFormat<IL_OP_RET,(outs), (ins variable_ops),
+      IL_OP_RET.Text, []>;
+  def RETDYN      : ILFormat<IL_OP_RET_DYN, (outs), (ins variable_ops),
+      IL_OP_RET_DYN.Text, [(IL_retflag)]>;
+}
+//===---------------------------------------------------------------------===//
+// Lower and raise the stack by a given amount
+//===---------------------------------------------------------------------===//
+def ADJCALLSTACKDOWN : ILFormat<IL_PSEUDO_INST, (outs), (ins i32imm:$amt),
+    "; begin of call sequence $amt",
+    [(IL_callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : ILFormat<IL_PSEUDO_INST, (outs), (ins i32imm:$amt1,
+    i32imm:$amt2),
+    "; end of call sequence $amt1 $amt2",
+    [(IL_callseq_end timm:$amt1, timm:$amt2)]>;
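+// Note: these are the usual LLVM call-frame pseudo-instructions; their asm
+// strings are IL comments (leading ';'), so they are expected to emit nothing
+// executable and only delimit the call sequence for frame lowering.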
+
+//===---------------------------------------------------------------------===//
+// Handle a function call
+//===---------------------------------------------------------------------===//
+let isCall = 1 in {
+  let Uses = [
+     R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9, R10, R11, R12, R13, R14, R15, R16,
+    R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32
+    ] in {
+      def CALL : UnaryOpNoRet<IL_OP_CALL, (outs),
+      (ins calltarget:$dst, variable_ops),
+      !strconcat(IL_OP_CALL.Text, " $dst"), []>;
+    }
+}
+
+
+//===---------------------------------------------------------------------===//
+// Flow and program control instructions
+//===---------------------------------------------------------------------===//
+let isTerminator=1 in {
+  def SWITCH      : ILFormat<IL_OP_SWITCH, (outs), (ins GPRI32:$src),
+  !strconcat(IL_OP_SWITCH.Text, " $src"), []>;
+  def CASE        : ILFormat<IL_OP_CASE, (outs), (ins GPRI32:$src),
+      !strconcat(IL_OP_CASE.Text, " $src"), []>;
+  def BREAK       : ILFormat<IL_OP_BREAK, (outs), (ins),
+      IL_OP_BREAK.Text, []>;
+  def CONTINUE    : ILFormat<IL_OP_CONTINUE, (outs), (ins),
+      IL_OP_CONTINUE.Text, []>;
+  def DEFAULT     : ILFormat<IL_OP_DEFAULT, (outs), (ins),
+      IL_OP_DEFAULT.Text, []>;
+  def ELSE        : ILFormat<IL_OP_ELSE, (outs), (ins),
+      IL_OP_ELSE.Text, []>;
+  def ENDSWITCH   : ILFormat<IL_OP_ENDSWITCH, (outs), (ins),
+      IL_OP_ENDSWITCH.Text, []>;
+  def ENDMAIN     : ILFormat<IL_OP_ENDMAIN, (outs), (ins),
+      IL_OP_ENDMAIN.Text, []>;
+  def END         : ILFormat<IL_OP_END, (outs), (ins),
+      IL_OP_END.Text, []>;
+  def ENDFUNC     : ILFormat<IL_OP_ENDFUNC, (outs), (ins),
+      IL_OP_ENDFUNC.Text, []>;
+  def ENDIF       : ILFormat<IL_OP_ENDIF, (outs), (ins),
+      IL_OP_ENDIF.Text, []>;
+  def WHILELOOP   : ILFormat<IL_OP_WHILE, (outs), (ins),
+      IL_OP_WHILE.Text, []>;
+  def ENDLOOP     : ILFormat<IL_OP_ENDLOOP, (outs), (ins),
+      IL_OP_ENDLOOP.Text, []>;
+  def FUNC        : ILFormat<IL_OP_FUNC, (outs), (ins),
+      IL_OP_FUNC.Text, []>;
+  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+  defm IF_LOGICALNZ  : BranchInstr<IL_OP_IF_LOGICALNZ>;
+  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+  defm IF_LOGICALZ   : BranchInstr<IL_OP_IF_LOGICALZ>;
+  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+  defm BREAK_LOGICALNZ : BranchInstr<IL_OP_BREAK_LOGICALNZ>;
+  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+  defm BREAK_LOGICALZ : BranchInstr<IL_OP_BREAK_LOGICALZ>;
+  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+  defm CONTINUE_LOGICALNZ : BranchInstr<IL_OP_CONTINUE_LOGICALNZ>;
+  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+  defm CONTINUE_LOGICALZ : BranchInstr<IL_OP_CONTINUE_LOGICALZ>;
+  defm IFC         : BranchInstr2<IL_OP_IFC>;
+  defm BREAKC      : BranchInstr2<IL_OP_BREAKC>;
+  defm CONTINUEC   : BranchInstr2<IL_OP_CONTINUEC>;
+}
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+  def TRAP : ILFormat<IL_OP_NOP, (outs), (ins),
+      IL_OP_NOP.Text, [(trap)]>;
+}
+
+//===---------------------------------------------------------------------===//
+//----------------- Work Item Functions - OpenCL 6.11.1 ---------------------//
+//===---------------------------------------------------------------------===//
+let isCall=1, isAsCheapAsAMove = 1 in {
+  def GET_WORK_DIM : ILFormat<IL_OP_MOV, (outs GPRXI32:$dst), (ins),
+      !strconcat(IL_OP_MOV.Text, " $dst, cb0[0].w"),
+      [(set GPRXI32:$dst, (int_AMDIL_get_work_dim))]>;
+
+  def GET_GLOBAL_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1021.xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_global_id))]>;
+
+  def GET_LOCAL_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1022.xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_local_id))]>;
+
+  def GET_GROUP_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1023.xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_group_id))]>;
+
+  def GET_GLOBAL_SIZE : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[0].xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_global_size))]>;
+
+  def GET_LOCAL_SIZE : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[1].xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_local_size))]>;
+
+  def GET_NUM_GROUPS : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[2].xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_num_groups))]>;
+
+  let Predicates = [Has32BitPtr] in {
+    def GET_GLOBAL_OFFSET : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+        (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[9].xyz0"),
+        [(set GPRV4I32:$dst, (int_AMDIL_get_global_offset))]>;
+  }
+
+  let Predicates = [Has64BitPtr] in {
+    def GET_GLOBAL_OFFSET64 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+        (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[9].yzw0"),
+        [(set GPRV4I32:$dst, (int_AMDIL_get_global_offset))]>;
+  }
+}
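+// Sketch (not part of this patch): given the asm strings above, a use of
+// get_global_id() should print IL along the lines of
+//   mov rN.xyz0, r1021.xyz0
+// with rN chosen by the register allocator, while the work-group size and
+// count queries read the preloaded constant buffer cb0 instead.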
+//===---------------------------------------------------------------------===//
+//------------- Synchronization Functions - OpenCL 6.11.9 -------------------//
+//===---------------------------------------------------------------------===//
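+// The fence variants below differ only in which memory spaces appear in the
+// printed mnemonic (lds = local, memory = global, gds = region, plus the
+// mem_read_only / mem_write_only forms); the $flag operand is consumed by the
+// matching intrinsic but is not part of the printed instruction.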
+let isCall=1 in {
+  def FENCE : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+      "fence_lds_memory_gds",
+      [(int_AMDIL_fence GPRI32:$flag)]>;
+
+  def FENCE_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+      "fence_lds",
+      [(int_AMDIL_fence_local GPRI32:$flag)]>;
+
+  def FENCE_GLOBAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+      "fence_memory",
+      [(int_AMDIL_fence_global GPRI32:$flag)]>;
+
+  def FENCE_GLOBAL_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+      "fence_memory_lds",
+      [(int_AMDIL_fence_global_local GPRI32:$flag)]>;
+
+  def FENCE_REGION : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+      "fence_gds",
+      [(int_AMDIL_fence_region GPRI32:$flag)]>;
+
+  def FENCE_REGION_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+      "fence_gds_lds",
+      [(int_AMDIL_fence_region_local GPRI32:$flag)]>;
+
+  def FENCE_REGION_GLOBAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+      "fence_gds_memory",
+      [(int_AMDIL_fence_region_global GPRI32:$flag)]>;
+
+  def FENCE_READ_ONLY : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_lds_memory_gds_mem_read_only",
+      [(int_AMDIL_read_fence GPRI32:$flag)]>;
+
+  def FENCE_READ_ONLY_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_lds_mem_read_only",
+      [(int_AMDIL_read_fence_local GPRI32:$flag)]>;
+
+  def FENCE_READ_ONLY_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_memory_mem_read_only",
+      [(int_AMDIL_read_fence_global GPRI32:$flag)]>;
+
+  def FENCE_READ_ONLY_GLOBAL_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_memory_lds_mem_read_only",
+      [(int_AMDIL_read_fence_global_local GPRI32:$flag)]>;
+
+  def FENCE_READ_ONLY_REGION : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_gds_mem_read_only",
+      [(int_AMDIL_read_fence_region GPRI32:$flag)]>;
+
+  def FENCE_READ_ONLY_REGION_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_gds_lds_mem_read_only",
+      [(int_AMDIL_read_fence_region_local GPRI32:$flag)]>;
+
+  def FENCE_READ_ONLY_REGION_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_gds_memory_mem_read_only",
+      [(int_AMDIL_read_fence_region_global GPRI32:$flag)]>;
+
+  def FENCE_WRITE_ONLY : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_lds_memory_gds_mem_write_only",
+      [(int_AMDIL_write_fence GPRI32:$flag)]>;
+
+  def FENCE_WRITE_ONLY_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_lds_mem_write_only",
+      [(int_AMDIL_write_fence_local GPRI32:$flag)]>;
+
+  def FENCE_WRITE_ONLY_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_memory_mem_write_only",
+      [(int_AMDIL_write_fence_global GPRI32:$flag)]>;
+
+  def FENCE_WRITE_ONLY_GLOBAL_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_memory_lds_mem_write_only",
+      [(int_AMDIL_write_fence_global_local GPRI32:$flag)]>;
+
+  def FENCE_WRITE_ONLY_REGION : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_gds_mem_write_only",
+      [(int_AMDIL_write_fence_region GPRI32:$flag)]>;
+
+  def FENCE_WRITE_ONLY_REGION_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_gds_lds_mem_write_only",
+      [(int_AMDIL_write_fence_region_local GPRI32:$flag)]>;
+
+  def FENCE_WRITE_ONLY_REGION_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+      "fence_gds_memory_mem_write_only",
+      [(int_AMDIL_write_fence_region_global GPRI32:$flag)]>;
+
+}
+
+let isReturn = 1 in {
+  def EARLY_EXIT : UnaryOpNoRet<IL_OP_RET_LOGICALNZ, (outs),
+      (ins GPRI32:$flag),
+      !strconcat(IL_OP_RET_LOGICALNZ.Text, " $flag"),
+      [(int_AMDIL_early_exit GPRI32:$flag)]>;
+}
+def MEDIA_UNPACK_0 : OneInOneOut<IL_OP_UNPACK_0, (outs GPRV4F32:$dst),
+    (ins GPRV4I32:$src),
+    !strconcat(IL_OP_UNPACK_0.Text, " $dst, $src"),
+    [(set GPRV4F32:$dst,
+        (v4f32 (int_AMDIL_media_unpack_byte_0 GPRV4I32:$src)))]>;
+def MEDIA_UNPACK_1 : OneInOneOut<IL_OP_UNPACK_1, (outs GPRV4F32:$dst),
+    (ins GPRV4I32:$src),
+    !strconcat(IL_OP_UNPACK_1.Text, " $dst, $src"),
+    [(set GPRV4F32:$dst,
+        (v4f32 (int_AMDIL_media_unpack_byte_1 GPRV4I32:$src)))]>;
+def MEDIA_UNPACK_2 : OneInOneOut<IL_OP_UNPACK_2, (outs GPRV4F32:$dst),
+    (ins GPRV4I32:$src),
+    !strconcat(IL_OP_UNPACK_2.Text, " $dst, $src"),
+    [(set GPRV4F32:$dst,
+        (v4f32 (int_AMDIL_media_unpack_byte_2 GPRV4I32:$src)))]>;
+def MEDIA_UNPACK_3 : OneInOneOut<IL_OP_UNPACK_3, (outs GPRV4F32:$dst),
+    (ins GPRV4I32:$src),
+    !strconcat(IL_OP_UNPACK_3.Text, " $dst, $src"),
+    [(set GPRV4F32:$dst,
+        (v4f32 (int_AMDIL_media_unpack_byte_3 GPRV4I32:$src)))]>;
+
+def SEMAPHORE_INIT : BinaryOpNoRet<IL_OP_SEMAPHORE_INIT, (outs),
+    (ins MEM3232:$ptr, GPRI32:$val),
+    !strconcat(IL_OP_SEMAPHORE_INIT.Text, "_id($ptr)_value($val)"),
+    [(int_AMDIL_semaphore_init ADDR:$ptr, GPRI32:$val)]>;
+
+def SEMAPHORE_WAIT : UnaryOpNoRet<IL_OP_SEMAPHORE_WAIT, (outs),
+    (ins MEM3232:$ptr),
+    !strconcat(IL_OP_SEMAPHORE_WAIT.Text, "_id($ptr)"),
+    [(int_AMDIL_semaphore_wait ADDR:$ptr)]>;
+
+def SEMAPHORE_SIGNAL : UnaryOpNoRet<IL_OP_SEMAPHORE_SIGNAL, (outs),
+    (ins MEM3232:$ptr),
+    !strconcat(IL_OP_SEMAPHORE_SIGNAL.Text, "_id($ptr)"),
+    [(int_AMDIL_semaphore_signal ADDR:$ptr)]>;
+
+let hasIEEEFlag = 1 in {
+  defm MIN3 : TernaryIntrinsicFloat<IL_OP_MIN3, int_AMDIL_min3>;
+  defm MED3 : TernaryIntrinsicFloat<IL_OP_MED3, int_AMDIL_med3>;
+  defm MAX3 : TernaryIntrinsicFloat<IL_OP_MAX3, int_AMDIL_max3>;
+  defm IMIN3 : TernaryIntrinsicInt<IL_OP_I_MIN3, int_AMDIL_min3_i32>;
+  defm IMED3 : TernaryIntrinsicInt<IL_OP_I_MED3, int_AMDIL_med3_i32>;
+  defm IMAX3 : TernaryIntrinsicInt<IL_OP_I_MAX3, int_AMDIL_max3_i32>;
+  defm UMIN3 : TernaryIntrinsicInt<IL_OP_U_MIN3, int_AMDIL_min3_u32>;
+  defm UMED3 : TernaryIntrinsicInt<IL_OP_U_MED3, int_AMDIL_med3_u32>;
+  defm UMAX3 : TernaryIntrinsicInt<IL_OP_U_MAX3, int_AMDIL_max3_u32>;
+}
+
+def CLASS_f32 : TwoInOneOut<IL_OP_CLASS, (outs GPRI32:$dst),
+    (ins GPRF32:$src, GPRI32:$flag),
+    !strconcat(IL_OP_CLASS.Text, " $dst, $src, $flag"),
+    [(set GPRI32:$dst,
+        (int_AMDIL_class GPRF32:$src, GPRI32:$flag))]>;
+def CLASS_v2f32 : TwoInOneOut<IL_OP_CLASS, (outs GPRV2I32:$dst),
+    (ins GPRV2F32:$src, GPRV2I32:$flag),
+    !strconcat(IL_OP_CLASS.Text, " $dst, $src, $flag"),
+    [(set GPRV2I32:$dst,
+        (int_AMDIL_class GPRV2F32:$src, GPRV2I32:$flag))]>;
+def CLASS_v4f32 : TwoInOneOut<IL_OP_CLASS, (outs GPRV4I32:$dst),
+    (ins GPRV4F32:$src, GPRV4I32:$flag),
+    !strconcat(IL_OP_CLASS.Text, " $dst, $src, $flag"),
+    [(set GPRV4I32:$dst,
+        (int_AMDIL_class GPRV4F32:$src, GPRV4I32:$flag))]>;
+def CLASS_f64 : TwoInOneOut<IL_OP_D_CLASS, (outs GPRI32:$dst),
+    (ins GPRF64:$src, GPRI32:$flag),
+    !strconcat(IL_OP_D_CLASS.Text, " $dst, $src, $flag"),
+    [(set GPRI32:$dst,
+        (int_AMDIL_class GPRF64:$src, GPRI32:$flag))]>;
+def CLASS_v2f64 : TwoInOneOut<IL_OP_D_CLASS, (outs GPRV2I32:$dst),
+    (ins GPRV2F64:$src, GPRV2I32:$flag),
+    !strconcat(IL_OP_D_CLASS.Text, " $dst, $src, $flag"),
+    [(set GPRV2I32:$dst,
+        (int_AMDIL_class GPRV2F64:$src, GPRV2I32:$flag))]>;
+
+
+defm FREXP_EXP : IntrConvertF32TOI32<IL_OP_FREXP_EXP, int_AMDIL_frexp_exp>;
+def FREXP_EXP_f64 : OneInOneOut<IL_OP_D_FREXP_EXP, (outs GPRI32:$dst),
+    (ins GPRF64:$src), 
+    !strconcat(IL_OP_D_FREXP_EXP.Text, " $dst, $src"),
+    [(set GPRI32:$dst,
+        (int_AMDIL_frexp_exp GPRF64:$src))]>;
+def FREXP_EXP_v2f64 : OneInOneOut<IL_OP_D_FREXP_EXP, (outs GPRV2I32:$dst),
+    (ins GPRV2F64:$src), 
+    !strconcat(IL_OP_D_FREXP_EXP.Text, " $dst, $src"),
+    [(set GPRV2I32:$dst,
+        (int_AMDIL_frexp_exp GPRV2F64:$src))]>;
+
+defm FREXP_MANT : UnaryIntrinsicFloat<IL_OP_FREXP_MANT, int_AMDIL_frexp_mant>;
+defm SAD16  : TernaryIntrinsicInt<IL_OP_SAD_U16, int_AMDIL_media_sad16>;
+defm SAD32  : TernaryIntrinsicInt<IL_OP_SAD_U32, int_AMDIL_media_sad32>;
+
+let hasZeroOpFlag = 1 in {
+  let mayLoad = 0, mayStore = 0 in {
+    defm DDIV_INT : BinaryIntrinsicDouble<IL_OP_D_DIV, int_AMDIL_div>;
+    defm DDIV     : BinaryOpMCf64<IL_OP_D_DIV, fdiv>;
+  }
+}
+
+defm FREXP_MANT : UnaryIntrinsicDouble<IL_OP_D_FREXP_MANT, int_AMDIL_frexp_mant>;
+
+def DTRIG_PREOP : TwoInOneOut<IL_OP_D_TRIG_PREOP, (outs GPRF64:$dst),
+    (ins GPRF64:$src0, GPRF32:$src1),
+    !strconcat(IL_OP_D_TRIG_PREOP.Text, " $dst, $src0, $src1"),
+    [(set GPRF64:$dst, 
+        (int_AMDIL_trig_preop_f64 GPRF64:$src0, GPRF32:$src1))]>;
+
+
+def LDEXP_f32 : TwoInOneOut<IL_OP_LDEXP, (outs GPRF32:$dst),
+    (ins GPRF32:$src, GPRI32:$src1),
+    !strconcat(IL_OP_LDEXP.Text, " $dst, $src, $src1"),
+    [(set GPRF32:$dst,
+        (int_AMDIL_ldexp GPRF32:$src, GPRI32:$src1))]>;
+
+def LDEXP_v2f32 : TwoInOneOut<IL_OP_LDEXP, (outs GPRV2F32:$dst),
+    (ins GPRV2F32:$src, GPRV2I32:$src1),
+    !strconcat(IL_OP_LDEXP.Text, " $dst, $src, $src1"),
+    [(set GPRV2F32:$dst,
+        (int_AMDIL_ldexp GPRV2F32:$src, GPRV2I32:$src1))]>;
+
+def LDEXP_v4f32 : TwoInOneOut<IL_OP_LDEXP, (outs GPRV4F32:$dst),
+    (ins GPRV4F32:$src, GPRV4I32:$src1),
+    !strconcat(IL_OP_LDEXP.Text, " $dst, $src, $src1"),
+    [(set GPRV4F32:$dst,
+        (int_AMDIL_ldexp GPRV4F32:$src, GPRV4I32:$src1))]>;
+defm MSAD  : TernaryIntrinsicInt<IL_OP_MSAD, int_AMDIL_media_msad>;
+
+def QSAD_i64  : ThreeInOneOut<IL_OP_QSAD, (outs GPRI64:$dst),
+    (ins GPRI64:$src0, GPRI32:$src1, GPRI64:$src2),
+    !strconcat(IL_OP_QSAD.Text, " $dst, $src0, $src1, $src2"),
+    [(set GPRI64:$dst,
+        (int_AMDIL_media_qsad GPRI64:$src0, GPRI32:$src1, GPRI64:$src2))]>;
+
+def MQSAD_i64  : ThreeInOneOut<IL_OP_MQSAD, (outs GPRI64:$dst),
+    (ins GPRI64:$src0, GPRI32:$src1, GPRI64:$src2),
+    !strconcat(IL_OP_MQSAD.Text, " $dst, $src0, $src1, $src2"),
+    [(set GPRI64:$dst,
+        (int_AMDIL_media_mqsad GPRI64:$src0, GPRI32:$src1, GPRI64:$src2))]>;
+
+defm ADD_RTE : BinaryIntrinsicFloat<IL_OP_ADD_RTE, int_AMDIL_add_rte>;
+defm ADD_RTP : BinaryIntrinsicFloat<IL_OP_ADD_RTP, int_AMDIL_add_rtp>;
+defm ADD_RTN : BinaryIntrinsicFloat<IL_OP_ADD_RTN, int_AMDIL_add_rtn>;
+defm ADD_RTZ : BinaryIntrinsicFloat<IL_OP_ADD_RTZ, int_AMDIL_add_rtz>;
+defm SUB_RTE : BinaryIntrinsicFloat<IL_OP_SUB_RTE, int_AMDIL_sub_rte>;
+defm SUB_RTP : BinaryIntrinsicFloat<IL_OP_SUB_RTP, int_AMDIL_sub_rtp>;
+defm SUB_RTN : BinaryIntrinsicFloat<IL_OP_SUB_RTN, int_AMDIL_sub_rtn>;
+defm SUB_RTZ : BinaryIntrinsicFloat<IL_OP_SUB_RTZ, int_AMDIL_sub_rtz>;
+defm MUL_RTE : BinaryIntrinsicFloat<IL_OP_MUL_RTE, int_AMDIL_mul_rte>;
+defm MUL_RTP : BinaryIntrinsicFloat<IL_OP_MUL_RTP, int_AMDIL_mul_rtp>;
+defm MUL_RTN : BinaryIntrinsicFloat<IL_OP_MUL_RTN, int_AMDIL_mul_rtn>;
+defm MUL_RTZ : BinaryIntrinsicFloat<IL_OP_MUL_RTZ, int_AMDIL_mul_rtz>;
+defm MAD_RTE : TernaryIntrinsicFloat<IL_OP_MAD_RTE, int_AMDIL_mad_rte>;
+defm MAD_RTP : TernaryIntrinsicFloat<IL_OP_MAD_RTP, int_AMDIL_mad_rtp>;
+defm MAD_RTN : TernaryIntrinsicFloat<IL_OP_MAD_RTN, int_AMDIL_mad_rtn>;
+defm MAD_RTZ : TernaryIntrinsicFloat<IL_OP_MAD_RTZ, int_AMDIL_mad_rtz>;
+defm FMA_RTE : TernaryIntrinsicFloat<IL_OP_FMA_RTE, int_AMDIL_fma_rte>;
+defm FMA_RTP : TernaryIntrinsicFloat<IL_OP_FMA_RTP, int_AMDIL_fma_rtp>;
+defm FMA_RTN : TernaryIntrinsicFloat<IL_OP_FMA_RTN, int_AMDIL_fma_rtn>;
+defm FMA_RTZ : TernaryIntrinsicFloat<IL_OP_FMA_RTZ, int_AMDIL_fma_rtz>;
+defm ADD_RTE : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTE, int_AMDIL_add_rte>;
+defm ADD_RTP : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTP, int_AMDIL_add_rtp>;
+defm ADD_RTN : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTN, int_AMDIL_add_rtn>;
+defm ADD_RTZ : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTZ, int_AMDIL_add_rtz>;
+defm SUB_RTE : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTE, int_AMDIL_sub_rte>;
+defm SUB_RTP : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTP, int_AMDIL_sub_rtp>;
+defm SUB_RTN : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTN, int_AMDIL_sub_rtn>;
+defm SUB_RTZ : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTZ, int_AMDIL_sub_rtz>;
+defm MUL_RTE : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTE, int_AMDIL_mul_rte>;
+defm MUL_RTP : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTP, int_AMDIL_mul_rtp>;
+defm MUL_RTN : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTN, int_AMDIL_mul_rtn>;
+defm MUL_RTZ : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTZ, int_AMDIL_mul_rtz>;
+defm MAD_RTE : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTE, int_AMDIL_mad_rte>;
+defm MAD_RTP : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTP, int_AMDIL_mad_rtp>;
+defm MAD_RTN : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTN, int_AMDIL_mad_rtn>;
+defm MAD_RTZ : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTZ, int_AMDIL_mad_rtz>;
+
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,193 @@
+//===-- AMDILIntrinsicInfo.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the IntrinsicInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include <cstring>
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDILGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+AMDILIntrinsicInfo::AMDILIntrinsicInfo(AMDILTargetMachine *tm)
+  : TargetIntrinsicInfo(), mTM(tm)
+{
+}
+
+std::string
+AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
+                            unsigned int numTys) const
+{
+  static const char* const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+  };
+
+  //assert(!isOverloaded(IntrID)
+  //&& "AMDIL Intrinsics are not overloaded");
+  if (IntrID < Intrinsic::num_intrinsics) {
+    return "";
+  }
+  assert(IntrID < AMDILIntrinsic::num_AMDIL_intrinsics
+         && "Invalid intrinsic ID");
+
+  std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+  return Result;
+}
+
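+// checkTruncation strips a trailing type suffix (e.g. "_f32", "_i64") from a
+// builtin name so that the type-suffixed OpenCL builtins map onto a single
+// recognizer entry. As a sketch (the builtin name below is hypothetical):
+//   Name = "__amdil_min_f32", Len = 15  ==>  Len = 11 ("__amdil_min").
+// Atomic builtins ("__atom...") are never truncated but still return true so
+// that lookupName runs them through atomTranslateIfNeeded.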
+static bool
+checkTruncation(const char *Name, unsigned int& Len)
+{
+  const char *ptr = Name + (Len - 1);
+  while(ptr != Name && *ptr != '_') {
+    --ptr;
+  }
+  // We don't want to truncate atomic instruction names, but we do want
+  // to take this truncation-check path so that the atomic instructions
+  // can be translated below if needed.
+  if (!strncmp(Name, "__atom", 6)) {
+    return true;
+  }
+  if (strstr(ptr, "i32")
+      || strstr(ptr, "u32")
+      || strstr(ptr, "i64")
+      || strstr(ptr, "u64")
+      || strstr(ptr, "f32")
+      || strstr(ptr, "f64")
+      || strstr(ptr, "i16")
+      || strstr(ptr, "u16")
+      || strstr(ptr, "i8")
+      || strstr(ptr, "u8")) {
+    Len = (unsigned int)(ptr - Name);
+    return true;
+  }
+  return false;
+}
+
+// We don't want to support both the OpenCL 1.0 atomics
+// and the 1.1 atomics with different names, so we translate
+// the 1.0 atomics to the 1.1 naming here if needed.
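+// For example, a 1.0-style "__atom_<op>..." name becomes "__atomic_<op>..."
+// (and Len grows by 2) before the GCC-builtin lookup; any other name is
+// simply copied and NUL-terminated.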
+static char*
+atomTranslateIfNeeded(const char *Name, unsigned int Len)
+{
+  char *buffer = NULL;
+  if (strncmp(Name, "__atom_", 7))  {
+    // If we are not starting with __atom_, then
+    // go ahead and continue on with the allocation.
+    buffer = new char[Len + 1];
+    memcpy(buffer, Name, Len);
+  } else {
+    buffer = new char[Len + 3];
+    memcpy(buffer, "__atomic_", 9);
+    memcpy(buffer + 9, Name + 7, Len - 7);
+    Len += 2;
+  }
+  buffer[Len] = '\0';
+  return buffer;
+}
+
+unsigned int
+AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
+{
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDILGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+  AMDILIntrinsic::ID IntrinsicID
+  = (AMDILIntrinsic::ID)Intrinsic::not_intrinsic;
+  if (checkTruncation(Name, Len)) {
+    char *buffer = atomTranslateIfNeeded(Name, Len);
+    IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", buffer);
+    delete [] buffer;
+  } else {
+    IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);
+  }
+  if (!isValidIntrinsic(IntrinsicID)) {
+    return 0;
+  }
+  if (IntrinsicID != (AMDILIntrinsic::ID)Intrinsic::not_intrinsic) {
+    return IntrinsicID;
+  }
+  return 0;
+}
+
+bool
+AMDILIntrinsicInfo::isOverloaded(unsigned IntrID) const
+{
+  if (IntrID == 0)
+    return false;
+  // Overload Table
+  unsigned id = IntrID - Intrinsic::num_intrinsics + 1;
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+Function*
+AMDILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+                                   Type **Tys,
+                                   unsigned numTys) const
+{
+  assert(!isOverloaded(IntrID) && "AMDIL intrinsics are not overloaded");
+  AttrListPtr AList = getAttributes((AMDILIntrinsic::ID) IntrID);
+  LLVMContext& Context = M->getContext();
+  unsigned int id = IntrID;
+  Type *ResultTy = NULL;
+  std::vector<Type*> ArgTys;
+  bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+  // We need to add the resource ID argument for atomics.
+  if (id >= AMDILIntrinsic::AMDIL_atomic_add_gi32
+      && id <= AMDILIntrinsic::AMDIL_atomic_xor_ru64_noret) {
+    ArgTys.push_back(IntegerType::get(Context, 32));
+  }
+
+  return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+                        FunctionType::get(ResultTy, ArgTys, IsVarArg),
+                        AList));
+}
+
+/// Because the code generator has to support different SC versions,
+/// this function checks that the intrinsic being used is actually
+/// valid for the current target. If it is not, the function call is
+/// not translated into an intrinsic and the fall-back, software-emulated
+/// path should pick up the result.
+bool
+AMDILIntrinsicInfo::isValidIntrinsic(unsigned int IntrID) const
+{
+  const AMDILSubtarget *stm = mTM->getSubtargetImpl();
+  switch (IntrID) {
+  default:
+    return true;
+  case AMDILIntrinsic::AMDIL_convert_f32_i32_rpi:
+  case AMDILIntrinsic::AMDIL_convert_f32_i32_flr:
+  case AMDILIntrinsic::AMDIL_convert_f32_f16_near:
+  case AMDILIntrinsic::AMDIL_convert_f32_f16_neg_inf:
+  case AMDILIntrinsic::AMDIL_convert_f32_f16_plus_inf:
+    return stm->calVersion() >= CAL_VERSION_SC_139;
+  };
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,53 @@
+//===-- AMDILIntrinsicInfo.h ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the AMDIL implementation of the IntrinsicInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_INTRINSICS_H_
+#define _AMDIL_INTRINSICS_H_
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Intrinsics.h"
+
+namespace llvm
+{
+class AMDILTargetMachine;
+namespace AMDILIntrinsic
+{
+enum ID {
+  last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+  , num_AMDIL_intrinsics
+};
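+// AMDIL intrinsic IDs are allocated immediately after the generic LLVM ones,
+// which is why getName() and isOverloaded() in the .cpp subtract
+// Intrinsic::num_intrinsics before indexing the generated tables.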
+
+}
+
+
+class AMDILIntrinsicInfo : public TargetIntrinsicInfo
+{
+  AMDILTargetMachine *mTM;
+public:
+  AMDILIntrinsicInfo(AMDILTargetMachine *tm);
+  std::string getName(unsigned int IntrId, Type **Tys = 0,
+                      unsigned int numTys = 0) const;
+  unsigned int lookupName(const char *Name, unsigned int Len) const;
+  bool isOverloaded(unsigned int IID) const;
+  Function *getDeclaration(Module *M, unsigned int ID,
+                           Type **Tys = 0,
+                           unsigned int numTys = 0) const;
+  bool isValidIntrinsic(unsigned int) const;
+}; // AMDILIntrinsicInfo
+}
+
+#endif // _AMDIL_INTRINSICS_H_
+




