[llvm-branch-commits] [llvm-branch] r161895 [1/5] - in /llvm/branches/AMDILBackend/lib/Target: ./ AMDIL/ AMDIL/TargetInfo/
Victor Oliveira
Victor.Oliveira at amd.com
Tue Aug 14 14:38:59 PDT 2012
Author: victorm
Date: Tue Aug 14 16:38:58 2012
New Revision: 161895
URL: http://llvm.org/viewvc/llvm-project?rev=161895&view=rev
Log:
AMDIL Backend - First commit
Added:
llvm/branches/AMDILBackend/lib/Target/AMDIL/
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMPC.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMVersion.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem32.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem64.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMultiClass.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNodes.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILOperands.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPatterns.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManagerImpl.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPrintfConvert.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILProfiles.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsScalar.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV2.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV4.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalar.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarW.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarX.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarY.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarZ.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2XY.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2ZW.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV4.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTokenDesc.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILUtilityFunctions.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILUtilityFunctions.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILVersion.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/CMakeLists.txt
llvm/branches/AMDILBackend/lib/Target/AMDIL/LLVMBuild.txt
llvm/branches/AMDILBackend/lib/Target/AMDIL/Processors.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/TargetInfo/
llvm/branches/AMDILBackend/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/TargetInfo/CMakeLists.txt
llvm/branches/AMDILBackend/lib/Target/AMDIL/TargetInfo/LLVMBuild.txt
llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodata.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodata.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodb.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodb_gen.h
Modified:
llvm/branches/AMDILBackend/lib/Target/LLVMBuild.txt
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,318 @@
+//===-- AMDIL.h -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the
+// LLVM AMDIL back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H_
+#define AMDIL_H_
+#include "AMDILLLVMPC.h"
+#include "AMDILLLVMVersion.h"
+#include "AMDILInstPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define AMDIL_MAJOR_VERSION 3
+#define AMDIL_MINOR_VERSION 1
+#define AMDIL_REVISION_NUMBER 104
+#define AMDIL_20_REVISION_NUMBER 88
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+#define OPENCL_MAX_NUM_SEMAPHORES 15
+
+// The next three values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+// SC->CAL version matchings.
+#define CAL_CACHED_ALIGNED_UAVS 1679
+#define CAL_VERSION_SC_156 1650
+#define CAL_VERSION_SC_155 1630
+#define CAL_VERSION_SC_154 1624
+#define CAL_VERSION_SC_153 1616
+#define CAL_VERSION_SC_152 1603
+#define CAL_VERSION_SC_151 1589
+#define CAL_VERSION_SC_150 1561
+#define CAL_VERSION_SC_149 CAL_VERSION_SC_150
+#define CAL_VERSION_SC_148 1525
+#define CAL_VERSION_SC_147 CAL_VERSION_SC_148
+#define CAL_VERSION_SC_146 CAL_VERSION_SC_148
+#define CAL_VERSION_SC_145 1451
+#define CAL_VERSION_SC_144 CAL_VERSION_SC_145
+#define CAL_VERSION_SC_143 1441
+#define CAL_VERSION_SC_142 CAL_VERSION_SC_143
+#define CAL_VERSION_SC_141 1420
+#define CAL_VERSION_SC_140 1400
+#define CAL_VERSION_SC_139 1387
+#define CAL_VERSION_SC_138 CAL_VERSION_SC_139
+#define CAL_APPEND_BUFFER_SUPPORT 1340
+#define CAL_VERSION_SC_137 1331
+#define CAL_VERSION_SC_136 982
+#define CAL_VERSION_SC_135 950
+#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
+
+#define OCL_DEVICE_RV710 0x00001
+#define OCL_DEVICE_RV730 0x00002
+#define OCL_DEVICE_RV770 0x00004
+#define OCL_DEVICE_CEDAR 0x00008
+#define OCL_DEVICE_REDWOOD 0x00010
+#define OCL_DEVICE_JUNIPER 0x00020
+#define OCL_DEVICE_CYPRESS 0x00040
+#define OCL_DEVICE_CAICOS 0x00080
+#define OCL_DEVICE_TURKS 0x00100
+#define OCL_DEVICE_BARTS 0x00200
+#define OCL_DEVICE_CAYMAN 0x00400
+#define OCL_DEVICE_TAHITI 0x00800
+#define OCL_DEVICE_PITCAIRN 0x01000
+#define OCL_DEVICE_CAPEVERDE 0x02000
+#define OCL_DEVICE_TRINITY 0x04000
+#define OCL_DEVICE_DOGS 0x08000
+#define OCL_DEVICE_CATS 0x10000
+#define OCL_DEVICE_BUNNIES 0x20000
+#define OCL_DEVICE_ALL 0xFFFFF
+
+/// The number of function IDs that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+namespace llvm
+{
+class AMDILInstPrinter;
+class AMDILTargetMachine;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+/// Instruction selection passes.
+FunctionPass*
+createAMDILISelDag(AMDILTargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILPrintfConvert(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILInlinePass(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILPeepholeOpt(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+/// Pre regalloc passes.
+FunctionPass*
+createAMDILPointerManager(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILMachinePeephole(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+/// Pre emit passes.
+FunctionPass* createMachinePostDominatorTreePass();
+FunctionPass*
+createAMDILCFGPreparationPass();
+FunctionPass*
+createAMDILCFGStructurizerPass();
+FunctionPass*
+createAMDILLiteralManager(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILIOExpansion(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+createAMDILSwizzleEncoder(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+/// Instruction Emission Passes
+AMDILInstPrinter *createAMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI);
+
+extern Target TheAMDILTarget;
+} // end namespace llvm;
+
+#define GET_REGINFO_ENUM
+#include "AMDILGenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "AMDILGenInstrInfo.inc"
+
+/// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm
+{
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a separate piece of memory that is unique from other
+/// memory locations.
+namespace AMDILAS
+{
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, // Address space for private memory.
+ GLOBAL_ADDRESS = 1, // Address space for global memory.
+ CONSTANT_ADDRESS = 2, // Address space for constant memory.
+ LOCAL_ADDRESS = 3, // Address space for local memory.
+ REGION_ADDRESS = 4, // Address space for region memory.
+ GLOBAL_HOST_ADDRESS = 5, // Address space with global host endianness.
+ CONSTANT_HOST_ADDRESS = 6, // Address space with constant host endianness.
+ FLAT_ADDRESS = 7, // Address space for flat memory.
+ ADDRESS_NONE = 8 // Address space for unknown memory.
+};
+
+// We are piggybacking on the CommentFlag enum in MachineInstr.h to
+// set bits in AsmPrinterFlags of the MachineInstruction. We will
+// start at bit 16 and allocate down while LLVM will start at bit
+// 1 and allocate up.
+
+// This union/struct combination is an easy way to read out the
+// exact bits that are needed.
+typedef union ResourceRec {
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable. (Permanent)
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction. (Permanent)
+ unsigned short ResourceID : 10; // Flag to specify the resource ID for
+ // the op. (Permanent)
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short ByteStore : 1; // Flag to specify if the op is byte
+ // store op.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has
+ // a conflict.
+ unsigned short isImage : 1; // Reserved for future use.
+#else
+ unsigned short isImage : 1; // Reserved for future use/llvm.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a
+ // conflict.
+ unsigned short ByteStore : 1; // Flag to specify if the op is a byte
+ // store op.
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short ResourceID : 10; // Flag to specify the resource ID for
+ // the op. (Permanent)
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction. (Permanent)
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable. (Permanent)
+#endif
+ } bits;
+ unsigned short u16all;
+} InstrResEnc;
+
+} // namespace AMDILAS
+
+// The OpSwizzle encodes a subset of all possible
+// swizzle combinations into a number of bits using
+// only the combinations utilized by the backend.
+// The lower 128 are for source swizzles and the
+// upper 128 are for destination swizzles.
+// The valid mappings can be found in the
+// getSrcSwizzle and getDstSwizzle functions of
+// AMDILUtilityFunctions.cpp.
+typedef union SwizzleRec {
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned char dst : 1;
+ unsigned char swizzle : 7;
+#else
+ unsigned char swizzle : 7;
+ unsigned char dst : 1;
+#endif
+ } bits;
+ unsigned char u8all;
+} OpSwizzle;
+// Enums corresponding to AMDIL condition codes for IL. These
+// values must be kept in sync with the ones in the .td file.
+namespace AMDILCC
+{
+enum CondCodes {
+ // AMDIL specific condition codes. These correspond to the IL_CC_*
+ // in AMDILInstrInfo.td and must be kept in the same order.
+ IL_CC_D_EQ = 0, // DEQ instruction.
+ IL_CC_D_GE = 1, // DGE instruction.
+ IL_CC_D_LT = 2, // DLT instruction.
+ IL_CC_D_NE = 3, // DNE instruction.
+ IL_CC_F_EQ = 4, // EQ instruction.
+ IL_CC_F_GE = 5, // GE instruction.
+ IL_CC_F_LT = 6, // LT instruction.
+ IL_CC_F_NE = 7, // NE instruction.
+ IL_CC_I_EQ = 8, // IEQ instruction.
+ IL_CC_I_GE = 9, // IGE instruction.
+ IL_CC_I_LT = 10, // ILT instruction.
+ IL_CC_I_NE = 11, // INE instruction.
+ IL_CC_U_GE = 12, // UGE instruction.
+ IL_CC_U_LT = 13, // ULT instruction.
+ // Pseudo IL Comparison instructions here.
+ IL_CC_F_GT = 14, // GT instruction.
+ IL_CC_U_GT = 15,
+ IL_CC_I_GT = 16,
+ IL_CC_D_GT = 17,
+ IL_CC_F_LE = 18, // LE instruction
+ IL_CC_U_LE = 19,
+ IL_CC_I_LE = 20,
+ IL_CC_D_LE = 21,
+ IL_CC_F_UNE = 22,
+ IL_CC_F_UEQ = 23,
+ IL_CC_F_ULT = 24,
+ IL_CC_F_UGT = 25,
+ IL_CC_F_ULE = 26,
+ IL_CC_F_UGE = 27,
+ IL_CC_F_ONE = 28,
+ IL_CC_F_OEQ = 29,
+ IL_CC_F_OLT = 30,
+ IL_CC_F_OGT = 31,
+ IL_CC_F_OLE = 32,
+ IL_CC_F_OGE = 33,
+ IL_CC_D_UNE = 34,
+ IL_CC_D_UEQ = 35,
+ IL_CC_D_ULT = 36,
+ IL_CC_D_UGT = 37,
+ IL_CC_D_ULE = 38,
+ IL_CC_D_UGE = 39,
+ IL_CC_D_ONE = 40,
+ IL_CC_D_OEQ = 41,
+ IL_CC_D_OLT = 42,
+ IL_CC_D_OGT = 43,
+ IL_CC_D_OLE = 44,
+ IL_CC_D_OGE = 45,
+ IL_CC_U_EQ = 46,
+ IL_CC_U_NE = 47,
+ IL_CC_F_O = 48,
+ IL_CC_D_O = 49,
+ IL_CC_F_UO = 50,
+ IL_CC_D_UO = 51,
+ IL_CC_L_LE = 52,
+ IL_CC_L_GE = 53,
+ IL_CC_L_EQ = 54,
+ IL_CC_L_NE = 55,
+ IL_CC_L_LT = 56,
+ IL_CC_L_GT = 57,
+ IL_CC_UL_LE = 58,
+ IL_CC_UL_GE = 59,
+ IL_CC_UL_EQ = 60,
+ IL_CC_UL_NE = 61,
+ IL_CC_UL_LT = 62,
+ IL_CC_UL_GT = 63,
+ COND_ERROR = 64
+};
+
+} // end namespace AMDILCC
+} // end namespace llvm
+#endif // AMDIL_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,16 @@
+//===-- AMDIL.td ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "AMDILBase.td"
+include "AMDILVersion.td"
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,676 @@
+//===-- AMDIL789IOExpansion.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the IO expansion class for 789 devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+
+using namespace llvm;
+AMDIL789IOExpansion::AMDIL789IOExpansion(TargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : AMDILIOExpansion(tm, OptLevel)
+{
+}
+
+AMDIL789IOExpansion::~AMDIL789IOExpansion()
+{
+}
+
+const char *AMDIL789IOExpansion::getPassName() const
+{
+ return "AMDIL 789 IO Expansion Pass";
+}
+// This code produces the following pseudo-IL (shown for src == r1006):
+// cmov_logical r1006.x___, r1008.y, r1006.y, r1006.x
+// cmov_logical r1006.x___, r1008.z, r1006.z, r1006.x
+// cmov_logical $dst.x___, r1008.w, r1006.w, r1006.x
+void
+AMDIL789IOExpansion::emitComponentExtract(MachineInstr *MI,
+ unsigned src, unsigned dst, bool before)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32),
+ (src - AMDIL::R1) + AMDIL::Rx1)
+ .addReg(AMDIL::Ry1008)
+ .addReg((src - AMDIL::R1) + AMDIL::Ry1)
+ .addReg((src - AMDIL::R1) + AMDIL::Rx1);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32),
+ (src - AMDIL::R1) + AMDIL::Rx1)
+ .addReg(AMDIL::Rz1008)
+ .addReg((src - AMDIL::R1) + AMDIL::Rz1)
+ .addReg((src - AMDIL::R1) + AMDIL::Rx1);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), dst)
+ .addReg(AMDIL::Rw1008)
+ .addReg((src - AMDIL::R1) + AMDIL::Rw1)
+ .addReg((src - AMDIL::R1) + AMDIL::Rx1);
+}
+// We have a 128 bit load but an 8/16/32 bit value, so we need to
+// select the correct component and make sure that the correct
+// bits are selected. For the 8 and 16 bit cases we need to
+// extract from the component the correct bits and for 32 bits
+// we just need to select the correct component.
+void
+AMDIL789IOExpansion::emitDataLoadSelect(MachineInstr *MI)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ emitComponentExtract(MI, AMDIL::R1011, AMDIL::Rx1011, false);
+ if (getMemorySize(MI) == 1) {
+ // This produces the following pseudo-IL:
+ // iand r1006.x___, r1010.xxxx, l14.xxxx
+ // iadd r1006, r1006.x, {0, -1, -2, -3}
+ // ieq r1008, r1006, 0
+ // ishr r1011, r1011.x, {0, 8, 16, 24}
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1006)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1006)
+ .addReg(AMDIL::Rx1006)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1006)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::Rx1011)
+ .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+ emitComponentExtract(MI, AMDIL::R1011, AMDIL::Rx1011, false);
+ } else if (getMemorySize(MI) == 2) {
+ // This produces the following pseudo-IL:
+ // ishr r1007.x___, r1010.xxxx, 1
+ // iand r1008.x___, r1007.xxxx, 1
+ // ishr r1007.x___, r1011.xxxx, 16
+ // cmov_logical r1011.x___, r1008.xxxx, r1007.xxxx, r1011.xxxx
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1007)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1011);
+ }
+}
+// This function modifies the address calculation so that the load comes from
+// a vector register type instead of being a dword addressed load.
+void
+AMDIL789IOExpansion::emitVectorAddressCalc(MachineInstr *MI, bool is32bit, bool needsSelect)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ // This produces the following pseudo-IL:
+ // ishr r1007.x___, r1010.xxxx, (is32bit) ? 2 : 3
+ // iand r1008.x___, r1007.xxxx, (is32bit) ? 3 : 1
+ // ishr r1007.x___, r1007.xxxx, (is32bit) ? 2 : 1
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal((is32bit) ? 0x2 : 3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1007)
+ .addImm(mMFI->addi32Literal((is32bit) ? 3 : 1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1007)
+ .addImm(mMFI->addi32Literal((is32bit) ? 2 : 1));
+ if (needsSelect) {
+ // If the component selection is required, the following
+ // pseudo-IL is produced.
+ // iadd r1008, r1008.x, (is32bit) ? {0, -1, -2, -3} : {0, 0, -1, -1}
+ // ieq r1008, r1008, 0
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi128Literal((is32bit) ? 0xFFFFFFFFULL << 32 : 0ULL,
+ (is32bit) ? 0xFFFFFFFEULL | (0xFFFFFFFDULL << 32) :
+ -1ULL));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0));
+ }
+}
+// This function emits a switch statement and writes 32bit/64bit
+// value to a 128bit vector register type.
+void
+AMDIL789IOExpansion::emitVectorSwitchWrite(MachineInstr *MI, bool is32bit)
+{
+ uint32_t xID = getPointerID(MI);
+ assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
+ // This section generates the following pseudo-IL:
+ // switch r1008.x
+ // default
+ // mov x1[r1007.x].(is32bit) ? x___ : xy__, r1011.x{y}
+ // break
+ // case 1
+ // mov x1[r1007.x].(is32bit) ? _y__ : __zw, r1011.x{yxy}
+ // break
+ // if is32bit is true, case 2 and 3 are emitted.
+ // case 2
+ // mov x1[r1007.x].__z_, r1011.x
+ // break
+ // case 3
+ // mov x1[r1007.x].___w, r1011.x
+ // break
+ // endswitch
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SWITCH))
+ .addReg(AMDIL::Rx1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DEFAULT));
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHSTORE),
+ (is32bit) ? AMDIL::Rx1007 : AMDIL::Rxy1007)
+ .addReg((is32bit) ? AMDIL::Rx1011 : AMDIL::Rxy1011)
+ .addImm(xID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CASE)).addImm(1);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHSTORE),
+ (is32bit) ? AMDIL::Ry1007 : AMDIL::Rzw1007)
+ .addReg(is32bit ? AMDIL::Rx1011 : AMDIL::Rxy1011)
+ .addImm(xID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
+ if (is32bit) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CASE)).addImm(2);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHSTORE), AMDIL::Rz1007)
+ .addReg(AMDIL::Rx1011)
+ .addImm(xID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CASE)).addImm(3);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHSTORE), AMDIL::Rw1007)
+ .addReg(AMDIL::Rx1011)
+ .addImm(xID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
+ }
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ENDSWITCH));
+
+}
+void
+AMDIL789IOExpansion::expandPrivateLoad(MachineInstr *MI)
+{
+ bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+ if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesScratch() && mMFI->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t xID = getPointerID(MI);
+ assert(xID && "Found a scratch load that was incorrectly marked as zero ID!\n");
+ if (!xID) {
+ xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // Since the private register is 128 bit aligned, we have to align the address
+ // first (our source address is 32 bit aligned) and then load the data.
+ // This produces the following pseudo-IL:
+ // ishr r1010.x___, r1010.xxxx, 4
+ // mov r1011, x1[r1010.x]
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SHR_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(4));
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(xID);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ emitVectorAddressCalc(MI, true, true);
+ // This produces the following pseudo-IL:
+ // mov r1011, x1[r1007.x]
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ .addReg(AMDIL::Rx1007)
+ .addImm(xID);
+ // These instructions go after the current MI.
+ emitDataLoadSelect(MI);
+ break;
+ case 8:
+ emitVectorAddressCalc(MI, false, true);
+ // This produces the following pseudo-IL:
+ // mov r1011, x1[r1007.x]
+ // cmov_logical r1011.xy__, r1008.xxxx, r1011.xy, r1011.zw
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ .addReg(AMDIL::Rx1007)
+ .addImm(xID);
+ // These instructions go after the current MI.
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::CMOVLOG_i64), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rxy1011)
+ .addReg(AMDIL::Rzw1011);
+ break;
+ }
+ unsigned dataReg;
+ expandPackedData(MI);
+ dataReg = expandExtendLoad(MI);
+ if (!dataReg) {
+ dataReg = getDataReg(MI);
+ }
+ BuildMI(*mBB, MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(dataReg);
+ MI->getOperand(0).setReg(dataReg);
+}
+
+
+// Expands a constant-memory load.
+// The load is handed to expandGlobalLoad() when MI is not a hardware
+// instruction, has no memory operands, or its pointer ID is below 2.
+// Otherwise a CBLOAD sequence chosen by getMemorySize(MI) is built in front
+// of MI, followed by a move that carries MI's original operand 0 and the
+// register holding the loaded data.
+void
+AMDIL789IOExpansion::expandConstantLoad(MachineInstr *MI)
+{
+ if (!isHardwareInst(MI) || MI->memoperands_empty()) {
+ return expandGlobalLoad(MI);
+ }
+ uint32_t cID = getPointerID(MI);
+ // NOTE(review): IDs 0 and 1 take the global path; presumably they are not
+ // usable constant-buffer IDs - confirm against getPointerID().
+ if (cID < 2) {
+ return expandGlobalLoad(MI);
+ }
+ // Record an error when a kernel performs this load although the function
+ // info does not report any constant usage.
+ if (!mMFI->usesConstant() && mMFI->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // Shift r1010.x right by the literal 4, then load r1011 from constant
+ // buffer cID using the shifted value.
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SHR_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(4));
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(cID);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ // Sizes 1, 2 and 4 share one path: vector address calculation, a load
+ // through r1007.x, then emitDataLoadSelect().
+ emitVectorAddressCalc(MI, true, true);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ .addReg(AMDIL::Rx1007)
+ .addImm(cID);
+ // These instructions go after the current MI.
+ emitDataLoadSelect(MI);
+ break;
+ case 8:
+ // Load through r1007.x, then a CMOVLOG_i64 with r1008.x as its first
+ // source writes r1011.xy from r1011.xy / r1011.zw.
+ emitVectorAddressCalc(MI, false, true);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ .addReg(AMDIL::Rx1007)
+ .addImm(cID);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::CMOVLOG_i64), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rxy1011)
+ .addReg(AMDIL::Rzw1011);
+ break;
+ }
+ expandPackedData(MI);
+ // expandExtendLoad() returning 0 means no register was supplied by it;
+ // fall back to getDataReg(MI) in that case.
+ unsigned dataReg = expandExtendLoad(MI);
+ if (!dataReg) {
+ dataReg = getDataReg(MI);
+ }
+ // Build the move using MI's original operand 0, then point MI's operand 0
+ // at dataReg.
+ BuildMI(*mBB, MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(dataReg);
+ MI->getOperand(0).setReg(dataReg);
+}
+
+// Expands a load from the machine constant pool. Anything that is not a
+// static constant-pool load is delegated to expandConstantLoad(); otherwise
+// the constant selected by operand 1's index is fetched from the owning
+// function's constant pool and handed to emitCPInst().
+void
+AMDIL789IOExpansion::expandConstantPoolLoad(MachineInstr *MI)
+{
+  if (!isStaticCPLoad(MI)) {
+    expandConstantLoad(MI);
+    return;
+  }
+
+  const uint32_t cpIndex = MI->getOperand(1).getIndex();
+  const MachineConstantPool *pool =
+      MI->getParent()->getParent()->getConstantPool();
+  const std::vector<MachineConstantPoolEntry> &entries = pool->getConstants();
+  const Constant *value = entries[cpIndex].Val.ConstVal;
+  emitCPInst(MI, value, mKM, 0, isExtendLoad(MI));
+}
+
+// Expands a store to private memory.
+// The store is delegated to expandGlobalStore() when the device does not use
+// hardware private memory or reports PrivateUAV as supported. Otherwise a
+// scratch store sequence is built in front of MI, selected by
+// getMemorySize(MI):
+//   default - shift the address and store r1011 directly;
+//   1, 2    - load the existing scratch vector, merge the new byte/short
+//             into it, then write it back via emitVectorSwitchWrite();
+//   4, 8    - address calculation followed by emitVectorSwitchWrite().
+void
+AMDIL789IOExpansion::expandPrivateStore(MachineInstr *MI)
+{
+ bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+ if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
+ return expandGlobalStore(MI);
+ }
+ // Record an error when a kernel stores to scratch although the function
+ // info does not report any scratch usage.
+ if (!mMFI->usesScratch() && mMFI->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t xID = getPointerID(MI);
+ assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
+ // With asserts compiled out, recover by using the device's scratch resource
+ // ID and recording a RECOVERABLE_ERROR message.
+ if (!xID) {
+ xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI.
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // This section generates the following pseudo-IL:
+ // ishr r1010.x___, r1010.xxxx, 4
+ // mov x1[r1010.x], r1011
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SHR_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(4));
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHSTORE), AMDIL::Rx1010)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ break;
+ case 1:
+ emitVectorAddressCalc(MI, true, true);
+ // This section generates the following pseudo-IL:
+ // mov r1002, x1[r1007.x]
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
+ .addReg(AMDIL::Rx1007)
+ .addImm(xID);
+ emitComponentExtract(MI, AMDIL::R1002, AMDIL::Rx1002, true);
+ // This section generates the following pseudo-IL:
+ // iand r1003.x, r1010.x, 3
+ // iadd r1001, r1003.x, {0, -1, -2, -3}
+ // ieq r1001, r1001, 0
+ // ishr r1002, r1002.x, {0, 8, 16, 24}
+ // cmov_logical r1002, r1001, r1011.x, r1002
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1003)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1001)
+ .addReg(AMDIL::Rx1003)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1001)
+ .addReg(AMDIL::R1001)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::Rx1002)
+ .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1001)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::R1002);
+ // The four bytes now sit one per component of r1002; repack them into a
+ // single dword in r1011.x. HD4XXX uses and/shl/or, other generations use
+ // ubit_insert.
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // This section generates the following pseudo-IL:
+ // iand r1002, r1002, 0xFF
+ // ishl r1002, r1002, {0, 8, 16, 24}
+ // ior r1002.xy, r1002.xy, r1002.zw
+ // ior r1011.x, r1002.x, r1002.y
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::Rxy1002)
+ .addReg(AMDIL::Rxy1002).addReg(AMDIL::Rzw1002);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Ry1002).addReg(AMDIL::Rx1002);
+ } else {
+ // This section generates the following pseudo-IL:
+ // mov r1001.xy, r1002.yw
+ // mov r1002.xy, r1002.xz
+ // ubit_insert r1002.xy, 8, 8, r1001.xy, r1002.xy
+ // ubit_insert r1011.x, 16, 16, r1002.y, r1002.x
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::Rxy1001)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1002)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::Rxy1002)
+ .addImm(mMFI->addi32Literal(8))
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::Rxy1001)
+ .addReg(AMDIL::Rxy1002);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::Ry1002)
+ .addReg(AMDIL::Rx1002);
+ }
+ emitVectorAddressCalc(MI, true, false);
+ emitVectorSwitchWrite(MI, true);
+ break;
+ case 2:
+ emitVectorAddressCalc(MI, true, true);
+ // This section generates the following pseudo-IL:
+ // mov r1002, x1[r1007.x]
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
+ .addReg(AMDIL::Rx1007)
+ .addImm(xID);
+ emitComponentExtract(MI, AMDIL::R1002, AMDIL::Rx1002, true);
+ // This section generates the following pseudo-IL:
+ // ishr r1003.x, r1010.x, 1
+ // iand r1003.x, r1003.x, 1
+ // ishr r1001.x, r1002.x, 16
+ // cmov_logical r1002.x, r1003.x, r1002.x, r1011.x
+ // cmov_logical r1001.x, r1003.x, r1011.x, r1001.x
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1003)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1003)
+ .addReg(AMDIL::Rx1003)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1001)
+ .addReg(AMDIL::Rx1002)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1002)
+ .addReg(AMDIL::Rx1003)
+ .addReg(AMDIL::Rx1002)
+ .addReg(AMDIL::Rx1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1001)
+ .addReg(AMDIL::Rx1003)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1001);
+ // r1002.x and r1001.x now hold the two halves; repack them into
+ // r1011.x. HD4XXX uses and/shl/or, other generations use ubit_insert.
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // This section generates the following pseudo-IL:
+ // iand r1002.x, r1002.x, 0xFFFF
+ // iand r1001.x, r1001.x, 0xFFFF
+ // ishl r1001.x, r1001.x, 16
+ // ior r1011.x, r1002.x, r1001.x
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1002)
+ .addReg(AMDIL::Rx1002)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1001)
+ .addReg(AMDIL::Rx1001)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1001)
+ .addReg(AMDIL::Rx1001)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_OR_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1002).addReg(AMDIL::Rx1001);
+
+ } else {
+ // This section generates the following pseudo-IL:
+ // ubit_insert r1011.x, 16, 16, r1001.x, r1002.x
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::Rx1001)
+ .addReg(AMDIL::Rx1002);
+ }
+ emitVectorAddressCalc(MI, true, false);
+ emitVectorSwitchWrite(MI, true);
+ break;
+ case 4:
+ emitVectorAddressCalc(MI, true, false);
+ emitVectorSwitchWrite(MI, true);
+ break;
+ case 8:
+ emitVectorAddressCalc(MI, false, false);
+ emitVectorSwitchWrite(MI, false);
+ break;
+ };
+}
+// Emits the common prologue of a store expansion in front of MI:
+//  1. moves the store data (operand 0) into r1011, or a zero literal when
+//     operand 0 is undef;
+//  2. runs expandTruncData();
+//  3. computes the address into r1010.x (r1010.xy for 64-bit load/store
+//     ops): operand 1 + operand 2 when operand 2 is a register, otherwise a
+//     plain move of operand 1;
+//  4. runs expandAddressCalc() and expandPackedData().
+void
+AMDIL789IOExpansion::expandStoreSetupCode(MachineInstr *MI)
+{
+ // NOTE(review): DL is default-constructed here, whereas the sibling
+ // expanders use MI->getDebugLoc() - confirm this is intentional.
+ DebugLoc DL;
+ bool is64bit = is64bitLSOp(TM, MI);
+ uint32_t addyReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+ uint32_t addInst = (is64bit) ? AMDIL::LADD_i64 : AMDIL::ADD_i32;
+ uint32_t moveInst = (is64bit) ? AMDIL::MOVE_i64 : AMDIL::MOVE_i32;
+ // The move opcode is picked from the register class of MI's operand 0.
+ if (MI->getOperand(0).isUndef()) {
+ BuildMI(*mBB, MI, DL, mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0));
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
+ .addReg(MI->getOperand(0).getReg());
+ }
+ expandTruncData(MI);
+ if (MI->getOperand(2).isReg()) {
+ BuildMI(*mBB, MI, DL, mTII->get(addInst), addyReg)
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(2).getReg());
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(moveInst), addyReg)
+ .addReg(MI->getOperand(1).getReg());
+ }
+ expandAddressCalc(MI);
+ expandPackedData(MI);
+}
+
+
+// Emits the pack / unpack sequence for MI's data in r1011, in front of MI.
+// Does nothing unless isPackedData(MI) is true. The sequence is selected by
+// getPackedID(MI); IDs without a case below emit nothing.
+// PACK_* cases mask each component, shift the components by per-component
+// literals and combine them with HILO_BITOR; UNPACK_* cases build their
+// result with USHRVEC shifts (plus an LCREATE for UNPACK_V4I16).
+void
+AMDIL789IOExpansion::expandPackedData(MachineInstr *MI)
+{
+ if (!isPackedData(MI)) {
+ return;
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ // If we have packed data, then the shift size is no longer
+ // the same as the load size and we need to adjust accordingly
+ switch(getPackedID(MI)) {
+ default:
+ break;
+ case PACK_V2I8: {
+ // Mask x and y with 0xFF, shift by the literal (8 << 32), then combine
+ // x and y into r1011.x.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rxy1011)
+
+ .addImm(mMFI->addi64Literal(0xFFULL | (0xFFULL << 32)));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rxy1011).addImm(mMFI->addi64Literal(8ULL << 32));
+ // TODO: HILO_BITOR can be removed and replaced with OR.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011).addReg(AMDIL::Ry1011);
+
+ }
+ break;
+ case PACK_V4I8: {
+ // Mask all four components with 0xFF, shift by the 128-bit literal
+ // {8 << 32, 16 | (24 << 32)}, then fold zw into xy and y into x.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
+ // TODO: HILO_BITOR can be removed and replaced with OR.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rxy1011).addReg(AMDIL::Rzw1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011).addReg(AMDIL::Ry1011);
+
+ }
+ break;
+ case PACK_V2I16: {
+ // Mask x and y with 0xFFFF, shift by the literal (16 << 32), then
+ // combine x and y into r1011.x.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rxy1011)
+
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rxy1011)
+
+ .addImm(mMFI->addi64Literal(16ULL << 32));
+ // TODO: HILO_BITOR can be removed and replaced with OR.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011).addReg(AMDIL::Ry1011);
+
+ }
+ break;
+ case PACK_V4I16: {
+ // Mask all four components with 0xFFFF, shift by the literal
+ // (16 << 32), then combine xy and zw into r1011.xy.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(16ULL << 32));
+ // TODO: HILO_BITOR can be removed and replaced with OR.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v4i16), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rxy1011).addReg(AMDIL::Rzw1011);
+
+ }
+ break;
+ case UNPACK_V2I8:
+ // r1011.y is produced from r1011.x and the literal 8.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::Ry1011)
+ .addReg(AMDIL::Rx1011)
+ .addImm(mMFI->addi32Literal(8));
+ break;
+ case UNPACK_V4I8: {
+ // All of r1011 is produced from r1011.x and the 128-bit literal
+ // {8 << 32, 16 | (24 << 32)}.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_v4i8), AMDIL::R1011)
+ .addReg(AMDIL::Rx1011)
+ .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
+ }
+ break;
+ case UNPACK_V2I16: {
+ // r1011.y is produced from r1011.x and the literal 16.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::Ry1011)
+ .addReg(AMDIL::Rx1011)
+ .addImm(mMFI->addi32Literal(16));
+ }
+ break;
+ case UNPACK_V4I16: {
+ // r1012.xy is produced from r1011.xy and the literal 16; LCREATE then
+ // builds r1011 from r1011.xy and r1012.xy.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_v2i32), AMDIL::Rxy1012)
+ .addReg(AMDIL::Rxy1011)
+
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::Rxy1011).addReg(AMDIL::Rxy1012);
+ }
+ break;
+ };
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,218 @@
+//===-- AMDIL7XXAsmPrinter.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL7XXAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+// Constructs the 7XX AsmPrinter by forwarding ASM_PRINTER_ARGUMENTS to the
+// AMDILAsmPrinter base class; the body itself does nothing.
+// TODO: Add support for verbose.
+AMDIL7XXAsmPrinter::AMDIL7XXAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+ : AMDILAsmPrinter(ASM_PRINTER_ARGUMENTS)
+{
+}
+
+// Destructor; has an empty body.
+AMDIL7XXAsmPrinter::~AMDIL7XXAsmPrinter()
+{
+}
+///
+/// @param name symbol name to strip
+/// @brief strips KERNEL_PREFIX ("__OpenCL_") and KERNEL_SUFFIX ("_kernel")
+/// from the name and returns the remainder. The name is returned unchanged
+/// unless it starts with the prefix and ends with the suffix, with the two
+/// tokens not overlapping.
+///
+static
+std::string Strip(const std::string &name)
+{
+  static const char prefix[] = "__OpenCL_";
+  static const char suffix[] = "_kernel";
+  const size_t prefixLen = sizeof(prefix) - 1;
+  const size_t suffixLen = sizeof(suffix) - 1;
+
+  // The tokens must sit at the two ends of the name. Checking the length
+  // first keeps the offsets below from underflowing and rejects names such
+  // as "__OpenCL_kernel", where prefix and suffix share a character.
+  if (name.length() < prefixLen + suffixLen
+      || name.compare(0, prefixLen, prefix) != 0
+      || name.compare(name.length() - suffixLen, suffixLen, suffix) != 0) {
+    return name;
+  }
+  return name.substr(prefixLen, name.length() - prefixLen - suffixLen);
+}
+// Emits the macro call for MI through emitMCallInst(). The callee name is
+// "unknown" unless operand 0 is a global, in which case the global's name is
+// used; a name beginning with "__sqrt_f64" is replaced by "__sqrt_f64_7xx"
+// when the device uses hardware DoubleOps.
+void
+AMDIL7XXAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+                                  OSTREAM_TYPE &O)
+{
+  const AMDILSubtarget *subtarget = mTM->getSubtargetImpl();
+  // Keeps the global's name alive for as long as `callee` may point into it.
+  llvm::StringRef globalName;
+  const char *callee = "unknown";
+
+  if (MI->getOperand(0).isGlobal()) {
+    globalName = MI->getOperand(0).getGlobal()->getName();
+    callee = globalName.data();
+    const bool hwDouble =
+        subtarget->device()->usesHardware(AMDILDeviceInfo::DoubleOps);
+    if (hwDouble && ::strncmp(callee, "__sqrt_f64", 10) == 0) {
+      callee = "__sqrt_f64_7xx";
+    }
+  }
+
+  emitMCallInst(MI, O, callee);
+}
+
+// Prints one machine function: caches the per-function and per-module info
+// objects, records the function's full name in mKernelName and its Strip()ed
+// form in mName, then emits the function header and body.
+// Always returns false.
+bool
+AMDIL7XXAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+  MF = &lMF;
+  mMeta->setMF(&lMF);
+  mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+  mAMI = &lMF.getMMI().getObjFileInfo<AMDILModuleInfo>();
+  SetupMachineFunction(lMF);
+
+  std::string fullName = MF->getFunction()->getName();
+  mName = Strip(fullName);
+  mKernelName = fullName;
+
+  EmitFunctionHeader();
+  EmitFunctionBody();
+  return false;
+}
+
+// Prints one machine instruction into a local string buffer and hands the
+// text to OutStreamer.EmitRawText().
+//  - In debug mode the instruction is first dumped behind a ';'.
+//  - Macro functions are printed by emitMacroFunc().
+//  - Macro calls are printed as an "mcall(<id>)" whose destination is r1000
+//    (or the matching component register of r1000), followed by a "mov" from
+//    r1000 into the instruction's real destination register.
+//  - Everything else goes through printInstruction(), except instructions
+//    for which mMeta->useCompilerWrite() is true; those are replaced by a
+//    fixed uav_raw_store sequence.
+void
+AMDIL7XXAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+ std::string FunStr;
+ raw_string_ostream OFunStr(FunStr);
+ formatted_raw_ostream O(OFunStr);
+ const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+ if (mDebugMode) {
+ O << ";" ;
+ II->print(O);
+ }
+ if (isMacroFunc(II)) {
+ emitMacroFunc(II, O);
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+ return;
+ }
+ if (isMacroCall(II)) {
+ unsigned reg = 0;
+ unsigned newDst = 0;
+ OpSwizzle opSwiz, oldSwiz;
+ // Skips the first five characters of the opcode name.
+ // NOTE(review): presumably a fixed "MACRO"-style prefix - confirm.
+ const char *name = mTM->getInstrInfo()->getName(II->getOpcode()) + 5;
+ int macronum = amd::MacroDBFindMacro(name);
+ O << "\t;"<< name<<"\n";
+ O << "\tmcall("<<macronum<<") ";
+ reg = II->getOperand(0).getReg();
+ newDst = AMDIL::R1000;
+ // Both swizzles start from operand 0's target flags; only opSwiz is
+ // rewritten below, oldSwiz is used for the final mov destination.
+ oldSwiz.u8all = opSwiz.u8all =
+ II->getOperand(0).getTargetFlags();
+ // Pick the r1000 sub-register that mirrors the shape of the real
+ // destination register and the destination swizzle to print with it.
+ if (isXComponentReg(reg)) {
+ newDst = AMDIL::Rx1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isYComponentReg(reg)) {
+ newDst = AMDIL::Ry1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isZComponentReg(reg)) {
+ newDst = AMDIL::Rz1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isWComponentReg(reg)) {
+ newDst = AMDIL::Rw1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isXYComponentReg(reg)) {
+ newDst = AMDIL::Rxy1000;
+ opSwiz.bits.swizzle = AMDIL_DST_XY__;
+ } else if (isZWComponentReg(reg)) {
+ newDst = AMDIL::Rzw1000;
+ opSwiz.bits.swizzle = AMDIL_DST_XY__;
+ } else {
+ opSwiz.bits.swizzle = AMDIL_DST_DFLT;
+ }
+ // Print "(dst), (src, src, ...)": operand 0 is replaced by newDst, the
+ // remaining operands are printed with printOperand().
+ for (unsigned x = 0, y = II->getNumOperands(); x < y; ++x) {
+ if (!x) {
+ O << "(";
+ O << getRegisterName(newDst);
+ O << getDstSwizzle(opSwiz.bits.swizzle);
+ } else {
+ printOperand(II, x
+ , O
+ );
+ }
+ if (!x) {
+ O << "), (";
+ } else if (x != y - 1) {
+ O << ", ";
+ } else {
+ O << ")\n";
+ }
+ }
+ // Copy the macro result from newDst into the real destination, using a
+ // source swizzle that matches the destination's component(s).
+ O << "\tmov " << getRegisterName(reg) << getDstSwizzle(oldSwiz.bits.swizzle)
+ << ", " << getRegisterName(newDst);
+ if (isXComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_X000);
+ } else if (isYComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_0X00);
+ } else if (isZComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_00X0);
+ } else if (isWComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_000X);
+ } else if (isXYComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_XY00);
+ } else if (isZWComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_00XY);
+ } else {
+ O << getSrcSwizzle(AMDIL_SRC_DFLT);
+ }
+ O << "\n";
+ // Remember the macro: in mMacroIDs when the device supports MacroDB,
+ // otherwise as a called intrinsic on the function info.
+ if (curTarget->device()->isSupported(
+ AMDILDeviceInfo::MacroDB)) {
+ mMacroIDs.insert(macronum);
+ } else {
+ mMFI->addCalledIntr(macronum);
+ }
+ } else {
+
+
+ // Print the assembly for the instruction.
+ // We want to make sure that we do HW constants
+ // before we do arena segment
+ if (mMeta->useCompilerWrite(II)) {
+ // TODO: This is a hack to get around some
+ // conformance failures.
+ O << "\tif_logicalz cb0[0].x\n";
+ O << "\tuav_raw_store_id("
+ << curTarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+ << ") ";
+ O << "mem0.x___, cb0[3].x, r0.0\n";
+ O << "\tendif\n";
+ mMFI->addMetadata(";memory:compilerwrite");
+ } else {
+ printInstruction(II, O);
+ }
+ }
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,62 @@
+//===-- AMDIL7XXAsmPrinter.h ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Asm Printer class for 7XX generation of cards. This class handles all of
+// the items that are unique to these devices that must be handled by the
+// AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_7XX_ASM_PRINTER_H_
+#define _AMDIL_7XX_ASM_PRINTER_H_
+#include "AMDILAsmPrinter.h"
+
+namespace llvm
+{
+// AsmPrinter specialization for 7XX devices, derived from AMDILAsmPrinter.
+class LLVM_LIBRARY_VISIBILITY AMDIL7XXAsmPrinter : public AMDILAsmPrinter
+{
+public:
+ //
+ // Constructor for the AMDIL 7XX specific AsmPrinter class.
+ // Interface is defined by LLVM proper and should reference
+ // there for more information.
+ //
+ AMDIL7XXAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+ //
+ // Destructor for the 7XX Asm Printer class that deletes
+ // all of the allocated memory
+ //
+ virtual ~AMDIL7XXAsmPrinter();
+
+
+ //
+ // @param MI Machine instruction to print
+ // @brief prints the assembly text for a single machine instruction
+ //
+ void
+ EmitInstruction(const MachineInstr *MI);
+
+ //
+ // @param F MachineFunction to print the assembly for
+ // @brief parse the specified machine function and print
+ // out the assembly for all the instructions in the function
+ //
+ bool
+ runOnMachineFunction(MachineFunction &F);
+
+protected:
+ //
+ // @param MI Machine instruction to emit the macro code for
+ // @param O Output stream the macro call is written to
+ //
+ // Emits a fully functional macro function that uses the argument
+ // registers as the macro arguments.
+ //
+ virtual void
+ emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
+
+}; // AMDIL7XXAsmPrinter
+} // end of llvm namespace
+#endif // _AMDIL_7XX_ASM_PRINTER_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,164 @@
+//===-- AMDIL7XXDevice.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL7XXDevice.h"
+#include "AMDIL7XXAsmPrinter.h"
+#include "AMDILDevice.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILPointerManager.h"
+
+using namespace llvm;
+
+// Builds the generic 7XX device: applies this class's capability bits and
+// derives mDeviceFlag from the subtarget's device name. "rv710" and "rv730"
+// map to their own flags; every other name is treated as RV770.
+AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
+{
+  setCaps();
+
+  const std::string deviceName = mSTM->getDeviceName();
+  mDeviceFlag = OCL_DEVICE_RV770;
+  if (deviceName == "rv730") {
+    mDeviceFlag = OCL_DEVICE_RV730;
+  }
+  if (deviceName == "rv710") {
+    mDeviceFlag = OCL_DEVICE_RV710;
+  }
+}
+
+// Destructor; has an empty body.
+AMDIL7XXDevice::~AMDIL7XXDevice()
+{
+}
+
+// Sets the LocalMem bit in mSWBits.
+// NOTE(review): presumably mSWBits marks features emulated in software -
+// confirm against AMDILDevice.
+void AMDIL7XXDevice::setCaps()
+{
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+// Returns MAX_LDS_SIZE_700 when the device uses hardware LocalMem, and 0
+// otherwise.
+size_t AMDIL7XXDevice::getMaxLDSSize() const
+{
+  size_t maxSize = 0;
+  if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+    maxSize = MAX_LDS_SIZE_700;
+  }
+  return maxSize;
+}
+
+// Returns AMDILDevice::HalfWavefrontSize for the generic 7XX device.
+size_t AMDIL7XXDevice::getWavefrontSize() const
+{
+ return AMDILDevice::HalfWavefrontSize;
+}
+
+// Reports the device generation; all 7XX devices return
+// AMDILDeviceInfo::HD4XXX.
+uint32_t AMDIL7XXDevice::getGeneration() const
+{
+ return AMDILDeviceInfo::HD4XXX;
+}
+
+// Maps a resource kind to its resource ID on 7XX devices.
+// GLOBAL_ID, CONSTANT_ID, RAW_UAV_ID and ARENA_UAV_ID yield 0. LDS_ID and
+// SCRATCH_ID yield their default IDs only when the matching memory is used
+// in hardware, else 0. GDS_ID asserts (not supported on this chip) and, with
+// asserts disabled, yields DEFAULT_GDS_ID only when RegionMem is used in
+// hardware. Any other value asserts and yields 0.
+uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
+{
+  uint32_t resourceID = 0;
+
+  switch (DeviceID) {
+  case GLOBAL_ID:
+  case CONSTANT_ID:
+  case RAW_UAV_ID:
+  case ARENA_UAV_ID:
+    break;
+  case LDS_ID:
+    if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+      resourceID = DEFAULT_LDS_ID;
+    }
+    break;
+  case SCRATCH_ID:
+    if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+      resourceID = DEFAULT_SCRATCH_ID;
+    }
+    break;
+  case GDS_ID:
+    assert(0 && "GDS UAV ID is not supported on this chip");
+    if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+      resourceID = DEFAULT_GDS_ID;
+    }
+    break;
+  default:
+    assert(0 && "ID type passed in is unknown!");
+    break;
+  }
+
+  return resourceID;
+}
+
+// Returns the maximum number of UAVs; 7XX devices report 1.
+uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
+{
+ return 1;
+}
+
+// Returns a newly allocated AMDIL7XXIOExpansion pass built from TM and
+// OptLevel.
+FunctionPass*
+AMDIL7XXDevice::getIOExpansion(
+ TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+ return new AMDIL7XXIOExpansion(TM, OptLevel);
+}
+
+// Returns a newly allocated AMDIL7XXAsmPrinter built from the forwarded
+// ASM_PRINTER_ARGUMENTS.
+AsmPrinter*
+AMDIL7XXDevice::getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const
+{
+ return new AMDIL7XXAsmPrinter(ASM_PRINTER_ARGUMENTS);
+}
+
+// Returns a newly allocated pointer-manager pass for the given optimization
+// level: AMDILEGPointerManager at CodeGenOpt::None, AMDILPointerManager
+// otherwise.
+FunctionPass*
+AMDIL7XXDevice::getPointerManager(
+    TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+  if (OptLevel != CodeGenOpt::None) {
+    return new AMDILPointerManager(TM, OptLevel);
+  }
+  // Without optimizations it is possible to lose information through the
+  // stack, so the more advanced tracking pass is required. The EGPM pass
+  // tracks this, but the standard pass does not.
+  return new AMDILEGPointerManager(TM, OptLevel);
+}
+
+// Constructs the RV770 device on top of the generic 7XX device, then calls
+// setCaps() from this constructor as well.
+AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
+{
+ setCaps();
+}
+
+// Destructor; has an empty body.
+AMDIL770Device::~AMDIL770Device()
+{
+}
+
+// Applies the RV770 capability bits. When the subtarget overrides DoubleOps,
+// DoubleOps is set in mHWBits and FMA in mSWBits. LongOps is cleared in
+// mHWBits and set in mSWBits; BarrierDetect and LocalMem are set in mSWBits.
+void AMDIL770Device::setCaps()
+{
+  const bool doubleOverride = mSTM->isOverride(AMDILDeviceInfo::DoubleOps);
+  if (doubleOverride) {
+    mSWBits.set(AMDILDeviceInfo::FMA);
+    mHWBits.set(AMDILDeviceInfo::DoubleOps);
+  }
+
+  mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+
+  // Move LongOps from mHWBits to mSWBits.
+  mHWBits.reset(AMDILDeviceInfo::LongOps);
+  mSWBits.set(AMDILDeviceInfo::LongOps);
+
+  mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+// Returns AMDILDevice::WavefrontSize for the RV770 device.
+size_t AMDIL770Device::getWavefrontSize() const
+{
+ return AMDILDevice::WavefrontSize;
+}
+
+// Constructs the RV710 device; all setup is done by the AMDIL7XXDevice base
+// constructor.
+AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
+{
+}
+
+// Destructor; has an empty body.
+AMDIL710Device::~AMDIL710Device()
+{
+}
+
+// Returns AMDILDevice::QuarterWavefrontSize for the RV710 device.
+size_t AMDIL710Device::getWavefrontSize() const
+{
+ return AMDILDevice::QuarterWavefrontSize;
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,77 @@
+//===-- AMDIL7XXDevice.h --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL7XXDEVICEIMPL_H_
+#define _AMDIL7XXDEVICEIMPL_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+namespace llvm
+{
+class AMDILSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
+// devices are derived from this class. The AMDIL7XX device will only
+// support the minimal features that are required to be considered OpenCL 1.0
+// compliant and nothing more.
+class AMDIL7XXDevice : public AMDILDevice
+{
+public:
+ AMDIL7XXDevice(AMDILSubtarget *ST);
+ virtual ~AMDIL7XXDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
+ virtual uint32_t getMaxNumUAVs() const;
+ // Factory methods: each returns a newly allocated 7XX-specific object.
+ FunctionPass*
+ getIOExpansion(TargetMachine&, CodeGenOpt::Level) const;
+ AsmPrinter*
+ getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const;
+ FunctionPass*
+ getPointerManager(TargetMachine&, CodeGenOpt::Level) const;
+
+protected:
+ // Sets the capability bits for this device class.
+ virtual void setCaps();
+}; // AMDIL7XXDevice
+
+// The AMDIL770Device class represents the RV770 chip and its
+// derivative cards. The difference between this device and the base
+// class is that this device adds support for double precision
+// and has a larger wavefront size.
+class AMDIL770Device : public AMDIL7XXDevice
+{
+public:
+ AMDIL770Device(AMDILSubtarget *ST);
+ virtual ~AMDIL770Device();
+ virtual size_t getWavefrontSize() const;
+private:
+ // Sets the RV770-specific capability bits.
+ virtual void setCaps();
+}; // AMDIL770Device
+
+// The AMDIL710Device class derives from the 7XX base class, but this
+// class is a smaller derivative, so we need to overload some of the
+// functions in order to correctly specify this information.
+class AMDIL710Device : public AMDIL7XXDevice
+{
+public:
+ AMDIL710Device(AMDILSubtarget *ST);
+ virtual ~AMDIL710Device();
+ virtual size_t getWavefrontSize() const;
+}; // AMDIL710Device
+
+} // namespace llvm
+#endif // _AMDIL7XXDEVICEIMPL_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,520 @@
+//===-- AMDIL7XXIOExpansion.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the IO expansion pass for 7XX devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+
+using namespace llvm;
+AMDIL7XXIOExpansion::AMDIL7XXIOExpansion(TargetMachine &tm, // Forwards both arguments to the AMDIL789IOExpansion base class.
+ CodeGenOpt::Level OptLevel) : AMDIL789IOExpansion(tm, OptLevel)
+{
+}
+
+AMDIL7XXIOExpansion::~AMDIL7XXIOExpansion() // Nothing to release in this class.
+{
+}
+const char *AMDIL7XXIOExpansion::getPassName() const // Returns the fixed, human-readable name of this pass.
+{
+ return "AMDIL 7XX IO Expansion Pass";
+}
+
+void // Expands a global load pseudo into UAV raw loads, selected by the access size in bytes.
+AMDIL7XXIOExpansion::expandGlobalLoad(MachineInstr *MI)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI (BuildMI inserts in front of MI).
+ expandLoadStartCode(MI);
+ uint32_t ID = getPointerID(MI); // Resource ID passed as the immediate of every UAV load below.
+ mKM->setOutputInst();
+ switch(getMemorySize(MI)) {
+ default: // Any size other than 1, 2, 4 or 8: full four-component load.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ break;
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ break;
+ case 1: // Byte load: load the enclosing 32-bit word, then shift the byte down.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008) // Rx1008 = Rx1010 & 3
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010) // Rx1010 &= ~3
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008) // Adds the 128-bit literal below to Rx1008.
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32, // 32-bit words 0, -1, -2, -3 (presumably x..w - confirm).
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012) // Compares R1008 against literal 0.
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008) // Chain of three selects between 0/24, 8 and 16.
+ .addReg(AMDIL::Rx1012)
+ .addImm(mMFI->addi32Literal(0))
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Ry1012)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::Rx1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rz1012)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::Rx1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i8), AMDIL::Rx1011) // Shifts the loaded word right by Rx1008.
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ break;
+ case 2: // 16-bit load: same idea, shift amount is 16 or 0.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008) // Rx1008 = (Rx1010 & 3) >> 1
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008) // Selects between literals 16 and 0 on Rx1008.
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ break;
+ }
+ expandPackedData(MI);
+ unsigned dataReg = expandExtendLoad(MI);
+ if (!dataReg) { // expandExtendLoad returned 0: fall back to the register from getDataReg.
+ dataReg = getDataReg(MI);
+ }
+ BuildMI(*mBB, MI, MI->getDebugLoc(), // Move dataReg into MI's original operand 0, inserted before MI.
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(dataReg);
+ MI->getOperand(0).setReg(dataReg); // MI itself is rewritten to use dataReg as operand 0.
+}
+
+void // Expands a region load into GDSLOAD instructions, or defers to expandGlobalLoad when hardware region memory is not used.
+AMDIL7XXIOExpansion::expandRegionLoad(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { // Unsupported: record the error and emit nothing.
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+ return;
+ }
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesGDS() && mMFI->isKernel()) { // Error is recorded but expansion still continues.
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n");
+ if (!gID) { // Reached only when asserts are compiled out: use the device's GDS resource ID.
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI (BuildMI inserts in front of MI).
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default: // Any size other than 1, 2, 4 or 8: four GDSLOADs, one per component.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32))); // 32-bit words 0, 1, 2, 3.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+ .addReg(AMDIL::Ry1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rz1011)
+ .addReg(AMDIL::Rz1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rw1011)
+ .addReg(AMDIL::Rw1010)
+ .addImm(gID);
+ break;
+ case 1: // Sub-word load: Rx1008 = (Rx1010 & 3) * 8, load the word at Rx1010 & ~3, shift right by Rx1008.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(gID);
+ // NOTE(review): the original remark here referred to an "afterInst vector"; no such
+ // vector is visible in this function - the shift below is also inserted before MI.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ break;
+ case 2: // Emits the same sequence as case 1.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(gID);
+ // NOTE(review): the original remark here referred to an "afterInst vector"; no such
+ // vector is visible in this function - the shift below is also inserted before MI.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(gID);
+ break;
+ case 8: // Two GDSLOADs, one per component of Rxy1010.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32)); // 32-bit words 0, 1.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+ .addReg(AMDIL::Ry1010)
+ .addImm(gID);
+ break;
+ }
+ expandPackedData(MI);
+ unsigned dataReg = expandExtendLoad(MI);
+ if (!dataReg) { // expandExtendLoad returned 0: fall back to the register from getDataReg.
+ dataReg = getDataReg(MI);
+ }
+ BuildMI(*mBB, MI, MI->getDebugLoc(), // Move dataReg into MI's original operand 0, inserted before MI.
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(dataReg);
+ MI->getOperand(0).setReg(dataReg);
+}
+void // Expands a local load into LDSLOAD/LDSLOADVEC, or defers to expandGlobalLoad when hardware local memory is not used.
+AMDIL7XXIOExpansion::expandLocalLoad(MachineInstr *MI)
+{
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesLDS() && mMFI->isKernel()) { // Error is recorded but expansion still continues.
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t lID = getPointerID(MI);
+ assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n");
+ if (!lID) { // Reached only when asserts are compiled out: use the device's LDS resource ID.
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI (BuildMI inserts in front of MI).
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default: // Any size other than 1, 2, 4 or 8: vector load of all four components.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rxy1010)
+ .addImm(lID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(lID);
+ break;
+ case 1: // Sub-word load: Rx1008 = (Rx1010 & 3) * 8, load the word at Rx1010 & ~3, shift right by Rx1008.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(lID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ break;
+ case 2: // Emits the same sequence as case 1.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(lID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ break;
+ }
+ expandPackedData(MI);
+ unsigned dataReg = expandExtendLoad(MI);
+ if (!dataReg) { // expandExtendLoad returned 0: fall back to the register from getDataReg.
+ dataReg = getDataReg(MI);
+ }
+ BuildMI(*mBB, MI, MI->getDebugLoc(), // Move dataReg into MI's original operand 0, inserted before MI.
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(dataReg);
+ MI->getOperand(0).setReg(dataReg);
+}
+
+void // Expands a global store pseudo into UAV raw stores, selected by the access size in bytes.
+AMDIL7XXIOExpansion::expandGlobalStore(MachineInstr *MI)
+{
+ uint32_t ID = getPointerID(MI); // Resource ID passed as the immediate of every UAV store below.
+ mKM->setOutputInst();
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI (BuildMI inserts in front of MI).
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) {
+ default: // Any size other than 1, 2, 4 or 8: full four-component store.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ case 1: // Records BYTE_STORE_ERROR, then still emits a 32-bit store.
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+ .addReg(AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+ .addImm(ID);
+ break;
+ case 2: // Same handling as case 1 (the same BYTE_STORE_ERROR message is reused).
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+ .addReg(AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+ .addImm(ID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+ .addReg(AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+ .addImm(ID);
+ break;
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEMxy)
+ .addReg(AMDIL::Rx1010)
+ .addReg(AMDIL::Rxy1011)
+ .addImm(ID);
+ break;
+ }; // The trailing ';' is an empty statement; harmless.
+}
+
+void // Expands a region store into GDSSTORE instructions, or defers to expandGlobalStore when hardware region memory is not used.
+AMDIL7XXIOExpansion::expandRegionStore(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { // Unsupported: record the error and emit nothing.
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+ return;
+ }
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalStore(MI);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ mKM->setOutputInst();
+ if (!mMFI->usesGDS() && mMFI->isKernel()) { // Error is recorded but expansion still continues.
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n");
+ if (!gID) { // Reached only when asserts are compiled out: use the device's GDS resource ID.
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+
+ // These instructions go before the current MI (BuildMI inserts in front of MI).
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) {
+ default: // Any size other than 1, 2, 4 or 8: four GDSSTOREs, one per component.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32))); // 32-bit words 0, 1, 2, 3.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
+ .addReg(AMDIL::Ry1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rz1010)
+ .addReg(AMDIL::Rz1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rw1010)
+ .addReg(AMDIL::Rw1011)
+ .addImm(gID);
+ break;
+ case 1: // Records BYTE_STORE_ERROR, then still emits a mask/shift/store sequence.
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011) // Rx1011 &= 0xFF
+ .addReg(AMDIL::Rx1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1012) // NOTE(review): Rx1010 & 3 goes to Rx1012, but the ADD below reads Rx1008 - confirm.
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008) // NOTE(review): Rx1008 is not written earlier in this function.
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32, // 32-bit words 0, -1, -2, -3.
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1006) // NOTE(review): Rx1006 is not read again in this function.
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007) // Chain of three selects over byte-mask literals.
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(0xFFFFFF00))
+ .addImm(mMFI->addi32Literal(0x00FFFFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Ry1008)
+ .addReg(AMDIL::Rx1007)
+ .addImm(mMFI->addi32Literal(0xFF00FFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+ .addReg(AMDIL::Rz1008)
+ .addReg(AMDIL::Rx1007)
+ .addImm(mMFI->addi32Literal(0xFFFF00FF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011) // NOTE(review): shift amount is Rx1007, which holds a mask literal at this point - confirm.
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1007);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+ .addImm(gID);
+ break;
+ case 2: // Records BYTE_STORE_ERROR, then emits a mask/shift/store sequence for 16 bits.
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011) // Rx1011 &= 0xFFFF
+ .addReg(AMDIL::Rx1011)
+ .addImm(mMFI->addi32Literal(0x0000FFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008) // Rx1008 = (Rx1010 & 3) >> 1
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012) // NOTE(review): the mask selected into Rx1012 is not read again in this function.
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(0x0000FFFF))
+ .addImm(mMFI->addi32Literal(0xFFFF0000));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008) // Selects between literals 16 and 0 on Rx1008.
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+ .addImm(gID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+ .addImm(gID);
+ break;
+ case 8: // Two GDSSTOREs, one per component of Rxy1010.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32)); // 32-bit words 0, 1.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
+ .addReg(AMDIL::Ry1011)
+ .addImm(gID);
+ break;
+ }; // The trailing ';' is an empty statement; harmless.
+}
+
+void // Expands a local store into a single LDSSTOREVEC, or defers to expandGlobalStore when hardware local memory is not used.
+AMDIL7XXIOExpansion::expandLocalStore(MachineInstr *MI)
+{
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) {
+ return expandGlobalStore(MI);
+ }
+ uint32_t lID = getPointerID(MI); // NOTE(review): unlike expandLocalLoad there is no usesLDS()/isKernel() check here - confirm intent.
+ assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n");
+ if (!lID) { // Reached only when asserts are compiled out: use the device's LDS resource ID.
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI (BuildMI inserts in front of MI).
+ expandStoreSetupCode(MI);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC), AMDIL::MEM) // The same instruction is emitted for every access size; getMemorySize is not consulted.
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAlgorithms.tpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,86 @@
+//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
+//
+// This file provides template algorithms that extend the STL algorithms, but
+// are useful for the AMDIL backend
+//
+//===----------------------------------------------------------------------===//
+
+// A template function that loops through the iterators and passes the second
+// argument along with each dereferenced iterator to the function. The value
+// returned by the function is ignored here; safeBinaryForEach below is the
+// variant that moves the iterator back when the function returns true.
+// This is based on the for_each STL function, but allows a reference to
+// the second argument. Returns the function object F.
+template<class InputIterator, class Function, typename Arg>
+Function binaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ F(*First, Second); // Second is passed by reference, so F may modify it.
+ }
+ return F;
+}
+
+template<class InputIterator, class Function, typename Arg> // Like binaryForEach, but reacts to F's return value.
+Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ if (F(*First, Second)) { // A true result makes the loop step back one position before the ++First.
+ --First; // Requires an iterator that supports decrement.
+ }
+ }
+ return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with the passed in argument. See binaryForEach for further
+// explanation
+template<class InputIterator, class Function, typename Arg>
+Function binaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ binaryForEach(First->begin(), First->end(), F, Second); // F is passed by value; the copy returned by the inner call is discarded.
+ }
+ return F;
+}
+template<class InputIterator, class Function, typename Arg> // Two-level variant of safeBinaryForEach.
+Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ safeBinaryForEach(First->begin(), First->end(), F, Second); // F is passed by value; the copy returned by the inner call is discarded.
+ }
+ return F;
+}
+
+// Unlike the STL, a pointer to the iterator itself is passed in with the 'safe'
+// versions of these functions. This allows the function to handle situations
+// such as invalidated iterators
+template<class InputIterator, class Function>
+Function safeForEach(InputIterator First, InputIterator Last, Function F)
+{
+ for ( ; First!=Last; ++First ) F(&First)
+ ; // Ends the F(&First) statement above; that call is the entire loop body.
+ return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current iterator. See binaryForEach for
+// further explanation
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F) // S is never read; it only lets callers deduce SecondIterator.
+{
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end(); // sl is captured once per outer element and not refreshed.
+ sf != sl; ) {
+ if (!F(&sf)) { // A true result means the loop does not advance sf itself.
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,98 @@
+//===-- AMDILAsmBackend.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILAsmBackend.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+namespace llvm
+{
+ASM_BACKEND_CLASS* createAMDILAsmBackend(const ASM_BACKEND_CLASS &T, // Returns a heap-allocated AMDILAsmBackend; the caller receives the raw pointer.
+ const std::string &TT) // TT is not used.
+{
+ return new AMDILAsmBackend(T);
+}
+} // namespace llvm
+
+//===--------------------- Default AMDIL Asm Backend ---------------------===//
+AMDILAsmBackend::AMDILAsmBackend(const ASM_BACKEND_CLASS &T) // T is ignored; the base class is default-constructed.
+ : ASM_BACKEND_CLASS()
+{
+}
+
+MCObjectWriter * // Stub: always returns a null pointer.
+AMDILAsmBackend::createObjectWriter(raw_ostream &OS) const
+{
+ return 0;
+}
+
+bool // Stub: always false, regardless of Section.
+AMDILAsmBackend::doesSectionRequireSymbols(const MCSection &Section) const
+{
+ return false;
+}
+
+bool // Stub: always true, regardless of Section.
+AMDILAsmBackend::isSectionAtomizable(const MCSection &Section) const
+{
+ return true;
+}
+
+bool // Stub: always false; the ELF-based check below is disabled.
+AMDILAsmBackend::isVirtualSection(const MCSection &Section) const
+{
+ return false;
+ //const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
+ //return SE.getType() == MCSectionELF::SHT_NOBITS;
+}
+void // Stub: does nothing; Data is left untouched.
+AMDILAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const
+{
+}
+
+bool // Stub: always false, regardless of Inst.
+AMDILAsmBackend::mayNeedRelaxation(const MCInst &Inst
+ ) const
+{
+ return false;
+}
+
+bool // Only Value is examined; Fixup, DF and Layout are unused.
+AMDILAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const
+{
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value)); // True when truncating to 8 bits and sign-extending back changes the value.
+}
+
+
+
+void // Stub: does nothing; Res is left unmodified.
+AMDILAsmBackend::relaxInstruction(const MCInst &Inst,
+ MCInst &Res) const
+{
+}
+
+bool // Stub: writes nothing and always returns false.
+AMDILAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const
+{
+ return false;
+}
+
+unsigned // Always reports zero fixup kinds.
+AMDILAsmBackend::getNumFixupKinds() const
+{
+ return 0;
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,47 @@
+//===-- AMDILAsmBackend.h -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_ASM_BACKEND_H_
+#define _AMDIL_ASM_BACKEND_H_
+#include "AMDIL.h"
+#include "llvm/MC/MCAsmBackend.h"
+#define ASM_BACKEND_CLASS MCAsmBackend
+
+using namespace llvm;
+namespace llvm
+{
+class AMDILAsmBackend : public ASM_BACKEND_CLASS // ASM_BACKEND_CLASS is #defined as MCAsmBackend earlier in this header.
+{
+public:
+ AMDILAsmBackend(const ASM_BACKEND_CLASS &T); // NOTE(review): single-argument constructor is not explicit - confirm implicit conversion is intended.
+ virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const;
+ virtual bool isSectionAtomizable(const MCSection &Section) const;
+ virtual bool isVirtualSection(const MCSection &Section) const;
+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+ virtual bool
+ mayNeedRelaxation(const MCInst &Inst
+ ) const;
+ virtual bool
+ fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
+ virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+ unsigned getNumFixupKinds() const; // The only member here declared without 'virtual'.
+}; // class AMDILAsmBackend;
+} // llvm namespace
+
+#endif // _AMDIL_ASM_BACKEND_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1028 @@
+//===-- AMDILAsmPrinter.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#if !defined(NDEBUG) && !defined(USE_APPLE)
+# define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+# define DEBUGME (false)
+#endif
+#include "AMDILAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILCompilerErrors.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <sstream>
+using namespace llvm;
+/// createAMDILCodePrinterPass - Returns a pass that prints the AMDIL
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+
+ASMPRINTER_RETURN_TYPE // Return type and parameter list come from macros defined outside this file.
+createAMDILCodePrinterPass(AMDIL_ASM_PRINTER_ARGUMENTS)
+{
+ const AMDILSubtarget *stm = &TM.getSubtarget<AMDILSubtarget>();
+ return stm->device()->getAsmPrinter(ASM_PRINTER_ARGUMENTS); // The subtarget's device object chooses the printer.
+}
+
+#include "AMDILGenAsmWriter.inc"
+// Force static initialization
+extern "C" void LLVMInitializeAMDILAsmPrinter() // Registers createAMDILCodePrinterPass as the asm printer for TheAMDILTarget.
+{
+ llvm::TargetRegistry::RegisterAsmPrinter(TheAMDILTarget,
+ createAMDILCodePrinterPass);
+}
+
+AMDILInstPrinter *llvm::createAMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, // Returns a heap-allocated printer; the caller receives the raw pointer.
+ const MCRegisterInfo &MRI)
+{
+ return new AMDILInstPrinter(MAI, MII, MRI);
+}
+
+// @brief Strips the "__OpenCL_" prefix and "_kernel" suffix from a kernel name.
+// @param name the function name, possibly decorated with both tokens.
+// @return name without its first 9 and last 7 characters when both tokens are
+// found and the name is long enough to hold both; otherwise name unchanged.
+static
+std::string Strip(const std::string &name)
+{
+ const size_t prefixLen = 9, suffixLen = 7; // strlen("__OpenCL_"), strlen("_kernel")
+ size_t start = name.find("__OpenCL_");
+ size_t end = name.find("_kernel");
+ // The tokens can overlap ("__OpenCL_kernel"); length()-16 would then wrap around.
+ if (start == std::string::npos || end == std::string::npos || (start == end)
+ || name.length() < prefixLen + suffixLen) {
+ return name;
+ } else {
+ return name.substr(prefixLen, name.length() - (prefixLen + suffixLen));
+ }
+}
+// TODO: Add support for verbose.
+AMDILAsmPrinter::AMDILAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+ : AsmPrinter(ASM_PRINTER_ARGUMENTS)
+{
+ mDebugMode = DEBUGME; // DEBUGME is the macro defined at the top of this file; it is (false) when NDEBUG or USE_APPLE is defined.
+ mTM = reinterpret_cast<AMDILTargetMachine*>(&TM); // NOTE(review): assumes TM really is an AMDILTargetMachine; the cast is unchecked.
+ mTM->setDebug(mDebugMode);
+ mMeta = new AMDILKernelManager(mTM); // Owned by this object; deleted in the destructor.
+ mBuffer = 0;
+ mNeedVersion = false;
+ mMFI = NULL; // mMFI and mAMI are assigned in runOnMachineFunction.
+ mAMI = NULL;
+}
+
+AMDILAsmPrinter::~AMDILAsmPrinter() // Releases the AMDILKernelManager allocated in the constructor.
+{
+ delete mMeta;
+}
+const char* // Returns the fixed, human-readable name of this pass.
+AMDILAsmPrinter::getPassName() const
+{
+ return "AMDIL Assembly Printer";
+}
+
+void // Formats one machine instruction into a local string and hands it to OutStreamer as raw text.
+AMDILAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+ std::string FunStr;
+ raw_string_ostream OFunStr(FunStr);
+ formatted_raw_ostream O(OFunStr); // Everything written to O ends up in FunStr after O.flush().
+ const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+ if (mDebugMode) { // Debug mode: prefix the output with ';' followed by the MachineInstr dump.
+ O << ";" ;
+ II->print(O);
+ }
+ if (isMacroFunc(II)) { // Macro function: emit via emitMacroFunc and return early.
+ emitMacroFunc(II, O);
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+ return;
+ }
+ if (isMacroCall(II)) { // Macro call: emit "mcall(id)(dst), (srcs...)" followed by a mov into the real destination.
+ unsigned reg = 0;
+ unsigned newDst = 0;
+ OpSwizzle opSwiz, oldSwiz;
+ const char *name = mTM->getInstrInfo()->getName(II->getOpcode()) + 5; // Skips the first 5 characters of the opcode name; NOTE(review): presumably a fixed prefix - confirm.
+ int macronum = amd::MacroDBFindMacro(name);
+ O << "\t;"<< name<<"\n";
+ O << "\tmcall("<<macronum<<")";
+ reg = II->getOperand(0).getReg();
+ newDst = AMDIL::R1000; // Default temporary; narrowed below to match the component(s) of reg.
+ oldSwiz.u8all = opSwiz.u8all =
+ II->getOperand(0).getTargetFlags();
+ if (isXComponentReg(reg)) {
+ newDst = AMDIL::Rx1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isYComponentReg(reg)) {
+ newDst = AMDIL::Ry1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isZComponentReg(reg)) {
+ newDst = AMDIL::Rz1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isWComponentReg(reg)) {
+ newDst = AMDIL::Rw1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isXYComponentReg(reg)) {
+ newDst = AMDIL::Rxy1000;
+ opSwiz.bits.swizzle = AMDIL_DST_XY__;
+ } else if (isZWComponentReg(reg)) {
+ newDst = AMDIL::Rzw1000;
+ opSwiz.bits.swizzle = AMDIL_DST_XY__;
+ } else {
+ opSwiz.bits.swizzle = AMDIL_DST_DFLT;
+ }
+ for (unsigned x = 0, y = II->getNumOperands(); x < y; ++x) { // Operand 0 is replaced by newDst; the rest are printed as-is.
+ if (!x) {
+ O << "(";
+ O << getRegisterName(newDst);
+ O << getDstSwizzle(opSwiz.bits.swizzle);
+ } else {
+ printOperand(II, x
+ , O
+ );
+ }
+ if (!x) {
+ O << "), (";
+ } else if (x != y - 1) {
+ O << ", ";
+ } else {
+ O << ")\n";
+ }
+ }
+ O << "\tmov " << getRegisterName(reg) << getDstSwizzle(oldSwiz.bits.swizzle) // Copy the temporary back into the original destination.
+ << ", " << getRegisterName(newDst);
+ if (isXComponentReg(reg)) { // Source swizzle mirrors the component test chain above.
+ O << getSrcSwizzle(AMDIL_SRC_X000);
+ } else if (isYComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_0X00);
+ } else if (isZComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_00X0);
+ } else if (isWComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_000X);
+ } else if (isXYComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_XY00);
+ } else if (isZWComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_00XY);
+ } else {
+ O << getSrcSwizzle(AMDIL_SRC_DFLT);
+ }
+ O << "\n";
+ if (curTarget->device()->isSupported( // Record the macro id: in mMacroIDs when MacroDB is supported, otherwise on the function info.
+ AMDILDeviceInfo::MacroDB)) {
+ mMacroIDs.insert(macronum);
+ } else {
+ mMFI->addCalledIntr(macronum);
+ }
+ } else {
+
+ printInstruction(II, O); // Ordinary instruction: use the generated printer.
+ }
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+}
+void // Emits a macro call for MI through emitMCallInst, named after MI's global operand when it has one.
+AMDILAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+ OSTREAM_TYPE &O)
+{
+ const char *name = "unknown"; // Used when operand 0 is not a global.
+ llvm::StringRef nameRef;
+ if (MI->getOperand(0).isGlobal()) {
+ nameRef = MI->getOperand(0).getGlobal()->getName();
+ name = nameRef.data(); // NOTE(review): StringRef::data() is not guaranteed NUL-terminated in general - confirm for global names.
+ }
+ emitMCallInst(MI, O, name);
+}
+
+// Per-function entry point of the printer: caches the per-function and
+// per-module info objects, records the function name (raw and stripped) and
+// then emits the function header and body. Always returns false.
+bool
+AMDILAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+  // Bind the printer and its helpers to the function being processed.
+  this->MF = &lMF;
+  mMeta->setMF(&lMF);
+  mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+  mAMI = &(lMF.getMMI().getObjFileInfo<AMDILModuleInfo>());
+  SetupMachineFunction(lMF);
+
+  // mName holds the stripped name, mKernelName the name as found in the IR.
+  std::string rawName = lMF.getFunction()->getName();
+  mName = Strip(rawName);
+  mKernelName = rawName;
+
+  EmitFunctionHeader();
+  EmitFunctionBody();
+  return false;
+}
+
+// Register every scalar leaf of constant C in the literal tables of the
+// current function (mMFI).
+//  - ConstantFP: added as an f32 literal for float type, f64 otherwise.
+//  - ConstantInt: 64-bit values are added as i64 literals; every other width
+//    is truncated to 32 bits and tagged with the matching LOADCONST opcode
+//    (i8, i16, or i32 for anything else).
+//  - ConstantArray / ConstantStruct / ConstantVector: each operand is
+//    processed recursively.
+//  - ConstantAggregateZero: a zero of every literal kind is added.
+// Any other constant kind trips the assert at the bottom (and is silently
+// ignored when asserts are compiled out).
+void
+AMDILAsmPrinter::addCPoolLiteral(const Constant *C)
+{
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+    if (CFP->getType()->isFloatTy()) {
+      mMFI->addf32Literal(CFP);
+    } else {
+      mMFI->addf64Literal(CFP);
+    }
+  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+    // CI is non-null inside this branch, so it is read directly; the old code
+    // null-checked it and then dereferenced it unconditionally anyway.
+    const int64_t val = CI->getSExtValue();
+    switch (CI->getBitWidth()) {
+    case 64:
+      mMFI->addi64Literal(val);
+      break;
+    case 8:
+      mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i8);
+      break;
+    case 16:
+      mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i16);
+      break;
+    default:
+      mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i32);
+      break;
+    }
+  } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+    uint32_t size = CA->getNumOperands();
+    for (uint32_t x = 0; x < size; ++x) {
+      addCPoolLiteral(CA->getOperand(x));
+    }
+  } else if (const ConstantAggregateZero *CAZ
+             = dyn_cast<ConstantAggregateZero>(C)) {
+    if (CAZ->isNullValue()) {
+      mMFI->addi32Literal(0, AMDIL::LOADCONST_i32);
+      mMFI->addi64Literal(0);
+      mMFI->addf64Literal((uint64_t)0);
+      mMFI->addf32Literal((uint32_t)0);
+    }
+  } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+    uint32_t size = CS->getNumOperands();
+    for (uint32_t x = 0; x < size; ++x) {
+      addCPoolLiteral(CS->getOperand(x));
+    }
+  } else if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+    // TODO: Make this handle vectors natively up to the correct
+    // size
+    uint32_t size = CV->getNumOperands();
+    for (uint32_t x = 0; x < size; ++x) {
+      addCPoolLiteral(CV->getOperand(x));
+    }
+  } else {
+    // TODO: Do we really need to handle ConstantPointerNull?
+    // What about BlockAddress, ConstantExpr and Undef?
+    // How would these even be generated by a valid CL program?
+    assert(0 && "Found a constant type that I don't know how to handle");
+  }
+}
+
+// Emit a ".global@<name>" marker for GV. When the module info knows an array
+// offset for the name (or, failing that, a constant offset) the marker is
+// followed by ":<offset>" and a newline; -1 means "no offset recorded".
+void
+AMDILAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV)
+{
+  llvm::StringRef symbol = GV->getName();
+  SmallString<1024> text;
+  raw_svector_ostream OS(text);
+  const int32_t arrayOffset = mAMI->getArrayOffset(symbol);
+  const int32_t constOffset = mAMI->getConstOffset(symbol);
+
+  OS << ".global@" << symbol;
+  // The array offset takes precedence over the constant offset.
+  const int32_t chosen = (arrayOffset != -1) ? arrayOffset : constOffset;
+  if (chosen != -1) {
+    OS << ":" << chosen << "\n";
+  }
+  OS.flush();
+  OutStreamer.EmitRawText(OS.str());
+}
+
+
+// Print operand opNum of MI to O in AMDIL assembly syntax.
+//  - Registers: the register name plus the swizzle stored in the operand's
+//    target flags. Register numbers with the top bit set are printed as
+//    "r<n + 2048>". Operand 0 of a scratch store is printed as "<reg>.x]".
+//  - Immediates: printed either as a raw number (resource IDs, offsets, and
+//    the other special cases listed below) or as a literal reference
+//    "l<imm>" followed by the swizzle. The swizzle selector immediates of
+//    VEXTRACT/VINSERT print nothing.
+//  - Basic blocks: forwarded to EmitBasicBlockStart.
+//  - Global addresses / external symbols: the numeric function ID for CALL,
+//    or the literal holding the LDS/constant offset of the global.
+//  - Constant pool indices: the literal holding the pool entry's offset.
+// Unsupported situations assert and record an error message in mMFI.
+void
+AMDILAsmPrinter::printOperand(const MachineInstr *MI, int opNum
+                              , OSTREAM_TYPE &O
+                             )
+{
+  const MachineOperand &MO = MI->getOperand (opNum);
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    if (MO.isReg()) {
+      unsigned opcode = MI->getOpcode();
+      if ((signed)MO.getReg() < 0) {
+        // FIXME: we need to remove all virtual register creation after register allocation.
+        // This is a work-around to make sure that the virtual register range does not
+        // clobber the physical register range.
+        O << "r" << ((MO.getReg() & 0x7FFFFFFF) + 2048) << getSwizzle(MI, opNum);
+      } else if (opNum == 0
+                 && (opcode == AMDIL::SCRATCHSTORE
+                     ||opcode == AMDIL::SCRATCHSTORE64)) {
+        O << getRegisterName(MO.getReg()) << ".x]";
+        // If we aren't the vector register, print the dst swizzle.
+        if (MI->getOperand(1).getReg() != AMDIL::R1011) {
+          O << getSwizzle(MI, opNum);
+        }
+      } else {
+        O << getRegisterName(MO.getReg()) << getSwizzle(MI, opNum);
+      }
+    } else {
+      assert(0 && "Invalid Register type");
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+    }
+    break;
+  case MachineOperand::MO_Immediate:
+  case MachineOperand::MO_FPImmediate: {
+    // NOTE(review): every branch below reads the operand through getImm(),
+    // including for MO_FPImmediate operands - confirm that FP immediates
+    // never reach this printer.
+    unsigned opcode = MI->getOpcode();
+    if ((opNum == (int)(MI->getNumOperands() - 1))
+        && ( (opcode >= AMDIL::ATOM_A_ADD
+              && opcode <= AMDIL::ATOM_R_XOR_NORET_B64)
+             || (opcode >= AMDIL::ATOM64_G_ADD
+                 && opcode <= AMDIL::ATOM64_R_XOR_NORET_B64)
+             || opcode == AMDIL::SEMAPHORE_INIT
+             || (opcode >= AMDIL::SCRATCHLOAD
+                 && opcode <= AMDIL::SCRATCHSTORE)
+             || (opcode >= AMDIL::LDSLOAD && opcode <= AMDIL::LDSSTORE_i8)
+             || (opcode >= AMDIL::GDSLOAD && opcode <= AMDIL::GDSSTORE)
+             || (opcode >= AMDIL::UAVARENALOAD_i16
+                 && opcode <= AMDIL::UAVRAWSTORE_v4i32)
+             || opcode == AMDIL::CBLOAD
+             || opcode == AMDIL::CASE)
+       ) {
+      // Trailing immediate of these opcodes is printed as a plain number.
+      O << MO.getImm();
+    } else if (((opcode >= AMDIL::VEXTRACT_v2f32
+                 && opcode <= AMDIL::VEXTRACT_v4i8)
+                && (opNum == 2))) {
+      // The swizzle is encoded in the operand so the
+      // literal that represents the swizzle out of ISel
+      // can be ignored.
+    } else if ((opcode >= AMDIL::VINSERT_v2f32)
+               && (opcode <= AMDIL::VINSERT_v4i8)
+               && ((opNum == 3) || (opNum == 4))) {
+      // The swizzle is encoded in the operand so the
+      // literal that represents the swizzle out of ISel
+      // can be ignored.
+    } else if (opNum == 1 &&
+               (isAppendInst(TM, MI)
+                || isReadImageInst(TM, MI)
+                || isImageTXLDInst(TM, MI)
+                || opcode == AMDIL::CBLOAD)) {
+      // We don't need to emit the 'l' so we just emit
+      // the immediate as it stores the resource ID and
+      // is not a true literal.
+      O << MO.getImm();
+    } else if (opNum == 0 &&
+               (opcode == AMDIL::SEMAPHORE_INIT
+                || opcode == AMDIL::SEMAPHORE_WAIT
+                || opcode == AMDIL::SEMAPHORE_SIGNAL
+                || isReadImageInst(TM, MI)
+                || isWriteImageInst(TM, MI))) {
+      O << MO.getImm();
+    } else if (opNum == 3 && isReadImageInst(TM, MI)) {
+      O << MO.getImm();
+    } else if (MO.isImm() || MO.isFPImm()) {
+      // A true literal: reference it by literal number plus swizzle.
+      O << "l" << MO.getImm() << getSwizzle(MI, opNum);
+    } else {
+      assert(0 && "Invalid literal/constant type");
+      mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[INTERNAL_ERROR]);
+    }
+  }
+  break;
+  case MachineOperand::MO_MachineBasicBlock:
+    EmitBasicBlockStart(MO.getMBB());
+    return;
+  case MachineOperand::MO_GlobalAddress: {
+    int offset = 0;
+    const GlobalValue *gv = MO.getGlobal();
+    // Here we look up by the name for the corresponding number
+    // and we print that out instead of the name or the address
+    if (MI->getOpcode() == AMDIL::CALL) {
+      uint32_t funcNum;
+      llvm::StringRef name = gv->getName();
+      funcNum = name.empty()
+                ? mAMI->getOrCreateFunctionID(gv)
+                : mAMI->getOrCreateFunctionID(name);
+      mMFI->addCalledFunc(funcNum);
+      O << funcNum <<" ; "<< name;
+    } else if((offset = mAMI->getArrayOffset(gv->getName()))
+              != -1) {
+      mMFI->setUsesLDS();
+      O << "l" << mMFI->getIntLits(offset) << ".x";
+    } else if((offset = mAMI->getConstOffset(gv->getName()))
+              != -1) {
+      mMFI->addMetadata(";memory:datareqd");
+      O << "l" << mMFI->getIntLits(offset) << ".x";
+      mMFI->setUsesConstant();
+    } else {
+      assert(0 && "GlobalAddress without a function call!");
+      mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[MISSING_FUNCTION_CALL]);
+    }
+  }
+  break;
+  case MachineOperand::MO_ExternalSymbol: {
+    if (MI->getOpcode() == AMDIL::CALL) {
+      uint32_t funcNum = mAMI->getOrCreateFunctionID(
+                           std::string(MO.getSymbolName()));
+      mMFI->addCalledFunc(funcNum);
+      O << funcNum << " ; "<< MO.getSymbolName();
+      // This is where pointers should get resolved
+    } else {
+      assert(0 && "ExternalSymbol without a function call!");
+      mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[MISSING_FUNCTION_CALL]);
+    }
+  }
+  break;
+  case MachineOperand::MO_ConstantPoolIndex: {
+    // Copies of constant buffers need to be done here
+    const AMDILKernel *tmp = mAMI->getKernel(mKernelName);
+    if (!tmp) {
+      // getKernel() can return null (EmitConstantPool checks for it), so
+      // report an internal error instead of dereferencing a null pointer.
+      assert(0 && "Constant pool operand without kernel information");
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      break;
+    }
+    O << "l" << mMFI->getIntLits(
+        tmp->CPOffsets[MO.getIndex()].first);
+  }
+  break;
+  default:
+    O << "<unknown operand type>";
+    break;
+  }
+}
+
+// Print operand opNum of MI as a memory operand.
+// Every operand other than operand 1 is simply forwarded to printOperand.
+// Operand 1 is printed here:
+//  - Registers: register name (or "r<n + 2048>" for register numbers with
+//    the top bit set) followed by the swizzle from the target flags.
+//  - Immediates: a raw number when it is the trailing operand of one of the
+//    listed memory/atomic opcodes or a resource ID, otherwise "l<imm>".
+//  - Constant pool indices: the literal holding the pool entry's offset.
+// Modifier is accepted for interface compatibility and is not used.
+//
+// The previous implementation also tested opNum == 0 and opNum == 3 on this
+// path; those tests could never succeed because the path is only reached
+// when opNum == 1, so they have been dropped without changing the output.
+void
+AMDILAsmPrinter::printMemOperand(
+  const MachineInstr *MI,
+  int opNum,
+  OSTREAM_TYPE &O,
+  const char *Modifier
+)
+{
+  const MachineOperand &MO = MI->getOperand (opNum);
+  if (opNum != 1) {
+    printOperand(MI, opNum
+                 , O
+                );
+    return;
+  }
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    if ((signed)MO.getReg() < 0) {
+      // FIXME: we need to remove all virtual register creation after register allocation.
+      // This is a work-around to make sure that the virtual register range does not
+      // clobber the physical register range.
+      O << "r" << ((MO.getReg() & 0x7FFFFFFF) + 2048) << getSwizzle(MI, opNum);
+    } else {
+      O << getRegisterName(MO.getReg()) << getSwizzle(MI, opNum);
+    }
+    break;
+  case MachineOperand::MO_Immediate:
+  case MachineOperand::MO_FPImmediate: {
+    unsigned opcode = MI->getOpcode();
+    if ((opNum == (int)(MI->getNumOperands() - 1))
+        && ((opcode >= AMDIL::ATOM_A_ADD
+             && opcode <= AMDIL::ATOM_R_XOR_B64)
+            || opcode == AMDIL::SEMAPHORE_INIT
+            || (opcode >= AMDIL::SCRATCHLOAD
+                && opcode <= AMDIL::SCRATCHSTORE)
+            || (opcode >= AMDIL::LDSLOAD && opcode <= AMDIL::LDSSTORE_i8)
+            || (opcode >= AMDIL::GDSLOAD && opcode <= AMDIL::GDSSTORE)
+            || (opcode >= AMDIL::UAVARENALOAD_i32
+                && opcode <= AMDIL::UAVRAWSTORE_v4i32)
+            || opcode == AMDIL::CBLOAD
+            || opcode == AMDIL::CASE)
+       ) {
+      // NOTE(review): the opcode ranges here differ from the ones used in
+      // printOperand (ATOM_R_XOR_B64 / UAVARENALOAD_i32, no ATOM64 range) -
+      // confirm whether that is intentional.
+      O << MO.getImm();
+    } else if (isAppendInst(TM, MI)
+               || isReadImageInst(TM, MI)
+               || isImageTXLDInst(TM, MI)
+               || opcode == AMDIL::CBLOAD) {
+      // We don't need to emit the 'l' so we just emit
+      // the immediate as it stores the resource ID and
+      // is not a true literal.
+      O << MO.getImm();
+    } else {
+      // A true literal: reference it by literal number.
+      O << "l" << MO.getImm();
+    }
+  }
+  break;
+  case MachineOperand::MO_ConstantPoolIndex: {
+    // Copies of constant buffers need to be done here
+    const AMDILKernel *tmp = mAMI->getKernel(mKernelName);
+    if (!tmp) {
+      // getKernel() can return null (EmitConstantPool checks for it), so
+      // report an internal error instead of dereferencing a null pointer.
+      assert(0 && "Constant pool operand without kernel information");
+      mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      break;
+    }
+    O << "l" << mMFI->getIntLits(
+        tmp->CPOffsets[MO.getIndex()].first);
+  }
+  break;
+  default:
+    O << "<unknown operand type>";
+    break;
+  };
+}
+
+
+// Return the swizzle suffix for operand opNum of MI. The operand's target
+// flags are decoded as an OpSwizzle; its 'dst' bit selects between the
+// destination and the source swizzle tables.
+const char*
+AMDILAsmPrinter::getSwizzle(const MachineInstr *MI, int opNum)
+{
+  OpSwizzle encoded;
+  encoded.u8all = MI->getOperand(opNum).getTargetFlags();
+  return encoded.bits.dst ? getDstSwizzle(encoded.bits.swizzle)
+                          : getSrcSwizzle(encoded.bits.swizzle);
+}
+
+// Emit the fixed prologue of the IL text: an optional dummy macro (when the
+// device uses the macro database), the shader version line, the ABI constant
+// buffer, the reserved literals l0-l8 and the main-function patch markers.
+// Also registers the kernel manager (mMeta) with the subtarget.
+void
+AMDILAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+  SmallString<1024> text;
+  raw_svector_ostream OS(text);
+  const AMDILSubtarget *subtarget = mTM->getSubtargetImpl();
+  subtarget->setKernelManager(mMeta);
+
+  if (subtarget->device()->isSupported(AMDILDeviceInfo::MacroDB)) {
+    // Since we are using the macro db, the first token must be a macro.
+    // So we make up a macro that is never used.
+    // I originally picked -1, but the IL text translator treats them as
+    // unsigned integers.
+    OS << "mdef(16383)_out(1)_in(2)\n"
+          "mov r0, in0\n"
+          "mov r1, in1\n"
+          "div_zeroop(infinity) r0.x___, r0.x, r1.x\n"
+          "mov out0, r0\n"
+          "mend\n";
+  }
+
+  // We need to increase the number of reserved literals for
+  // any literals we output manually instead of via the
+  // emitLiteral function. This function should never
+  // have any executable code in it. Only declarations
+  // and the main function patch symbol.
+  const bool isHDTest =
+    subtarget->device()->getGeneration() == AMDILDeviceInfo::HDTEST;
+  OS << (isHDTest ? "il_cs_3_0\n" : "il_cs_2_0\n");
+  OS << "dcl_cb cb0[15] ; Constant buffer that holds ABI data\n"
+        "dcl_literal l0, 0x00000004, 0x00000001, 0x00000002, 0x00000003\n"
+        "dcl_literal l1, 0x00FFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFD\n"
+        "dcl_literal l2, 0x0000FFFF, 0xFFFFFFFE, 0x000000FF, 0xFFFFFFFC\n"
+        "dcl_literal l3, 0x00000018, 0x00000010, 0x00000008, 0xFFFFFFFF\n"
+        "dcl_literal l4, 0xFFFFFF00, 0xFFFF0000, 0xFF00FFFF, 0xFFFF00FF\n"
+        "dcl_literal l5, 0x00000000, 0x00000004, 0x00000008, 0x0000000C\n"
+        "dcl_literal l6, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+        "dcl_literal l7, 0x00000018, 0x0000001F, 0x00000010, 0x0000001F\n"
+        "dcl_literal l8, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+        ";$$$$$$$$$$\n"
+        "endmain\n"
+        ";DEBUGSTART\n";
+  OutStreamer.EmitRawText(OS.str());
+}
+// Emit the epilogue of the IL text: the ";DEBUGEND" marker, the bodies of
+// all macros recorded in mMacroIDs (only when the device uses the macro
+// database), the module's data section and the final "end" line.
+void
+AMDILAsmPrinter::EmitEndOfAsmFile(Module &M)
+{
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  O << ";DEBUGEND\n";
+  if (curTarget->device()->isSupported(AMDILDeviceInfo::MacroDB)) {
+    for (llvm::DenseSet<uint32_t>::iterator msb = mMacroIDs.begin()
+         , mse = mMacroIDs.end(); msb != mse; ++msb) {
+      int idx = *msb;
+      // Start from zero so that a lookup which does not set the line count
+      // cannot leave the loop bound uninitialized.
+      int lines = 0;
+      const char* *macro = amd::MacroDBGetMacro(&lines, idx);
+      if (!macro) {
+        // No text available for this macro ID; nothing to print.
+        continue;
+      }
+      for (int k = 0; k < lines; ++k) {
+        O << macro[k];
+      }
+    }
+  }
+  if (mAMI) mAMI->dumpDataSection(O, mMFI);
+  O << "\nend\n";
+#ifdef _DEBUG
+  if (mDebugMode) {
+    mTM->dump(O);
+  }
+#endif
+  OutStreamer.EmitRawText(O.str());
+}
+void
+AMDILAsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const // Placeholder override: MI and Code are ignored.
+{
+ assert(0 && "When is this function hit!"); // Trips whenever the hook is reached; does nothing when asserts are compiled out.
+}
+// Inline-asm operand printing is not implemented for AMDIL: nothing is ever
+// written for the operand and all arguments are ignored.
+// Returns true, which is the AsmPrinter convention for "operand could not be
+// printed", so that a build without asserts reports the unsupported inline
+// asm instead of silently emitting an empty operand.
+bool
+AMDILAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned int OpNo,
+                                 unsigned int AsmVariant, const char *ExtraCode)
+{
+  assert(0 && "When is this function hit!");
+  return true;
+}
+// Inline-asm memory operand printing is not implemented for AMDIL: nothing
+// is ever written for the operand and all arguments are ignored.
+// Returns true, which is the AsmPrinter convention for "operand could not be
+// printed", so that a build without asserts reports the unsupported inline
+// asm instead of silently emitting an empty operand.
+bool
+AMDILAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                       unsigned int OpNo, unsigned int AsmVariant, const char *ExtraCode)
+{
+  assert(0 && "When is this function hit!");
+  return true;
+}
+void
+AMDILAsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) // Placeholder override: MCPV is ignored.
+{
+ assert(0 && "When is this function hit!"); // Trips whenever the hook is reached; does nothing when asserts are compiled out.
+}
+void
+AMDILAsmPrinter::printPICJumpTableSetLabel(unsigned uid, // Two-argument overload; placeholder, arguments are ignored.
+ const MachineBasicBlock *MBB) const
+{
+ assert(0 && "When is this function hit!"); // Trips whenever the hook is reached; does nothing when asserts are compiled out.
+}
+void
+AMDILAsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, // Three-argument overload; placeholder, arguments are ignored.
+ const MachineBasicBlock *MBB) const
+{
+ assert(0 && "When is this function hit!"); // Trips whenever the hook is reached; does nothing when asserts are compiled out.
+}
+void
+AMDILAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, // Placeholder: all arguments are ignored.
+ const MachineBasicBlock *MBB,
+ unsigned uid) const
+{
+ assert(0 && "When is this function hit!"); // Trips whenever the hook is reached; does nothing when asserts are compiled out.
+}
+
+void
+AMDILAsmPrinter::EmitFunctionBodyStart() // Emits everything that precedes the instructions of the current function.
+{
+ SmallString<1024> Str;
+ raw_svector_ostream O(Str); // All text is collected in Str and handed to the streamer in one piece at the end.
+
+ bool isKernel = false;
+ O << "";
+ O << ";DEBUGEND\n";
+ ++mBuffer; // Advance the buffer counter once per function; the new value is passed to processArgMetadata below.
+ isKernel = mMFI->isKernel();
+ uint32_t id = mName.empty() // Function ID: looked up by Function object when there is no stripped name, by name otherwise.
+ ? mAMI->getOrCreateFunctionID(MF->getFunction())
+ : mAMI->getOrCreateFunctionID(mName);
+ mMeta->setKernel(isKernel);
+ mMeta->setID(id);
+ if (isKernel) { // Kernel path: wrapper text, literal registration, "call <id>", footer/metadata, then "func <id>".
+ mMeta->printHeader(this, O, mKernelName);
+ mMeta->processArgMetadata(O, mBuffer, isKernel);
+ mMeta->printGroupSize(O);
+ mMeta->printDecls(this, O);
+ AMDILKernel &tmp = *(mMFI->getKernel());
+ // add the literals for the offsets and sizes of
+ // all kernel declared local arrays
+ if (tmp.lvgv) {
+ AMDILLocalArg *lptr = tmp.lvgv;
+ llvm::SmallVector<AMDILArrayMem*, DEFAULT_VEC_SLOTS>::iterator lmb, lme;
+ for (lmb = lptr->local.begin(), lme = lptr->local.end();
+ lmb != lme; ++lmb) {
+ mMFI->addi32Literal((*lmb)->offset);
+ mMFI->addi32Literal((*lmb)->vecSize);
+ mMFI->setUsesLDS();
+ }
+ }
+ // Add the literals for the offsets and sizes of
+ // all the globally scoped constant arrays
+ for (StringMap<AMDILConstPtr>::iterator cmb = mAMI->consts_begin(),
+ cme = mAMI->consts_end(); cmb != cme; ++cmb) {
+ mMFI->addi32Literal((cmb)->second.offset);
+ mMFI->addi32Literal((cmb)->second.size);
+ mMFI->addMetadata(";memory:datareqd");
+ mMFI->setUsesConstant();
+ }
+
+ // Add the literals for the offsets and sizes of
+ // all the kernel constant arrays
+ llvm::SmallVector<AMDILConstPtr, DEFAULT_VEC_SLOTS>::const_iterator cpb, cpe;
+ for (cpb = tmp.constPtr.begin(), cpe = tmp.constPtr.end();
+ cpb != cpe; ++cpb) {
+ mMFI->addi32Literal(cpb->size);
+ mMFI->addi32Literal(cpb->offset);
+ mMFI->setUsesConstant();
+ }
+ mMeta->emitLiterals(O); // Called only after all the addi32Literal registrations above.
+ // Add 1 to the size so that the next literal is the one we want
+ mMeta->printArgCopies(O, this);
+ O << "call " << id << " ; " << mName << "\n";
+ mMeta->printFooter(O);
+ mMeta->printMetaData(O, id, isKernel);
+ O << "func " << id << " ; " << mName << "\n";
+ } else { // Non-kernel path: only the "func <id>" line and the argument metadata.
+ if (mName.empty()) {
+ std::stringstream ss;
+ ss << "unknown_" << id; // Synthesize a name from the ID for unnamed functions.
+ mName = ss.str();
+ }
+ mMeta->setName(mName);
+ O << "func " << id << " ; " << mName << "\n";
+ mMeta->processArgMetadata(O, mBuffer, false);
+ }
+ O.flush();
+ OutStreamer.EmitRawText(O.str());
+}
+// Close the current function: emit "ret" / "endfunc", print the function's
+// metadata, reset the kernel manager and open the next ";DEBUGSTART" region.
+// An unnamed function is given the name "unknown_<id>" first.
+void
+AMDILAsmPrinter::EmitFunctionBodyEnd()
+{
+  SmallString<1024> text;
+  raw_svector_ostream OS(text);
+
+  uint32_t funcID;
+  if (mName.empty()) {
+    funcID = mAMI->getOrCreateFunctionID(MF->getFunction());
+    std::stringstream fallback;
+    fallback << "unknown_" << funcID;
+    mName = fallback.str();
+  } else {
+    funcID = mAMI->getOrCreateFunctionID(mName);
+  }
+
+  // Only kernel wrappers push the name into the kernel manager before the
+  // metadata is printed; everything else is identical for both cases.
+  const bool wrapsKernel = mAMI->isKernel(mKernelName);
+  OS << "ret\nendfunc ; " << mName << "\n";
+  if (wrapsKernel) {
+    mMeta->setName(mName);
+  }
+  mMeta->printMetaData(OS, funcID, false);
+
+  mMeta->clear();
+  OS << ";DEBUGSTART\n";
+  OS.flush();
+  OutStreamer.EmitRawText(OS.str());
+}
+// Register the constant pool of the current function in the literal tables.
+// Does nothing unless the module info has a kernel record (with a non-null
+// mKernel field) for mKernelName. Otherwise the pool offsets are computed,
+// each offset is added as an i32 literal (marking the data section as
+// required), and every IR constant in the pool is added via addCPoolLiteral.
+void
+AMDILAsmPrinter::EmitConstantPool()
+{
+  // Look the kernel up once; the old code fetched it twice and re-tested it.
+  AMDILKernel *tmp = mAMI->getKernel(mKernelName);
+  if (!tmp || !tmp->mKernel) {
+    return;
+  }
+  mAMI->calculateCPOffsets(MF, tmp);
+  // Add all the constant pool offsets to the literal table
+  for (uint32_t x = 0; x < tmp->CPOffsets.size(); ++x) {
+    mMFI->addMetadata(";memory:datareqd");
+    mMFI->addi32Literal(tmp->CPOffsets[x].first);
+  }
+
+  // Add all the constant pool constants to the literal tables
+  const MachineConstantPool *MCP = MF->getConstantPool();
+  const std::vector<MachineConstantPoolEntry> &consts
+    = MCP->getConstants();
+  for (uint32_t x = 0, s = consts.size(); x < s; ++x) {
+    // Val is a union: ConstVal is only meaningful for plain IR constants.
+    // Target-specific entries store a MachineConstantPoolValue instead and
+    // must not be read through ConstVal.
+    if (consts[x].isMachineConstantPoolEntry()) {
+      continue;
+    }
+    addCPoolLiteral(consts[x].Val.ConstVal);
+  }
+}
+// Intentionally emits nothing: this override suppresses the function entry
+// label. (An unreachable assert that used to follow the return was removed.)
+void
+AMDILAsmPrinter::EmitFunctionEntryLabel()
+{
+}
+
+/// getDebugResourceID - Return the UAV ID that the kernel manager reports
+/// for the metadata node stored in the last operand of MI. The last operand
+/// must be a metadata operand.
+uint32_t AMDILAsmPrinter::getDebugResourceID(const MachineInstr *MI) const
+{
+  // Guard the "last operand" index against an instruction without operands.
+  assert(MI->getNumOperands() > 0 && "Instruction has no operands");
+  const llvm::MachineOperand& opr = MI->getOperand(MI->getNumOperands() - 1);
+  assert(opr.isMetadata());
+  const MDNode *Var = opr.getMetadata();
+  const Value * valOfVar = Var;
+  uint32_t resourceID = mMeta->getUAVID(valOfVar);
+  return resourceID;
+}
+
+// True when the name of MI's opcode, as reported by the instruction info,
+// begins with the five characters "MACRO".
+bool
+AMDILAsmPrinter::isMacroCall(const MachineInstr *MI)
+{
+  const char *opcodeName = mTM->getInstrInfo()->getName(MI->getOpcode());
+  return strncmp(opcodeName, "MACRO", 5) == 0;
+}
+
+// True when MI is a CALL to a global whose name is found in the macro
+// database. As a side effect, a callee whose name starts with "__atom_" or
+// "__atomic_" makes the kernel manager record an output instruction, whether
+// or not the name is a macro.
+bool
+AMDILAsmPrinter::isMacroFunc(const MachineInstr *MI)
+{
+  if (MI->getOpcode() != AMDIL::CALL || !MI->getOperand(0).isGlobal()) {
+    return false;
+  }
+  llvm::StringRef callee = MI->getOperand(0).getGlobal()->getName();
+  const bool isAtomic = callee.startswith("__atom_")
+                        || callee.startswith("__atomic_");
+  if (isAtomic) {
+    mMeta->setOutputInst();
+  }
+  return amd::MacroDBFindMacro(callee.data()) != -1;
+}
+// Map a register number to the component suffix used when it is passed to a
+// macro call. The register is classified purely by comparing its number
+// against the first register of each component group, so the result depends
+// on the numeric order of those enumerators. Two-component registers get a
+// destination mask when dst is true and a zero-filled source swizzle
+// otherwise; registers outside every tested range get an empty suffix.
+static const char*
+getRegSwizzle(unsigned reg, bool dst)
+{
+  if (reg >= AMDIL::Rx1 && reg < AMDIL::Rxy1) {
+    return ".x";
+  }
+  if (reg >= AMDIL::Ry1 && reg < AMDIL::Rz1) {
+    return ".y";
+  }
+  if (reg >= AMDIL::Rz1 && reg < AMDIL::Rzw1) {
+    return ".z";
+  }
+  if (reg >= AMDIL::Rw1 && reg < AMDIL::Rx1) {
+    return ".w";
+  }
+  if (reg >= AMDIL::Rxy1 && reg < AMDIL::Ry1) {
+    return dst ? ".xy__" : ".xy00";
+  }
+  if (reg >= AMDIL::Rzw1 && reg < AMDIL::SDP) {
+    return dst ? ".__zw" : ".00zw";
+  }
+  return "";
+}
+// Emit "mcall(<id>)(<outputs>),(<inputs>) ;<name>" for the macro called
+// `name`, and record the macro as used (in mMacroIDs when the device uses
+// the macro database, otherwise as a called intrinsic in mMFI).
+// Nothing is emitted when the name is not in the macro database.
+// The register class is chosen from the name: "4f32"/"4i32" selects the
+// 4-component class, "2f32"/"2i32" the 2-component class, anything else the
+// scalar class. Outputs and inputs are both listed from register 0 of that
+// class. MI itself is not inspected.
+void
+AMDILAsmPrinter::emitMCallInst(const MachineInstr *MI, OSTREAM_TYPE &O, const char *name)
+{
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  int macronum = amd::MacroDBFindMacro(name);
+  // Bail out before asking the database about a macro that does not exist;
+  // the old code queried the input/output counts with -1 first.
+  if (macronum == -1) {
+    return;
+  }
+  int numIn = amd::MacroDBNumInputs(macronum);
+  int numOut = amd::MacroDBNumOutputs(macronum);
+  if (curTarget->device()->isSupported(
+        AMDILDeviceInfo::MacroDB)) {
+    mMacroIDs.insert(macronum);
+  } else {
+    mMFI->addCalledIntr(macronum);
+  }
+  const TargetRegisterClass *trc = NULL;
+  if (strstr(name, "4f32")
+      || strstr(name, "4i32")) {
+    trc = MF->getTarget()
+          .getRegisterInfo()->getRegClass(AMDIL::GPRV4F32RegClassID);
+  } else if (strstr(name, "2f32")
+             || strstr(name, "2i32")) {
+    trc = MF->getTarget()
+          .getRegisterInfo()->getRegClass(AMDIL::GPRV2F32RegClassID);
+  } else {
+    trc = MF->getTarget()
+          .getRegisterInfo()->getRegClass(AMDIL::GPRF32RegClassID);
+  }
+  O << "\tmcall(" << macronum << ")(";
+  // NOTE(review): one register is always printed per list, even when the
+  // database reports zero outputs or inputs - confirm that cannot happen.
+  int x ;
+  for (x = 0; x < numOut - 1; ++x) {
+    O << getRegisterName(trc->getRegister(x))
+      << getRegSwizzle(trc->getRegister(x), true) << ", ";
+  }
+  O << getRegisterName(trc->getRegister(x))
+    << getRegSwizzle(trc->getRegister(x), true) << "),(";
+  for (x = 0; x < numIn - 1; ++x) {
+    O << getRegisterName(trc->getRegister(x))
+      << getRegSwizzle(trc->getRegister(x), false) << ", ";
+  }
+  O << getRegisterName(trc->getRegister(x))
+    << getRegSwizzle(trc->getRegister(x), false) << ")";
+  O << " ;" << name <<"\n";
+}
+
+#if defined(LLVM_29) || defined(USE_APPLE)
+// Nothing is emitted for these build configurations.
+void
+AMDILAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const
+{
+}
+#else
+// Emit a DWARF location expression for MLoc, preceded by its 16-bit size.
+// The expression has two parts:
+//  1. the location itself: DW_OP_fbreg <offset> when MLoc has a non-zero
+//     offset, otherwise DW_OP_reg<n> (DWARF register < 32) or DW_OP_regx <n>;
+//  2. DW_OP_bit_piece <size> <offset> selecting the component(s) of the
+//     128-bit register that the sub-register occupies (32 bits per
+//     component, x at bit 0, y at 32, z at 64, w at 96).
+// Component registers are first mapped back to the full register with the
+// same index, counting from AMDIL::R1.
+void
+AMDILAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const
+{
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  unsigned reg = MLoc.getReg();
+  unsigned baseReg = AMDIL::R1;
+  const char* regxStr = NULL;
+  unsigned offset = 0;
+  unsigned size = 32;
+  const char* offStr = NULL;
+  if (isXComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rx1);
+    regxStr = "DW_OP_regx for x component of register";
+    offset = 0;
+    offStr = "DW_OP_bit_piece 32 0";
+  } else if (isYComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Ry1);
+    regxStr = "DW_OP_regx for y component of register";
+    offset = 32;
+    offStr = "DW_OP_bit_piece 32 32";
+  } else if (isZComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rz1);
+    regxStr = "DW_OP_regx for z component of register";
+    offset = 64;
+    offStr = "DW_OP_bit_piece 32 64";
+  } else if (isWComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rw1);
+    regxStr = "DW_OP_regx for w component of register";
+    offset = 96;
+    offStr = "DW_OP_bit_piece 32 96";
+  } else if (isXYComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rxy1);
+    regxStr = "DW_OP_regx for xy component of register";
+    offset = 0;
+    size = 64;
+    offStr = "DW_OP_bit_piece 64 0";
+  } else if (isZWComponentReg(reg)) {
+    baseReg += (reg - AMDIL::Rzw1);
+    regxStr = "DW_OP_regx for zw component of register";
+    offset = 64;
+    size = 64;
+    offStr = "DW_OP_bit_piece 64 64";
+  } else {
+    baseReg = reg;
+    regxStr = "DW_OP_regx for xyzw component of register";
+    offset = 0;
+    size = 128;
+    offStr = "DW_OP_bit_piece 128 0";
+  }
+  baseReg = RI->getDwarfRegNum(baseReg, false);
+  OutStreamer.AddComment("Loc expr size");
+  // Bytes used by the two ULEB128 operands of the trailing DW_OP_bit_piece.
+  // Each branch below adds 2 for the two one-byte opcodes (the location
+  // opcode and DW_OP_bit_piece) plus the size of its own operand, if any.
+  unsigned OffsetSize = MCAsmInfo::getULEB128Size(size)
+                        + MCAsmInfo::getULEB128Size(offset);
+  if (int Offset = MLoc.getOffset()) {
+    // Offset is non-zero here. The size previously written in this branch
+    // left out the two opcode bytes, so it was 2 short of what is emitted.
+    OffsetSize += MCAsmInfo::getSLEB128Size(Offset);
+    EmitInt16(2 + OffsetSize);
+    OutStreamer.AddComment(
+      dwarf::OperationEncodingString(dwarf::DW_OP_fbreg));
+    EmitInt8(dwarf::DW_OP_fbreg);
+    OutStreamer.AddComment("Offset");
+    EmitSLEB128(Offset);
+  } else if (baseReg < 32) {
+    EmitInt16(2 + OffsetSize);
+    OutStreamer.AddComment(
+      dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + baseReg));
+    EmitInt8(dwarf::DW_OP_reg0 + baseReg);
+  } else {
+    EmitInt16(2 + MCAsmInfo::getULEB128Size(baseReg) + OffsetSize);
+    OutStreamer.AddComment(regxStr);
+    EmitInt8(dwarf::DW_OP_regx);
+    OutStreamer.AddComment(Twine(baseReg));
+    EmitULEB128(baseReg);
+  }
+
+  OutStreamer.AddComment(offStr);
+  EmitInt8(dwarf::DW_OP_bit_piece);
+  EmitULEB128(size);
+  EmitULEB128(offset);
+}
+#endif
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,244 @@
+//===-- AMDILAsmPrinter.h -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_ASM_PRINTER_H_
+#define _AMDIL_ASM_PRINTER_H_
+#include "AMDIL.h"
+#include "AMDILLLVMVersion.h"
+#include "macrodata.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm
+{
+class AMDILKernelManager;
+class AMDILTargetMachine;
+class AMDILMachineFunctionInfo;
+class AMDILModuleInfo;
+class AnalysisUsage;
+class Constant;
+class Function;
+class Module;
+class MachineInstr;
+class MachineBasicBlock;
+class MachineConstantPoolValue;
+class MachineFunction;
+class MachineJumpTableInfo;
+class raw_ostream;
+class MCStreamer;
+class MCSymbol;
+class MCInst;
+class MCContext;
+
+
+class LLVM_LIBRARY_VISIBILITY AMDILAsmPrinter : public AsmPrinter
+{
+public:
+  //
+  // Constructor for the AMDIL specific AsmPrinter class.
+  // Interface is defined by LLVM proper and should reference
+  // there for more information.
+  //
+  explicit AMDILAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+  //
+  // Destructor for the AsmPrinter class that deletes all the
+  // allocated memory
+  //
+  virtual ~AMDILAsmPrinter();
+
+  //
+  // @param MI Machine instruction to print the operand of
+  // @param opNum operand to print from the specified machine instruction
+  // @param O The output stream for the operand
+  // @brief Based on the register type, print out register specific
+  // information
+  // and add swizzle information in the cases that require it
+  //
+  virtual void
+  printOperand(const MachineInstr *MI, int opNum
+               , OSTREAM_TYPE &O
+              );
+
+  void
+  EmitGlobalVariable(const GlobalVariable *GV);
+  // overriding ALL AsmPrinter.h virtual functions to better
+  // understand how everything works
+  void
+  EmitStartOfAsmFile(Module &M);
+  void
+  EmitEndOfAsmFile(Module &M);
+  void
+  PrintSpecial(const MachineInstr *MI, const char *Code) const;
+  bool
+  PrintAsmOperand(const MachineInstr *MI, unsigned int OpNo,
+                  unsigned int AsmVariant, const char *ExtraCode);
+  bool
+  PrintAsmMemoryOperand(const MachineInstr *MI, unsigned int OpNo,
+                        unsigned int AsmVariant,
+                        const char *ExtraCode);
+  void
+  EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+  void
+  printPICJumpTableSetLabel(unsigned uid,
+                            const MachineBasicBlock *MBB) const;
+  void
+  printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+                            const MachineBasicBlock *MBB) const;
+  void
+  printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                         const MachineBasicBlock *MBB,
+                         unsigned uid) const;
+  virtual void
+  EmitInstruction(const MachineInstr *MI);
+  void
+  EmitFunctionBodyStart();
+  void
+  EmitFunctionBodyEnd();
+  void
+  EmitConstantPool();
+  void
+  EmitFunctionEntryLabel();
+
+  virtual uint32_t getDebugResourceID(const MachineInstr *MI) const;
+
+  //
+  // @param MI Machine instruction to print memory operand of
+  // @param opNum operand to print from the specified machine instruction
+  // @param O The output stream for the operand
+  // @param Modifier optional modifier for the memory operand
+  // @brief Print the memory operand based on the register type
+  //
+  void
+  printMemOperand(const MachineInstr *MI, int opNum,
+                  OSTREAM_TYPE &O,
+                  const char *Modifier = NULL);
+
+  //
+  // @param MI Machine instruction to print to the buffer
+  // @param O The output stream for the instruction
+  // @brief autogenerated function from tablegen files that prints out
+  // the assembly format of the specified instruction
+  //
+  void
+  printInstruction(const MachineInstr *MI , OSTREAM_TYPE &O); // autogenerated
+
+  const char *getRegisterName(unsigned RegNo);
+
+  //
+  // @param F MachineFunction to print the assembly for
+  // @brief parse the specified machine function and print
+  // out the assembly for all the instructions in the function
+  //
+  bool
+  runOnMachineFunction(MachineFunction &F);
+
+  //
+  // @param MI Machine Instruction to determine if it is a macro call
+  // @brief Query to see if the instruction is a Macro or not
+  // @return true if instruction is a macro
+  //
+  bool
+  isMacroCall(const MachineInstr *MI);
+
+  //
+  // @param MI Machine Instruction to determine if the function is a macro
+  // @brief determine if the function is a macro function or a normal
+  // function
+  // @return true if the function call should be transformed to a macro,
+  // false otherwise
+  //
+  bool
+  isMacroFunc(const MachineInstr *MI);
+
+
+  //
+  // @param MI Machine instruction to print swizzle for
+  // @param opNum the operand number to print swizzle for
+  // @brief return the swizzle string encoded in the target flags of the
+  // specified operand
+  //
+  const char*
+  getSwizzle(const MachineInstr *MI, int opNum);
+
+  //
+  // @return the name of this specific pass
+  //
+  virtual const char*
+  getPassName() const;
+
+  /// EmitDwarfRegOp - Emit dwarf register operation
+  virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+
+
+protected:
+  //
+  // @param MI Machine instruction to emit the macro code for
+  // @param O The output stream for the macro call
+  //
+  // Emits a fully functional macro function that uses the argument
+  // registers as the macro arguments.
+  //
+  virtual void
+  emitMacroFunc(const MachineInstr *MI , OSTREAM_TYPE &O);
+
+  // Flag whether to print out debug information
+  // or not.
+  bool mDebugMode;
+
+  //
+  // @param MI Machine instruction the macro call is emitted for
+  // @param O The output stream for the macro call
+  // @param name name of the macro to look up in the macro database
+  // @brief emit the mcall instruction for the named macro and record
+  // the macro as used
+  //
+  void
+  emitMCallInst(const MachineInstr *MI, OSTREAM_TYPE &O, const char *name);
+
+  // Set of all macros that are used in this compilation unit.
+  llvm::DenseSet<uint32_t> mMacroIDs;
+
+  /// Pointer to the Target Machine that the asm printer
+  /// should be printing compatible code for.
+  AMDILTargetMachine *mTM;
+
+  /// pointer to the kernel manager that keeps track
+  /// of the metadata required by the runtime to
+  /// call a kernel correctly.
+  AMDILKernelManager *mMeta;
+
+  /// Class that holds information about the current
+  /// function that is being processed.
+  AMDILMachineFunctionInfo *mMFI;
+
+  /// Class that holds information about the current
+  /// module that is being processed.
+  AMDILModuleInfo *mAMI;
+
+  /// Name of the current function being printed
+  /// by the asm printer
+  std::string mName;
+
+  /// name of the kernel wrapper of the current function
+  std::string mKernelName;
+
+private:
+  /// Adds the scalar leaves of the given constant pool constant to the
+  /// literal tables of the current function.
+  void addCPoolLiteral(const Constant *C);
+
+  /// The constant buffer that the data should be
+  /// allocated in by the runtime
+  int mBuffer;
+
+  /// Flag to determine whether the printer needs
+  /// to print assembly version information in the metadata
+  bool mNeedVersion;
+};
+
+
+} // end of llvm namespace
+
+#endif // _AMDIL_ASM_PRINTER_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,191 @@
+//===-- AMDILBarrierDetect.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "BarrierDetect"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+// The barrier detect pass determines if a barrier has been duplicated in the
+// source program which can cause undefined behaviour if more than a single
+// wavefront is executed in a group. This is because LLVM does not have an
+// execution barrier and if this barrier function gets duplicated, undefined
+// behaviour can occur. In order to work around this, we detect the duplicated
+// barrier and then make the work-group execute in a single wavefront mode,
+// essentially making the barrier a no-op.
+
+namespace
+{
+// IR-level FunctionPass that feeds the instructions of a function to
+// detectBarrier(), which recognises calls to functions whose name starts with
+// "barrier".
+class LLVM_LIBRARY_VISIBILITY AMDILBarrierDetect : public FunctionPass
+{
+ // Target machine supplied at construction; runOnFunction() queries it for
+ // the AMDILSubtarget.
+ TargetMachine &TM;
+ // Pass identification object passed to the FunctionPass base constructor.
+ static char ID;
+public:
+ AMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+ ~AMDILBarrierDetect();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+private:
+ // Per-instruction callback used by runOnFunction().
+ bool detectBarrier(BasicBlock::iterator *BBI);
+ // Value returned by runOnFunction(); reset to false at its start.
+ bool mChanged;
+ // Values of operand 0 of the barrier calls seen so far in the current
+ // function (cleared by runOnFunction()); used to spot a repeated value.
+ SmallVector<int64_t, DEFAULT_VEC_SLOTS> bVecMap;
+ // Subtarget of TM, refreshed at the start of every runOnFunction() call.
+ const AMDILSubtarget *mStm;
+
+ // Constants used to define memory type. detectBarrier() compares them
+ // against the second-to-last operand of a barrier call.
+ static const unsigned int LOCAL_MEM_FENCE = 1<<0;
+ static const unsigned int GLOBAL_MEM_FENCE = 1<<1;
+ static const unsigned int REGION_MEM_FENCE = 1<<2;
+};
+char AMDILBarrierDetect::ID = 0;
+} // anonymous namespace
+
+namespace llvm
+{
+// Factory for the barrier detection pass. The pass object is allocated with
+// `new` and handed back through the FunctionPass base pointer.
+FunctionPass *
+createAMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+{
+  FunctionPass *barrierPass = new AMDILBarrierDetect(TM, OptLevel);
+  return barrierPass;
+}
+} // llvm namespace
+
+// Constructs the pass for the given target machine. OptLevel is accepted for
+// interface compatibility but is not used. mChanged and mStm are given
+// defined initial values so that the object never holds indeterminate state
+// before the first runOnFunction() call assigns them.
+AMDILBarrierDetect::AMDILBarrierDetect(TargetMachine &TM,
+                                       CodeGenOpt::Level OptLevel)
+  : FunctionPass(ID),
+    TM(TM),
+    mChanged(false),
+    mStm(NULL)
+{
+}
+
+// Nothing to release explicitly; the members clean up after themselves.
+AMDILBarrierDetect::~AMDILBarrierDetect()
+{
+}
+
+// Examines one IR instruction and, when it is a call to a function whose name
+// starts with "barrier", records it and retargets the call.
+//
+// For a matching call:
+//  * With three or more operands, operand 0 is read as a constant ID. Seeing
+//    the same ID a second time (a duplicated barrier) on a device that
+//    supports BarrierDetect adds the ";limitgroupsize" metadata and the
+//    BAD_BARRIER_OPT warning to the machine function info.
+//  * On HD4XXX devices every barrier adds the LIMIT_BARRIER warning and the
+//    ";limitgroupsize" metadata and marks the function as using LDS.
+//  * The second-to-last operand is read as the fence flags. When positive,
+//    the callee is replaced by "barrierGlobal", "barrierLocal",
+//    "barrierRegion" or plain "barrier", and mChanged is set if the callee
+//    actually changed.
+//
+// Operands that are not of the expected kind (non-constant ID or flags,
+// callee that is not a Function) are skipped instead of being dereferenced.
+//
+// BBI points at the iterator of the instruction to inspect. The function
+// always returns false; how safeNestedForEach interprets the result is
+// defined in AMDILAlgorithms.tpp, which is not visible here.
+bool AMDILBarrierDetect::detectBarrier(BasicBlock::iterator *BBI)
+{
+  Instruction *inst = (*BBI);
+  CallInst *CI = dyn_cast<CallInst>(inst);
+
+  if (!CI || !CI->getNumOperands()) {
+    return false;
+  }
+  const unsigned numOps = CI->getNumOperands();
+  // The last operand of the call is read as the callee.
+  const Value *funcVal = CI->getOperand(numOps - 1);
+  if (!funcVal || !funcVal->getName().startswith("barrier")) {
+    return false;
+  }
+
+  if (numOps >= 3) {
+    const ConstantInt *idVal = dyn_cast<ConstantInt>(inst->getOperand(0));
+    // Only a constant ID can be tracked for duplication.
+    if (idVal) {
+      const int64_t barrierID = idVal->getSExtValue();
+      if (std::find(bVecMap.begin(), bVecMap.end(), barrierID)
+          == bVecMap.end()) {
+        bVecMap.push_back(barrierID);
+      } else if (mStm->device()->isSupported(AMDILDeviceInfo::BarrierDetect)) {
+        AMDILMachineFunctionInfo *MFI =
+          getAnalysis<MachineFunctionAnalysis>().getMF()
+          .getInfo<AMDILMachineFunctionInfo>();
+        MFI->addMetadata(";limitgroupsize");
+        MFI->addErrorMsg(amd::CompilerWarningMessage[BAD_BARRIER_OPT]);
+      }
+    }
+  }
+  if (mStm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    AMDILMachineFunctionInfo *MFI =
+      getAnalysis<MachineFunctionAnalysis>().getMF()
+      .getInfo<AMDILMachineFunctionInfo>();
+    MFI->addErrorMsg(amd::CompilerWarningMessage[LIMIT_BARRIER]);
+    MFI->addMetadata(";limitgroupsize");
+    MFI->setUsesLDS();
+  }
+
+  // A call without arguments has no fence-flag operand to inspect.
+  if (numOps < 2) {
+    return false;
+  }
+  const ConstantInt *fenceVal =
+    dyn_cast<ConstantInt>(inst->getOperand(numOps - 2));
+  Function *iF = dyn_cast<Function>(inst->getOperand(numOps - 1));
+  if (!fenceVal || !iF) {
+    return false;
+  }
+
+  const int64_t fence = fenceVal->getSExtValue();
+  if (fence > 0) {
+    const char *name = "barrier";
+    if (fence == GLOBAL_MEM_FENCE) {
+      name = "barrierGlobal";
+    } else if (fence == LOCAL_MEM_FENCE
+               && mStm->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+      name = "barrierLocal";
+    } else if (fence == REGION_MEM_FENCE
+               && mStm->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+      name = "barrierRegion";
+    }
+    Module *M = iF->getParent();
+    // getOrInsertFunction may hand back something other than a Function
+    // when a symbol of that name exists with a different type.
+    Function *nF =
+      dyn_cast<Function>(M->getOrInsertFunction(name, iF->getFunctionType()));
+    if (nF && nF != iF) {
+      inst->setOperand(numOps - 1, nF);
+      mChanged = true;
+    }
+  }
+
+  return false;
+}
+
+// Pass entry point: resets the per-function state, looks up the subtarget and
+// hands the function's blocks to safeNestedForEach with detectBarrier() as
+// the callback. Returns the value of mChanged afterwards.
+bool AMDILBarrierDetect::runOnFunction(Function &MF)
+{
+  bVecMap.clear();
+  mChanged = false;
+  mStm = &TM.getSubtarget<AMDILSubtarget>();
+  safeNestedForEach(
+    MF.begin(), MF.end(), MF.begin()->begin(),
+    std::bind1st(std::mem_fun(&AMDILBarrierDetect::detectBarrier), this));
+  return mChanged;
+}
+
+// Returns the human-readable name of this pass.
+const char* AMDILBarrierDetect::getPassName() const
+{
+ return "AMDIL Barrier Detect Pass";
+}
+
+// No module-level setup is performed; always returns false.
+bool AMDILBarrierDetect::doInitialization(Module &M)
+{
+ return false;
+}
+
+// No module-level teardown is performed; always returns false.
+bool AMDILBarrierDetect::doFinalization(Module &M)
+{
+ return false;
+}
+
+// Declares the analyses this pass depends on: MachineFunctionAnalysis is
+// required (detectBarrier() fetches the machine function info through it),
+// and all analyses are reported as preserved.
+void AMDILBarrierDetect::getAnalysisUsage(AnalysisUsage &AU) const
+{
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,106 @@
+//===-- AMDILBase.td ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "CapsOverride[AMDILDeviceInfo::DoubleOps]",
+ "true",
+ "Enable 64bit double precision operations">;
+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
+ "CapsOverride[AMDILDeviceInfo::ByteStores]",
+ "true",
+ "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+ "CapsOverride[AMDILDeviceInfo::BarrierDetect]",
+ "true",
+ "Enable duplicate barrier detection(HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+ "CapsOverride[AMDILDeviceInfo::Images]",
+ "true",
+ "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+ "CapsOverride[AMDILDeviceInfo::MultiUAV]",
+ "true",
+ "Generate multiple UAV code(HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+ "CapsOverride[AMDILDeviceInfo::MacroDB]",
+ "true",
+ "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+ "CapsOverride[AMDILDeviceInfo::NoAlias]",
+ "true",
+ "assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+ "CapsOverride[AMDILDeviceInfo::NoInline]",
+ "true",
+ "specify whether to not inline functions">;
+
+// NOTE(review): in SubtargetFeature<name, attribute, value, desc> the third
+// field is the value assigned to the attribute when the feature is selected,
+// so enabling "64BitPtr" appears to set mIs64bit to "false" -- confirm
+// against AMDILSubtarget whether "true" was intended.
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "mIs64bit",
+ "false",
+ "Specify if 64bit addressing should be used.">;
+
+// NOTE(review): the record is named Feature32on64BitPtr and drives
+// mIs32on64bit, but its feature string reads "64on32BitPtr"; confirm which
+// spelling is intended. The same question about the "false" value applies.
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+ "mIs32on64bit",
+ "false",
+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+def FeatureDebug : SubtargetFeature<"debug",
+ "CapsOverride[AMDILDeviceInfo::Debug]",
+ "true",
+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILCallingConv.td"
+include "AMDILInstrInfo.td"
+
+def AMDILInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// AMDIL processors supported.
+//===----------------------------------------------------------------------===//
+include "Processors.td"
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+def AMDILAsmWriter : AsmWriter {
+ string AsmWriterClassName = "AsmPrinter";
+ int Variant = 0;
+}
+
+def AMDILAsmParser : AsmParser {
+ string AsmParserClassName = "AsmParser";
+ int Variant = 0;
+
+ string CommentDelimiter = ";";
+
+ string RegisterPrefix = "r";
+
+}
+
+
+def AMDIL : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDILInstrInfo;
+ let AssemblyWriters = [AMDILAsmWriter];
+ let AssemblyParsers = [AMDILAsmParser];
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,4057 @@
+//===-- AMDILCFGStructurizer.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "structcfg"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "llvm/Support/Debug.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define FirstNonDebugInstr(A) A->begin()
+using namespace llvm;
+
+// bixia TODO: move this out to analysis lib. Make this work for both target
+// AMDIL and CBackend.
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct
+{
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+// Prints every top-level entry of LoopInfo to OS by calling print(OS, 0) on
+// each element produced by iterating LoopInfo.
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS)
+{
+  typedef typename LoopinfoT::iterator LoopIterT;
+  LoopIterT cur = LoopInfo.begin();
+  LoopIterT last = LoopInfo.end();
+  while (cur != last) {
+    (*cur)->print(OS, 0);
+    ++cur;
+  }
+}
+
+// Reverses the order of the pointers stored in Src, in place.
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src)
+{
+  // Walk inwards from both ends, exchanging one pair per step; hi is always
+  // one past the upper element still to be exchanged.
+  for (size_t lo = 0, hi = Src.size(); hi - lo > 1; ++lo) {
+    --hi;
+    NodeT *saved = Src[hi];
+    Src[hi] = Src[lo];
+    Src[lo] = saved;
+  }
+}
+
+} //end namespace llvmCFGStruct
+
+// Scans blk from its last instruction towards its first and returns the
+// first BREAK_LOGICALNZ_i32 / BREAK_LOGICALZ_i32 instruction met, or NULL if
+// the block contains neither.
+static MachineInstr *getLastBreakInstr(MachineBasicBlock *blk)
+{
+  MachineBasicBlock::reverse_iterator rit = blk->rbegin();
+  while (rit != blk->rend()) {
+    MachineInstr *candidate = &(*rit);
+    switch (candidate->getOpcode()) {
+    case AMDIL::BREAK_LOGICALNZ_i32:
+    case AMDIL::BREAK_LOGICALZ_i32:
+      return candidate;
+    default:
+      break;
+    }
+    ++rit;
+  }
+  return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// MachinePostDominatorTree
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominatorInternals.h"
+
+namespace llvm
+{
+
+extern void initializeMachinePostDominatorTreePass(PassRegistry&);
+FunctionPass *createMachinePostDominatorTreePass();
+
+/// MachinePostDominatorTree - MachineFunctionPass that owns a
+/// DominatorTreeBase<MachineBasicBlock> constructed in post-dominator mode and
+/// recalculates it for each machine function. The query methods below simply
+/// forward to that tree.
+///
+struct MachinePostDominatorTree : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ // Underlying tree; allocated in the constructor, deleted in the destructor.
+ DominatorTreeBase<MachineBasicBlock> *DT;
+ MachinePostDominatorTree() : MachineFunctionPass(ID) {
+ initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+ DT = new DominatorTreeBase<MachineBasicBlock>(true); // true selects the
+ // post-dominator variant
+ }
+
+ ~MachinePostDominatorTree();
+
+ // Recomputes DT for the given function.
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ // This pass preserves every other analysis.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ // Forwards to DT->getRoots().
+ inline const std::vector<MachineBasicBlock *> &getRoots() const {
+ return DT->getRoots();
+ }
+
+ // Forwards to DT->getRootNode().
+ inline MachineDomTreeNode *getRootNode() const {
+ return DT->getRootNode();
+ }
+
+ // Tree node for BB; same as getNode(BB).
+ inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ // Tree node for BB.
+ inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ // The dominance queries below are answered by DT, i.e. with respect to
+ // the tree built in post-dominator mode.
+ inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ // Forwards to DT->findNearestCommonDominator(A, B).
+ inline MachineBasicBlock *
+ findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
+ return DT->findNearestCommonDominator(A, B);
+ }
+
+ // Prints the tree to OS; the Module argument is ignored.
+ virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
+ DT->print(OS);
+ }
+};
+} //end of namespace llvm
+
+char MachinePostDominatorTree::ID = 0;
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+ "MachinePostDominator Tree Construction",
+ true, true)
+
+// Factory: allocates a MachinePostDominatorTree pass with `new` and returns
+// it through the FunctionPass base pointer.
+FunctionPass *llvm::createMachinePostDominatorTreePass()
+{
+  FunctionPass *postDomPass = new MachinePostDominatorTree();
+  return postDomPass;
+}
+
+//const PassInfo *const llvm::MachinePostDominatorsID
+//= &machinePostDominatorTreePass;
+
+// Rebuilds the tree for F. Returns false: the machine function itself is not
+// modified.
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F)
+{
+ DT->recalculate(F);
+ //DEBUG(DT->dump());
+ return false;
+}
+
+// Releases the tree allocated in the constructor.
+MachinePostDominatorTree::~MachinePostDominatorTree()
+{
+ delete DT;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// Primary template, intentionally empty; any members have to come from
+// specializations for a concrete PassT.
+template<class PassT>
+struct CFGStructTraits {
+};
+
+// Per-block bookkeeping record. A freshly constructed record is not retired
+// and carries INVALIDSCCNUM as its SCC number.
+template <class InstrT>
+class BlockInformation
+{
+public:
+ bool isRetired;
+ int sccNum;
+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //Instructions defining the corresponding successor.
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+// Per-loop record holding the loop's land block and the register sets that
+// drive initialization, break and continue handling around the loop.
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation
+{
+public:
+ // Land block of the loop; NULL until one is assigned.
+ BlockT *landBlk;
+ // Registers that need a "reg = 0" initialization before
+ // WHILELOOP(thisloop), i.e. before entering this loop.
+ std::set<RegiT> breakInitRegs;
+ // Registers that need a "reg = 0" initialization after
+ // WHILELOOP(thisloop), i.e. after entering this loop.
+ std::set<RegiT> contInitRegs;
+ // Registers initialized after entering this loop; at the loop land block
+ // a branch is conditioned on them.
+ std::set<RegiT> endbranchInitRegs;
+ // Registers that need an "if (reg) break endif" after ENDLOOP(thisloop),
+ // breaking out of outerLoopOf(thisLoop).
+ std::set<RegiT> breakOnRegs;
+ // Registers that need an "if (reg) continue endif" after
+ // ENDLOOP(thisloop), continuing outerLoopOf(thisLoop).
+ std::set<RegiT> contOnRegs;
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// TrivialRegAlloc
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// Stores the list of defs and uses of a virtual register, in the order in
+// which they were added.
+class DefUseList
+{
+ // Tag values stored in DefOrUseT::_flag.
+ enum {
+ FLAG_DEF = 0,
+ FLAG_USE = 1
+ };
+
+public:
+ // struct that represents a single def or use
+ struct DefOrUseT {
+ unsigned _slotIndex; // position passed to addDef()/addUse()
+ unsigned _flag; // flag whether this is a def or use
+ bool isDef() const {
+ return _flag == FLAG_DEF;
+ }
+ bool isUse() const {
+ return _flag == FLAG_USE;
+ }
+ DefOrUseT(unsigned slotIndex, unsigned flag)
+ : _slotIndex(slotIndex), _flag(flag) {}
+ };
+
+private:
+ typedef SmallVector<DefOrUseT, 2> DefUseVecT;
+
+public:
+ typedef DefUseVecT::iterator iterator;
+ typedef DefUseVecT::const_iterator const_iterator;
+
+ // Recorded defs and uses.
+ DefUseVecT _defUses;
+
+ DefUseList() : _defUses() {}
+ // Appends a def at the given slot index.
+ void addDef(unsigned slotIndex) {
+ _defUses.push_back(DefOrUseT(slotIndex, FLAG_DEF));
+ }
+ // Appends a use at the given slot index.
+ void addUse(unsigned slotIndex) {
+ _defUses.push_back(DefOrUseT(slotIndex, FLAG_USE));
+ }
+ void clear() {
+ _defUses.clear();
+ }
+ iterator begin() {
+ return _defUses.begin();
+ }
+ const_iterator begin() const {
+ return _defUses.begin();
+ }
+ iterator end() {
+ return _defUses.end();
+ }
+ const_iterator end() const {
+ return _defUses.end();
+ }
+ // True when the slot indices never decrease from one entry to the next.
+ bool isSorted() const;
+ // Writes one line per entry to errs().
+ void dump() const;
+};
+
+// Reports whether the recorded slot indices are in non-decreasing order.
+// The list must not be empty (asserted).
+bool DefUseList::isSorted() const
+{
+  const_iterator prev = begin();
+  const_iterator last = end();
+  assert(prev != last && "no def/use");
+  const_iterator cur = prev;
+  ++cur;
+  while (cur != last) {
+    // An entry with a smaller slot index than its predecessor breaks the order.
+    if (cur->_slotIndex < prev->_slotIndex) {
+      return false;
+    }
+    prev = cur;
+    ++cur;
+  }
+  return true;
+}
+
+// Debug helper: writes "<slot index> def|use" for every entry to errs().
+void DefUseList::dump() const
+{
+  const_iterator cur = begin();
+  const_iterator last = end();
+  while (cur != last) {
+    errs() << " " << cur->_slotIndex << " "
+           << (cur->isDef() ? "def" : "use") << "\n";
+    ++cur;
+  }
+}
+
+// a live interval: the slot-index range [start, end] recorded for one
+// virtual register
+class LiveInterval
+{
+ // Marker stored in the start/end fields while they are not yet set.
+ enum {
+ UndefinedSlotIndex = -1
+ };
+ unsigned _vreg;
+ int _startSlotIndex;
+ int _endSlotIndex;
+
+public:
+ // Creates an interval for vreg with neither start nor end set.
+ LiveInterval(unsigned vreg)
+ : _vreg(vreg),
+ _startSlotIndex(UndefinedSlotIndex),
+ _endSlotIndex(UndefinedSlotIndex)
+ {}
+ bool hasStart() const {
+ return _startSlotIndex != UndefinedSlotIndex;
+ }
+ bool hasEnd() const {
+ return _endSlotIndex != UndefinedSlotIndex;
+ }
+ void setStart(int slotIndex) {
+ _startSlotIndex = slotIndex;
+ }
+ void setEnd(int slotIndex) {
+ _endSlotIndex = slotIndex;
+ }
+ unsigned vreg() const {
+ return _vreg;
+ }
+ // NOTE: start() and end() return the stored int as unsigned, so an unset
+ // field (UndefinedSlotIndex, -1) is reported as the largest unsigned
+ // value. Use hasStart()/hasEnd() to tell the two cases apart.
+ unsigned start() const {
+ return _startSlotIndex;
+ }
+ unsigned end() const {
+ return _endSlotIndex;
+ }
+};
+
+// a list of live intervals, optionally flagged as sorted by start position
+class LiveIntervals
+{
+ typedef SmallVector<LiveInterval, 16> IntervalVecType;
+
+public:
+ typedef IntervalVecType::iterator iterator;
+ typedef IntervalVecType::const_iterator const_iterator;
+
+private:
+ IntervalVecType _intervals;
+ bool _sorted; // whether the intervals are sorted by start position
+
+private:
+ // Linear search for the interval of vreg; returns end() if absent.
+ iterator findIntervalImpl(unsigned vreg);
+
+public:
+ LiveIntervals(bool sorted) : _intervals(), _sorted(sorted) {}
+ // Returns the interval recorded for vreg, or NULL if there is none.
+ LiveInterval* findInterval(unsigned vreg) {
+ iterator it = findIntervalImpl(vreg);
+ if (it == _intervals.end()) {
+ return NULL;
+ }
+ return &*it;
+ }
+ // Appends a fresh interval for vreg and returns a reference to the stored
+ // element. No ordering check is made here.
+ LiveInterval& createInterval(unsigned vreg) {
+ _intervals.push_back(LiveInterval(vreg));
+ return _intervals.back();
+ }
+ // Appends a copy of interval at the back. For a sorted list this asserts
+ // that the new start is not smaller than the start of the current last
+ // element.
+ void appendInterval(LiveInterval& interval) {
+ if (_sorted) {
+ assert((_intervals.size() == 0
+ || interval.start() >= _intervals.back().start())
+ && "unsorted append into sorted LiveIntervals");
+ }
+ _intervals.push_back(interval);
+ }
+ // Adds a copy of interval: at the back for an unsorted list, otherwise
+ // through insertIntervalSorted().
+ void insertInterval(LiveInterval& interval) {
+ if (!_sorted) {
+ _intervals.push_back(interval);
+ return;
+ }
+ insertIntervalSorted(interval);
+ }
+
+ // Removes the interval of vreg; it must be present (asserted).
+ void removeInterval(unsigned vreg);
+ // Erases the element at it and returns the iterator produced by erase().
+ iterator removeInterval(iterator it) {
+ return _intervals.erase(it);
+ }
+ void clear() {
+ _intervals.clear();
+ }
+ iterator begin() {
+ return _intervals.begin();
+ }
+ iterator end() {
+ return _intervals.end();
+ }
+ // True when start positions never decrease from one element to the next.
+ bool isSortedByStart() const;
+ // Writes all intervals to errs().
+ void dump() const;
+
+private:
+ void insertIntervalSorted(LiveInterval& interval);
+};
+
+// Linear search for the first interval that belongs to vreg. Returns the end
+// iterator of the list when no such interval exists.
+LiveIntervals::iterator LiveIntervals::findIntervalImpl(unsigned vreg)
+{
+  iterator cur = _intervals.begin();
+  while (cur != _intervals.end() && cur->vreg() != vreg) {
+    ++cur;
+  }
+  return cur;
+}
+
+// Inserts a copy of interval so that the list stays in ascending order of
+// start(), the order that appendInterval() asserts and isSortedByStart()
+// checks. The new element goes in front of the first existing interval that
+// starts strictly later; intervals with an equal start keep their place
+// ahead of the new one. If no interval starts later, it is placed at the end.
+void LiveIntervals::insertIntervalSorted(LiveInterval& interval)
+{
+  iterator it = _intervals.begin();
+  iterator end = _intervals.end();
+  for (; it != end; ++it) {
+    // Stop at the first element that must come after the new interval.
+    if (interval.start() < (*it).start()) {
+      break;
+    }
+  }
+  _intervals.insert(it, interval);
+}
+
+// Erases the interval recorded for vreg. The interval is expected to exist;
+// a missing one trips the assertion.
+void LiveIntervals::removeInterval(unsigned vreg)
+{
+  const iterator victim = findIntervalImpl(vreg);
+  assert(victim != _intervals.end() && "interval not found");
+  _intervals.erase(victim);
+}
+
+// Reports whether the start positions are in non-decreasing order. An empty
+// list counts as sorted.
+bool LiveIntervals::isSortedByStart() const
+{
+  const_iterator prev = _intervals.begin();
+  const_iterator last = _intervals.end();
+  if (prev == last) {
+    return true;
+  }
+  for (const_iterator cur = prev + 1; cur != last; prev = cur, ++cur) {
+    // An element starting before its predecessor breaks the order.
+    if (cur->start() < prev->start()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Debug helper: writes a header line followed by one line per interval
+// (virtual register index, start, end) to errs().
+void LiveIntervals::dump() const
+{
+  errs() << "Intervals:\n";
+  for (const_iterator cur = _intervals.begin(), last = _intervals.end();
+       cur != last; ++cur) {
+    errs() << " vreg " << TargetRegisterInfo::virtReg2Index(cur->vreg())
+           << " start " << cur->start()
+           << " end " << cur->end() << "\n";
+  }
+}
+
+// Trivial linear scan register allocator to allocate physical registers
+// for registers requested during CFGStructurizer pass.
+// Since register allocator has already been run before this pass, we have
+// to define our own register allocator to do very simple register allocation
+// for registers requested during this pass.
+// There is no spilling: getPhysicalRegister() aborts when no register is
+// free.
+class TrivialRegAlloc
+{
+ typedef SmallVector<LiveIntervals, 2> IntervalsVecT;
+ typedef std::map<unsigned, unsigned> RegMapT;
+ typedef std::set<unsigned> RegSetT;
+ typedef std::map<unsigned, DefUseList*> VRegDefUseMapT;
+
+private:
+ // data structures passed in to this class
+
+ MachineFunction& _func;
+ const TargetRegisterClass& _regClass;
+ // virtual registers that need physical registers to be allocated
+ RegSetT& _vregs;
+
+ // data structures created within this class
+
+ // map vreg -> its def/use list; the lists are heap-allocated in
+ // computeIntervals() and deleted in the destructor
+ VRegDefUseMapT _vregDefUseMap;
+ // flags which registers are currently in use; indexed like _regSet
+ BitVector _regInUse;
+ // set of physical registers that can be alloc'ed
+ std::vector<unsigned> _regSet;
+ RegMapT _regMap; // virtual to physical register map
+ LiveIntervals _intervals; // list of all live intervals
+ // transient list of currently active live intervals
+ LiveIntervals _activeIntervals;
+ // transient current interval for which we are trying to allocate a register
+ LiveInterval* _currInterval;
+
+private:
+ void initRegSet();
+ void computeIntervals();
+ unsigned getPhysicalRegister();
+ void allocateRegisterFor(LiveInterval& interval);
+ void releaseRegisterFor(const LiveInterval& interval);
+ void handleActiveIntervals(unsigned pos);
+ void allocateRegisters();
+ void rewrite();
+
+public:
+ TrivialRegAlloc(MachineFunction& func,
+ const TargetRegisterClass& regClass,
+ RegSetT& vregs);
+ ~TrivialRegAlloc();
+ void run(); // main driver of the algorithm
+};
+
+// Stores references to the function, the register class and the set of
+// virtual registers to allocate. _intervals is created as a sorted list,
+// _activeIntervals as an unsorted one. Only the GPRI32 register class is
+// implemented (asserted).
+TrivialRegAlloc::TrivialRegAlloc(MachineFunction& func,
+ const TargetRegisterClass& regClass,
+ RegSetT& vregs)
+ : _func(func), _regClass(regClass), _vregs(vregs),
+ _vregDefUseMap(), _regInUse(), _regSet(), _regMap(),
+ _intervals(true), _activeIntervals(false), _currInterval(NULL)
+{
+ assert(_regClass.getID() == AMDIL::GPRI32RegClassID && "unimplemented");
+}
+
+// Frees every def/use list owned by _vregDefUseMap.
+TrivialRegAlloc::~TrivialRegAlloc()
+{
+  VRegDefUseMapT::iterator entry = _vregDefUseMap.begin();
+  while (entry != _vregDefUseMap.end()) {
+    delete entry->second;
+    ++entry;
+  }
+}
+
+// Collects, in raw allocation order, the physical registers of regClass that
+// are still unused in func after the global register allocator: a register
+// is appended to regSet when neither it nor an overlapping register is
+// marked used and its number is non-zero.
+static void findAvailPhysRegs(MachineFunction& func,
+                              const class TargetRegisterClass& regClass,
+                              std::vector<unsigned>& regSet)
+{
+  ArrayRef<uint16_t> order = regClass.getRawAllocationOrder(func);
+  const uint16_t *cur = order.begin();
+  const uint16_t *last = order.end();
+  while (cur != last) {
+    const uint16_t candidate = *cur;
+    ++cur;
+    const bool taken = func.getRegInfo().isPhysRegOrOverlapUsed(candidate);
+    if (!taken && candidate) {
+      regSet.push_back(candidate);
+    }
+  }
+}
+
+// Fills _regSet with the physical registers left over by the global
+// allocator, followed by the registers CFG1..CFG10 reserved for the
+// CFGStructurizer, and sizes _regInUse to match with every flag cleared.
+void TrivialRegAlloc::initRegSet()
+{
+  findAvailPhysRegs(_func, _regClass, _regSet);
+  unsigned reserved = AMDIL::CFG1;
+  while (reserved <= AMDIL::CFG10) {
+    _regSet.push_back(reserved);
+    ++reserved;
+  }
+  _regInUse.resize(_regSet.size(), 0);
+  if (!DEBUGME) {
+    return;
+  }
+  // Debug output only: list the registers that may be handed out.
+  errs() << "available physical registers:\n ";
+  for (size_t idx = 0; idx < _regSet.size(); ++idx) {
+    errs() << " " << _regSet[idx];
+  }
+  errs() << "\n";
+}
+
+// compute live intervals for the virtual registers created during
+// CFGStructurizer pass
+//
+// Walks the instructions of the first block of the function, numbering them
+// with consecutive slot indices starting at 0. For every register operand
+// that is in _vregs, the def or use is appended to the register's DefUseList;
+// the first def fixes the interval start and every use moves the interval end
+// to the current slot.
+void TrivialRegAlloc::computeIntervals()
+{
+ MachineBasicBlock* entryBlk
+ = GraphTraits<MachineFunction*>::nodes_begin(&_func);
+ unsigned slotIndex = 0;
+ if (DEBUGME) errs() << "start computeIntervals()\n";
+ // there is only one block now in the function
+ for (MachineBasicBlock::iterator iter = entryBlk->begin(),
+ iterEnd = entryBlk->end();
+ iter != iterEnd;
+ ++iter) {
+ MachineInstr* inst = iter;
+ if (DEBUGME) errs() << *inst;
+ for (unsigned i = 0; i < inst->getNumOperands(); ++i) {
+ MachineOperand& oper = inst->getOperand(i);
+ // skip non-register operands and register number 0
+ if (!oper.isReg() || !oper.getReg()) {
+ continue;
+ }
+ unsigned vreg = oper.getReg();
+ // if not a virtual register that needs reg alloc, skip
+ if (!_vregs.count(vreg)) {
+ continue;
+ }
+ if (DEBUGME) errs() << " oper " << oper << " vreg " << TargetRegisterInfo::virtReg2Index(vreg) << "\n";
+ // add to vreg's def/use list
+ // (operator[] creates a NULL map entry the first time vreg is seen)
+ DefUseList*& defUses = _vregDefUseMap[vreg];
+ LiveInterval* interval = _intervals.findInterval(vreg);
+ if (oper.isDef()) {
+ // first def of this vreg: allocate its list and its interval
+ if (defUses == NULL) {
+ defUses = new DefUseList();
+ }
+ defUses->addDef(slotIndex);
+ if (interval == NULL) {
+ interval = &_intervals.createInterval(vreg);
+ }
+ if (!interval->hasStart()) {
+ interval->setStart(slotIndex);
+ if (DEBUGME)
+ errs() << "interval for vreg "
+ << TargetRegisterInfo::virtReg2Index(vreg)
+ << " start at " << slotIndex << "\n";
+ } else {
+ // a later def must come strictly after the recorded start
+ assert(slotIndex > interval->start() && "sanity");
+ }
+ } else {
+ // a use is only legal after a def has created list and interval
+ assert(defUses && "use before def");
+ defUses->addUse(slotIndex);
+ assert(interval && "use before def");
+ // NOTE(review): the comparison is strict, so the same vreg appearing as
+ // a use twice in one instruction (slotIndex == end()) would trip this
+ // assertion -- confirm that this cannot occur.
+ assert((!interval->hasEnd() || slotIndex > interval->end())
+ && "sanity");
+ interval->setEnd(slotIndex);
+ if (DEBUGME)
+ errs() << "interval for vreg "
+ << TargetRegisterInfo::virtReg2Index(vreg)
+ << " end at " << slotIndex << "\n";
+ }
+ }
+ ++slotIndex;
+ }
+ if (DEBUGME) {
+ _intervals.dump();
+ errs() << "def/use map: \n";
+ for (VRegDefUseMapT::const_iterator it = _vregDefUseMap.begin(),
+ e = _vregDefUseMap.end(); it != e; ++it) {
+ DefUseList* defUses = it->second;
+ errs() << " vreg "
+ << TargetRegisterInfo::virtReg2Index(it->first) << "\n";
+ defUses->dump();
+ }
+ }
+ // consistency checks on the data just built (active in assert builds only)
+ assert(_intervals.isSortedByStart() && "_intervals not sorted");
+#ifndef NDEBUG
+ for (VRegDefUseMapT::iterator I = _vregDefUseMap.begin(),
+ E = _vregDefUseMap.end();
+ I != E; ++I) {
+ assert(I->second->isSorted() && "def/uses not sorted");
+ }
+#endif
+}
+
+// Claims the first physical register whose in-use flag is clear, marks it as
+// in use and returns it. Aborts the process when every register is taken,
+// because no spiller exists (TODO: add spiller).
+unsigned TrivialRegAlloc::getPhysicalRegister()
+{
+  for (unsigned slot = 0; slot < _regInUse.size(); ++slot) {
+    if (_regInUse[slot]) {
+      continue;
+    }
+    _regInUse[slot] = 1;
+    return _regSet[slot];
+  }
+  // No physical register available. Has to spill.
+  abort();
+  return 0;
+}
+
+// Makes interval the current interval, claims a free physical register for
+// it and records the virtual-to-physical mapping in _regMap.
+// (A call marking the register as used in the function's register info is
+// left disabled, as in the original code.)
+void TrivialRegAlloc::allocateRegisterFor(LiveInterval& interval)
+{
+  _currInterval = &interval;
+  const unsigned virt = interval.vreg();
+  const unsigned assigned = getPhysicalRegister();
+  _regMap[virt] = assigned;
+  if (DEBUGME) {
+    errs() << "allocated reg " << assigned << " to vreg "
+           << TargetRegisterInfo::virtReg2Index(virt) << "\n";
+  }
+}
+
+// Clears the in-use flag of the physical register mapped to the interval's
+// virtual register. The register must be one of those in _regSet (asserted).
+void TrivialRegAlloc::releaseRegisterFor(const LiveInterval& interval)
+{
+  const unsigned physicalReg = _regMap[interval.vreg()];
+  // Locate the register's position in _regSet; _regInUse shares that index.
+  unsigned slot = 0;
+  while (slot < _regSet.size() && _regSet[slot] != physicalReg) {
+    ++slot;
+  }
+  assert(slot < _regSet.size() && "invalid physical regsiter");
+  _regInUse[slot] = 0;
+}
+
+// Retires every active interval whose end lies before pos: its physical
+// register is released and the interval is removed from the active list.
+void TrivialRegAlloc::handleActiveIntervals(unsigned pos)
+{
+  LiveIntervals::iterator cur = _activeIntervals.begin();
+  while (cur != _activeIntervals.end()) {
+    if (pos <= cur->end()) {
+      // still live at pos; keep it
+      ++cur;
+      continue;
+    }
+    releaseRegisterFor(*cur);
+    cur = _activeIntervals.removeInterval(cur);
+  }
+}
+
+// Walks _intervals in list order. For each interval it first retires the
+// active intervals that ended before this one starts, then adds the interval
+// to the active list and assigns it a physical register.
+void TrivialRegAlloc::allocateRegisters()
+{
+  LiveIntervals::iterator cur = _intervals.begin();
+  LiveIntervals::iterator last = _intervals.end();
+  while (cur != last) {
+    LiveInterval& live = *cur;
+    handleActiveIntervals(live.start());
+    _activeIntervals.appendInterval(live);
+    allocateRegisterFor(live);
+    ++cur;
+  }
+}
+
+// Replace every operand that names one of the vregs handled by this
+// allocator with the physical register recorded for it in _regMap.
+// Only the first block of the function is visited.
+void TrivialRegAlloc::rewrite()
+{
+  // there is only one block now in the function
+  MachineBasicBlock* onlyBlk
+    = GraphTraits<MachineFunction*>::nodes_begin(&_func);
+  MachineBasicBlock::iterator instIt = onlyBlk->begin();
+  MachineBasicBlock::iterator instEnd = onlyBlk->end();
+  for (; instIt != instEnd; ++instIt) {
+    MachineInstr* mi = instIt;
+    for (unsigned opIdx = 0; opIdx < mi->getNumOperands(); ++opIdx) {
+      MachineOperand& mo = mi->getOperand(opIdx);
+      // only non-zero register operands are candidates
+      if (mo.isReg() && mo.getReg()) {
+        unsigned reg = mo.getReg();
+        // registers not in _vregs do not need reg alloc here
+        if (_vregs.count(reg)) {
+          assert(_regMap.find(reg) != _regMap.end()
+                 && "register not allocated");
+          mo.setReg(_regMap[reg]);
+        }
+      }
+    }
+  }
+}
+
+// Entry point of the allocator; runs its four phases in order.
+void TrivialRegAlloc::run()
+{
+  // 1. set up the register set
+  initRegSet();
+  // 2. build the live intervals
+  computeIntervals();
+  // 3. assign a physical register to each interval
+  allocateRegisters();
+  // 4. patch the instructions to use the assigned registers
+  rewrite();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+// CFG structurizer, parameterized on a pass type PassT that supplies the
+// instruction, function, dominator tree, loop info and loop types used
+// below. prepare() normalizes the CFG; run() repeatedly pattern-matches
+// blocks (serial / if / loop-end / loop patterns) to reduce the CFG.
+template<class PassT>
+class CFGStructurizer
+{
+public:
+ // Result of singlePathTo().
+ typedef enum {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+
+public:
+ // Types taken from the pass.
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
+
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass);
+
+private:
+ // Block ordering and traversal.
+ void orderBlocks();
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+
+ void processAddedToTraversalBlocks();
+
+ // Pattern matchers; each returns the number of matches it made.
+ int patternMatch(BlockT *CurBlock);
+ int patternMatchGroup(BlockT *CurBlock);
+
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ // loopbreakPatternMatch returns -1 when the loop exits cannot be handled.
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ inline int getRegister(const class TargetRegisterClass *RegClass);
+ void handleLoopbreak(BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ // Per-block bookkeeping.
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ void addToTraversalBlock(BlockT *srcBlock);
+
+ // Per-loop landing-block bookkeeping.
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+ // Analyses, pass and function; assigned by prepare() and run().
+ DomTreeT *domTree;
+ PostDomTreeT *postDomTree;
+ LoopInfoT *loopInfo;
+ PassT *passRep;
+ FuncT *funcRep;
+
+ // The mapped BlockInfo / LoopLandInfo objects are deleted by this class.
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> addedToTraversalBlks;
+ std::set<unsigned> vregs; // new virtual registers created
+}; //template class CFGStructurizer
+
+// Construct an idle structurizer. Every analysis/pass/function pointer
+// starts out NULL (passRep and funcRep were previously left
+// uninitialized); prepare() and run() assign the ones they use.
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+  : domTree(NULL), postDomTree(NULL), loopInfo(NULL),
+    passRep(NULL), funcRep(NULL)
+{
+}
+
+// Free whatever per-block and per-loop records are still held. run()
+// deletes and clears both maps itself, so this only has work to do when
+// run() never reached its cleanup.
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer()
+{
+  for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+       E = blockInfoMap.end(); I != E; ++I) {
+    delete I->second;
+  }
+  blockInfoMap.clear();
+
+  // The loop landing records are owned the same way as the block records
+  // (run() deletes them too); previously they were not freed here.
+  for (typename LoopLandInfoMap::iterator I = loopLandInfoMap.begin(),
+       E = loopLandInfoMap.end(); I != E; ++I) {
+    delete I->second;
+  }
+  loopLandInfoMap.clear();
+}
+
+// Prepare the function for structurization:
+//  - give loops without any exiting block a dummy exit,
+//  - strip unconditional and redundant conditional branches,
+//  - funnel multiple return blocks into one dummy exit block.
+// Returns true only when the dummy exit block for multiple returns was
+// added.
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass)
+{
+  passRep = &pass;
+  funcRep = &func;
+
+  //to do, if not reducible flow graph, make it so ???
+
+  if (DEBUGME) {
+    errs() << "AMDILCFGStructurizer::prepare\n";
+  }
+
+  // The dominator and post-dominator trees are not fetched in this phase.
+  //FIXME: gcc complains on pass.getAnalysis<LoopInfoT>(); go through
+  // CFGTraits instead.
+  loopInfo = CFGTraits::getLoopInfo(pass);
+  if (DEBUGME) {
+    errs() << "LoopInfo:\n";
+    PrintLoopinfo(*loopInfo, errs());
+  }
+
+  orderBlocks();
+  if (DEBUGME) {
+    errs() << "Ordered blocks:\n";
+    printOrderedBlocks(errs());
+  }
+
+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> returnBlks;
+
+  // Loops that have no exiting block get normalized; the dummy exit block
+  // produced (if any) counts as a return block.
+  typename LoopInfoT::iterator loopIt = loopInfo->begin();
+  typename LoopInfoT::iterator loopEnd = loopInfo->end();
+  for (; loopIt != loopEnd; ++loopIt) {
+    LoopT *curLoop = *loopIt;
+    BlockTSmallerVector exiting;
+    curLoop->getExitingBlocks(exiting);
+    if (exiting.size() != 0) {
+      continue;
+    }
+    BlockT *dummyExit = normalizeInfiniteLoopExit(curLoop);
+    if (dummyExit != NULL) {
+      returnBlks.push_back(dummyExit);
+    }
+  }
+
+  // Remove unconditional branch instr and collect the return blocks.
+  typedef typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+    ConstBlkIter;
+  for (ConstBlkIter blkIt = orderedBlks.begin(), blkEnd = orderedBlks.end();
+       blkIt != blkEnd; ++blkIt) {
+    BlockT *blk = *blkIt;
+    removeUnconditionalBranch(blk);
+    removeRedundantConditionalBranch(blk);
+    if (CFGTraits::isReturnBlock(blk)) {
+      returnBlks.push_back(blk);
+    }
+    assert(blk->succ_size() <= 2);
+  }
+
+  // Add dummy exit block iff there are multiple returns.
+  if (returnBlks.size() < 2) {
+    return false;
+  }
+  addDummyExitBlock(returnBlks);
+  return true;
+} //CFGStructurizer::prepare
+
+// Structurize the function: order the blocks by SCC, then repeatedly run
+// patternMatch() over them, SCC by SCC, until the entry block has no
+// successors left ("finish") or a whole sweep makes no progress.
+// Afterwards: wrap up the function, allocate physical registers for the
+// vregs created here, erase retired blocks, free the per-block and
+// per-loop records, and record an IRREDUCIBLE_CF error message if the
+// reduction did not finish. Always returns true.
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass)
+{
+ passRep = &pass;
+ funcRep = &func;
+
+ //func.RenumberBlocks();
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDILCFGStructurizer::run\n";
+ errs() << func.getFunction()->getName().str() << "\n";
+ //func.viewCFG();
+ //func.viewCFGOnly();
+ func.dump();
+ }
+
+#if 1
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+#endif
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+//#define STRESSTEST
+#ifdef STRESSTEST
+ //Use the worse block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0;
+ bool finish = false;
+ BlockT *curBlk;
+ bool makeProgress = false;
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+
+ // Each pass of this do-loop is one sweep over all ordered blocks.
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+ << ", numRemaintedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ // NULL means "the next block starts a new SCC".
+ BlockT *sccBeginBlk = NULL;
+ int sccNumBlk = 0; // The number of active blocks, init to a
+ // maximum possible number.
+ int sccNumIter; // Number of iteration in this SCC.
+
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ // First block of a new SCC: reset the per-SCC state (sccNumIter is
+ // assigned here before it is read) and process the queued blocks.
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ processAddedToTraversalBlocks();
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ // End of the current SCC: either the end of the list or the next block
+ // carries a different SCC number.
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ // Just finish one scc.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ // Active block count did not shrink: give up on this SCC for now.
+ if (sccRemainedNumBlk >= 1 && sccRemainedNumBlk >= sccNumBlk) {
+ if (DEBUGME) {
+ errs() << "Can't reduce processing SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter
+ << ", remain # of blocks " << sccRemainedNumBlk;
+ errs() << "doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk >= 1 && sccRemainedNumBlk < sccNumBlk) {
+ // Progress was made but blocks remain: rewind and repeat this SCC.
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+ << "sccNumIter = " << sccNumIter << "\n";
+ //func.viewCFG();
+ //func.viewCFGOnly();
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "finish processing SCC" << getSCCNum(curBlk)
+ << ", remain # of blocks " << sccRemainedNumBlk << "\n";
+ }
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+
+ // Done once the entry block has no successors; otherwise another sweep
+ // is only worthwhile if the number of active blocks went down.
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // consider cloned blocks ??
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // allocate physical registers for virtual registers created during this pass
+ TrivialRegAlloc regAlloc(func, AMDIL::GPRI32RegClass, vregs);
+ regAlloc.run();
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second;
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ //func.viewCFG();
+ //func.dump();
+ }
+
+ // The sweep loop stopped for lack of progress: report it as an error
+ // message on the machine function info.
+ if (!finish) {
+ MachineFunction *MF = &func;
+ AMDILMachineFunctionInfo *mMFI =
+ MF->getInfo<AMDILMachineFunctionInfo>();
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[IRREDUCIBLE_CF]);
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Write one "BB<number>(<scc>,<size>)" entry per ordered block to `os`.
+/// Entries are separated by a space, except that a newline follows every
+/// entry whose non-zero index is a multiple of 10.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os)
+{
+  const size_t numBlks = orderedBlks.size();
+  for (size_t idx = 0; idx < numBlks; ++idx) {
+    BlockT *blk = orderedBlks[idx];
+    os << "BB" << blk->getNumber();
+    os << "(" << getSCCNum(blk) << "," << blk->size() << ")";
+    const bool endOfRow = (idx != 0) && (idx % 10 == 0);
+    os << (endOfRow ? "\n" : " ");
+  }
+} //printOrderedBlocks
+
+/// Fill orderedBlks by walking the function's SCCs in scc_iterator order,
+/// recording for every block the index of the SCC it came from. Blocks of
+/// the function that end up without an SCC number are reported as
+/// unreachable.
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks()
+{
+  int curScc = 0;
+  scc_iterator<FuncT *> sccIt = scc_begin(funcRep);
+  scc_iterator<FuncT *> sccLast = scc_end(funcRep);
+  while (sccIt != sccLast) {
+    std::vector<BlockT *> &members = *sccIt;
+    for (size_t i = 0; i < members.size(); ++i) {
+      BlockT *member = members[i];
+      orderedBlks.push_back(member);
+      recordSccnum(member, curScc);
+    }
+    ++sccIt;
+    ++curScc;
+  }
+
+  //walk through all the block in func to check for unreachable
+  BlockIterator blkIt = FuncGTraits::nodes_begin(funcRep);
+  BlockIterator blkLast = FuncGTraits::nodes_end(funcRep);
+  for (; blkIt != blkLast; ++blkIt) {
+    BlockT *blk = &(*blkIt);
+    if (getSCCNum(blk) == INVALIDSCCNUM) {
+      errs() << "unreachable block BB" << blk->getNumber() << "\n";
+    }
+  }
+} //orderBlocks
+
+/// Run patternMatch() on every block queued in addedToTraversalBlks that
+/// has not been retired.
+///
+template<class PassT> void CFGStructurizer<PassT>::processAddedToTraversalBlocks()
+{
+  // Walk by index over the blocks present on entry instead of holding
+  // iterators across patternMatch(): if pattern matching appends to
+  // addedToTraversalBlks (NOTE(review): addToTraversalBlock() presumably
+  // does - confirm), a SmallVector reallocation would leave cached
+  // iterators dangling.
+  const size_t numQueued = addedToTraversalBlks.size();
+  for (size_t i = 0; i < numQueued; ++i) {
+    BlockT *curBlk = addedToTraversalBlks[i];
+    if (!isRetiredBlock(curBlk)) {
+      patternMatch(curBlk);
+    }
+  }
+} //CFGStructurizer<PassT>::processAddedToTraversalBlocks
+
+// Apply patternMatchGroup() to curBlk until a round matches nothing.
+// Returns the total number of matches.
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk)
+{
+  if (DEBUGME) {
+    errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+  }
+
+  int total = 0;
+  for (;;) {
+    const int matched = patternMatchGroup(curBlk);
+    if (matched <= 0) {
+      break;
+    }
+    total += matched;
+  }
+
+  if (DEBUGME) {
+    errs() << "End patternMatch BB" << curBlk->getNumber()
+           << ", numMatch = " << total << "\n";
+  }
+
+  return total;
+} //patternMatch
+
+// Try each pattern once on curBlk, in a fixed order (serial, if,
+// loop-end, loop), and return the sum of the matches. The switch pattern
+// is currently left out.
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk)
+{
+  const int serialHits = serialPatternMatch(curBlk);
+  const int ifHits = ifPatternMatch(curBlk);
+  //switchPatternMatch(curBlk) is not called
+  const int loopendHits = loopendPatternMatch(curBlk);
+  const int loopHits = loopPatternMatch(curBlk);
+  return serialHits + ifHits + loopendHits + loopHits;
+}//patternMatchGroup
+
+// Merge curBlk with its only successor when that successor has curBlk as
+// its only predecessor and is not an active loop head. Returns 1 on a
+// merge, 0 otherwise.
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk)
+{
+  if (curBlk->succ_size() == 1) {
+    BlockT *onlySucc = *curBlk->succ_begin();
+    if (onlySucc->pred_size() == 1 && !isActiveLoophead(onlySucc)) {
+      mergeSerialBlock(curBlk, onlySucc);
+      ++numSerialPatternMatch;
+      return 1;
+    }
+  }
+  return 0;
+} //serialPatternMatch
+
+// Try to fold an if / if-else rooted at curBlk into curBlk.
+// Returns 0 when curBlk is not a candidate, otherwise 1 plus the number of
+// blocks cloned on the way (or whatever handleJumpintoIf() reports when
+// none of the simple shapes apply).
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk)
+{
+  // need exactly two outgoing edges and no back edge
+  if (curBlk->succ_size() != 2 || hasBackEdge(curBlk)) {
+    return 0;
+  }
+
+  InstrT *condBranch = CFGTraits::getNormalBlockBranchInstr(curBlk);
+  if (condBranch == NULL) {
+    return 0;
+  }
+  assert(CFGTraits::isCondBranch(condBranch));
+
+  BlockT *thenBlk = CFGTraits::getTrueBranch(condBranch);
+  BlockT *elseBlk = CFGTraits::getFalseBranch(curBlk, condBranch);
+  BlockT *joinBlk;
+
+  const bool thenHasOneSucc = (thenBlk->succ_size() == 1);
+  const bool elseHasOneSucc = (elseBlk->succ_size() == 1);
+  const bool thenHasNoSucc = (thenBlk->succ_size() == 0);
+  const bool elseHasNoSucc = (elseBlk->succ_size() == 0);
+
+  // TODO: Simplify
+  if (thenHasOneSucc && elseHasOneSucc
+      && *thenBlk->succ_begin() == *elseBlk->succ_begin()) {
+    // both arms fall into the same block
+    joinBlk = *thenBlk->succ_begin();
+  } else if (thenHasNoSucc && elseHasNoSucc) {
+    // neither arm has a successor
+    joinBlk = NULL;
+  } else if (thenHasOneSucc && *thenBlk->succ_begin() == elseBlk) {
+    // the true arm falls into the false target: no else arm
+    joinBlk = elseBlk;
+    elseBlk = NULL;
+  } else if (elseHasOneSucc && *elseBlk->succ_begin() == thenBlk) {
+    // the false arm falls into the true target: no then arm
+    joinBlk = thenBlk;
+    thenBlk = NULL;
+  } else if (elseHasOneSucc
+             && isSameloopDetachedContbreak(thenBlk, elseBlk)) {
+    joinBlk = *elseBlk->succ_begin();
+  } else if (thenHasOneSucc
+             && isSameloopDetachedContbreak(elseBlk, thenBlk)) {
+    joinBlk = *thenBlk->succ_begin();
+  } else {
+    return handleJumpintoIf(curBlk, thenBlk, elseBlk);
+  }
+
+  int cloned = 0;
+
+  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
+  // new BB created for landBlk==NULL may introduce new challenge to the
+  // reduction process.
+  const bool thenShared = thenBlk && thenBlk->pred_size() > 1;
+  if (joinBlk != NULL
+      && (thenShared || (elseBlk && elseBlk->pred_size() > 1))) {
+    cloned += improveSimpleJumpintoIf(curBlk, thenBlk, elseBlk, &joinBlk);
+  }
+
+  // An arm that still has other predecessors is cloned for curBlk.
+  if (thenBlk && thenBlk->pred_size() > 1) {
+    thenBlk = cloneBlockForPredecessor(thenBlk, curBlk);
+    ++cloned;
+  }
+  if (elseBlk && elseBlk->pred_size() > 1) {
+    elseBlk = cloneBlockForPredecessor(elseBlk, curBlk);
+    ++cloned;
+  }
+
+  mergeIfthenelseBlock(condBranch, curBlk, thenBlk, elseBlk, joinBlk);
+
+  ++numIfPatternMatch;
+  numClonedBlock += cloned;
+
+  return 1 + cloned;
+} //ifPatternMatch
+
+// Switch patterns are not implemented: never matches.
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk)
+{
+  (void)curBlk;
+  const int numMatch = 0;
+  return numMatch;
+} //switchPatternMatch
+
+// For every loop enclosing curBlk that has no landing block recorded yet,
+// match its breaks and continues. Returns the number of matches; stops
+// early when loopbreakPatternMatch() reports failure (-1).
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk)
+{
+  // Collect the enclosing loops, from the one found for curBlk outwards.
+  std::vector<LoopT *> loopChain;
+  for (LoopT *lp = loopInfo->getLoopFor(curBlk); lp;
+       lp = lp->getParentLoop()) {
+    loopChain.push_back(lp);
+  }
+
+  if (loopChain.empty()) {
+    return 0;
+  }
+
+  // Process nested loop outside->inside, so "continue" to a outside loop won't
+  // be mistaken as "break" of the current loop.
+  int total = 0;
+  for (size_t remaining = loopChain.size(); remaining > 0; --remaining) {
+    LoopT *lp = loopChain[remaining - 1];
+
+    if (getLoopLandBlock(lp) != NULL) {
+      continue;
+    }
+
+    BlockT *header = lp->getHeader();
+
+    const int breaks = loopbreakPatternMatch(lp, header);
+    if (breaks == -1) {
+      break;
+    }
+
+    const int conts = loopcontPatternMatch(lp, header);
+    total += breaks + conts;
+  }
+
+  return total;
+} //loopendPatternMatch
+
+// When curBlk has no successors and heads one or more nested loops, merge
+// each such loop's (non-retired) landing block into it. Returns the number
+// of loops merged.
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk)
+{
+  if (curBlk->succ_size() != 0) {
+    return 0;
+  }
+
+  int merged = 0;
+  // walk outwards for as long as curBlk is the header
+  for (LoopT *lp = loopInfo->getLoopFor(curBlk);
+       lp && lp->getHeader() == curBlk;
+       lp = lp->getParentLoop()) {
+    LoopLandInfo *landInfo = getLoopLandInfo(lp);
+    if (!landInfo) {
+      continue;
+    }
+    BlockT *land = landInfo->landBlk;
+    assert(land);
+    if (isRetiredBlock(land)) {
+      continue;
+    }
+    mergeLooplandBlock(curBlk, landInfo);
+    ++merged;
+  }
+
+  numLoopPatternMatch += merged;
+
+  return merged;
+} //loopPatternMatch
+
+// Match the "break" edges of loopRep: find (or create) one landing block
+// for all of the loop's exits, record it for the loop, and fold each
+// exiting edge via handleLoopbreak().
+// Returns -1 when the exits cannot be handled, 0 when the loop has no
+// exiting block, otherwise the number of exiting blocks plus the serial
+// merges and clones performed on the way.
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader)
+{
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ // No exiting block: record the loop with the default (NULL) landing block.
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ // exitBlks is kept index-parallel to exitingBlks.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ // A single distinct exit block is the landing block itself.
+ if (exitBlkSet.size() == 1) {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1;
+ }
+
+ // Every exit block must report the same single-path kind towards
+ // exitLandBlk (all InPath or all NotInPath); otherwise give up.
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) {
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) {
+ // TODO: Simplify, maybe separate function?
+ //funcRep->viewCFG();
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ // If the landing block is the parent loop's header, first try
+ // relocateLoopcontBlock(); if that does not apply or returns NULL,
+ // fall back to addLoopEndbranchBlock().
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ //current addLoopEndbranchBlock always does something and return non-NULL
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ }
+
+ // Handle side entry to exit path.
+ // The exit blocks are recomputed here and replaced by clones wherever an
+ // exit block (other than the landing block) has several predecessors.
+ exitBlks.clear();
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ // Final shape check: each exit block must either be the landing block
+ // or have one predecessor and the landing block as its only successor.
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold break into the breaking block. Leverage across level breaks.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+// Handle every predecessor of loopHeader that lies inside loopRep as a
+// "continue" block, then detach those predecessors from the header.
+// Returns the number of such blocks.
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+                                                 BlockT *loopHeader)
+{
+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contingBlks;
+
+  typename InvBlockGTraits::ChildIteratorType predIt =
+    InvBlockGTraits::child_begin(loopHeader);
+  typename InvBlockGTraits::ChildIteratorType predEnd =
+    InvBlockGTraits::child_end(loopHeader);
+  for (; predIt != predEnd; ++predIt) {
+    BlockT *pred = *predIt;
+    if (!loopRep->contains(pred)) {
+      continue;
+    }
+    handleLoopcontBlock(pred, loopInfo->getLoopFor(pred),
+                        loopHeader, loopRep);
+    contingBlks.push_back(pred);
+  }
+
+  // The edges are removed only after the predecessor walk is over.
+  for (size_t i = 0; i < contingBlks.size(); ++i) {
+    contingBlks[i]->removeSuccessor(loopHeader);
+  }
+
+  const int numCont = static_cast<int>(contingBlks.size());
+  numLoopcontPatternMatch += numCont;
+
+  return numCont;
+} //loopcontPatternMatch
+
+
+// Return true iff src1Blk has no successors and src1Blk and src2Blk are in
+// the same loop, and that loop has a (non-NULL) LoopLandInfo recorded.
+// Without explicitly keeping track of loopContBlks and loopBreakBlks, this
+// is a method to get the information.
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+                                                        BlockT *src2Blk)
+{
+  if (src1Blk->succ_size() != 0) {
+    return false;
+  }
+
+  LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+  if (loopRep == NULL || loopRep != loopInfo->getLoopFor(src2Blk)) {
+    return false;
+  }
+
+  // Query with find() rather than operator[]: this is a read-only check and
+  // must not insert a NULL entry for loops that have no landing info yet.
+  typename LoopLandInfoMap::iterator entry = loopLandInfoMap.find(loopRep);
+  if (entry == loopLandInfoMap.end() || entry->second == NULL) {
+    return false;
+  }
+
+  if (DEBUGME) {
+    errs() << "isLoopContBreakBlock yes src1 = BB"
+           << src1Blk->getNumber()
+           << " src2 = BB" << src2Blk->getNumber() << "\n";
+  }
+  return true;
+} //isSameloopDetachedContbreak
+
+// Handle an if whose arms do not have one of the simple shapes. Tries, in
+// order: handleJumpintoIfImp() as given, the same with the arms swapped,
+// and finally treating NULL as the common end of both arms. Returns the
+// number of matches/clones made, 0 when nothing worked.
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+                                             BlockT *trueBlk,
+                                             BlockT *falseBlk)
+{
+  int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+  if (num != 0) {
+    return num;
+  }
+
+  if (DEBUGME) {
+    errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+  }
+  num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+  if (num != 0) {
+    return num;
+  }
+
+  if (DEBUGME) {
+    errs() << "handleJumpintoIf check NULL as common postdom:";
+  }
+  // The false arm is only examined when the true arm has a single path end.
+  BlockT *trueEnd = singlePathEnd(trueBlk, NULL);
+  BlockT *falseEnd = trueEnd ? singlePathEnd(falseBlk, NULL) : NULL;
+  const bool endInSameLoop = trueEnd && falseEnd &&
+    loopInfo->getLoopFor(trueEnd) == loopInfo->getLoopFor(falseEnd);
+
+  if (!endInSameLoop) {
+    if (DEBUGME) {
+      errs() << " not working\n";
+    }
+    return num;
+  }
+
+  if (DEBUGME) {
+    errs() << " working\n";
+  }
+  num += cloneOnSideEntryTo(headBlk, trueBlk, NULL);
+  num += cloneOnSideEntryTo(headBlk, falseBlk, NULL);
+
+  // up to here num only counts clones
+  numClonedBlock += num;
+  num += serialPatternMatch(*headBlk->succ_begin());
+  num += serialPatternMatch(*(headBlk->succ_begin()+1));
+  num += ifPatternMatch(headBlk);
+  assert(num > 0); //
+
+  return num;
+}
+
+// Starting at trueBlk and following single-successor edges, look for a
+// block that falseBlk reaches on a single path (SinglePath_InPath). On the
+// first such block, clone away side entries on both arms, retry the serial
+// and if patterns on headBlk and return the number of clones/matches.
+// Returns 0 when no such block is found.
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+                                                BlockT *trueBlk,
+                                                BlockT *falseBlk)
+{
+  if (DEBUGME) {
+    errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+           << " true = BB" << trueBlk->getNumber()
+           << ", numSucc=" << trueBlk->succ_size()
+           << " false = BB" << falseBlk->getNumber() << "\n";
+  }
+
+  //trueBlk could be the common post dominator
+  for (BlockT *candidate = trueBlk; candidate;
+       candidate = (candidate->succ_size() == 1)
+                   ? (*candidate->succ_begin()) : NULL) {
+    if (DEBUGME) {
+      errs() << "check down = BB" << candidate->getNumber();
+    }
+
+    if (singlePathTo(falseBlk, candidate) != SinglePath_InPath) {
+      if (DEBUGME) {
+        errs() << " not working\n";
+      }
+      continue;
+    }
+
+    if (DEBUGME) {
+      errs() << " working\n";
+    }
+
+    int num = cloneOnSideEntryTo(headBlk, trueBlk, candidate);
+    num += cloneOnSideEntryTo(headBlk, falseBlk, candidate);
+
+    // up to here num only counts clones
+    numClonedBlock += num;
+    num += serialPatternMatch(*headBlk->succ_begin());
+    num += serialPatternMatch(*(headBlk->succ_begin()+1));
+    num += ifPatternMatch(headBlk);
+    assert(num > 0); //
+
+    return num;
+  }
+
+  return 0;
+} //handleJumpintoIfImp
+
+// Debug helper: writes a one-line summary of the head/true/false/land blocks
+// to errs().  headBlk must be non-NULL; the other three blocks are optional
+// and skipped when NULL.  With detail set, each block's contents are printed
+// as well.
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+                                                        BlockT *trueBlk,
+                                                        BlockT *falseBlk,
+                                                        BlockT *landBlk,
+                                                        bool detail)
+{
+  errs() << "head = BB" << headBlk->getNumber();
+  errs() << " size = " << headBlk->size();
+  if (detail) {
+    errs() << "\n";
+    headBlk->print(errs());
+    errs() << "\n";
+  }
+
+  if (trueBlk != NULL) {
+    errs() << ", true = BB" << trueBlk->getNumber();
+    errs() << " size = " << trueBlk->size();
+    errs() << " numPred = " << trueBlk->pred_size();
+    if (detail) {
+      errs() << "\n";
+      trueBlk->print(errs());
+      errs() << "\n";
+    }
+  }
+
+  if (falseBlk != NULL) {
+    errs() << ", false = BB" << falseBlk->getNumber();
+    errs() << " size = " << falseBlk->size();
+    errs() << " numPred = " << falseBlk->pred_size();
+    if (detail) {
+      errs() << "\n";
+      falseBlk->print(errs());
+      errs() << "\n";
+    }
+  }
+
+  if (landBlk != NULL) {
+    errs() << ", land = BB" << landBlk->getNumber();
+    errs() << " size = " << landBlk->size();
+    errs() << " numPred = " << landBlk->pred_size();
+    if (detail) {
+      errs() << "\n";
+      landBlk->print(errs());
+      errs() << "\n";
+    }
+  }
+
+  errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+// Moves the instructions of trueBlk and/or falseBlk (whichever
+// needMigrateBlock() selects, plus the other arm when it has more than one
+// predecessor) into landBlk, guarded there by a flag register that records
+// which arm was taken.  A new landing block is created when *plandBlk is NULL.
+//
+// headBlk - block ending in the branch to trueBlk / falseBlk
+// trueBlk - true arm, may be NULL; must have at most one successor
+// falseBlk - false arm, may be NULL; must have at most one successor
+// plandBlk - in: current landing block or NULL; out: the landing block used
+//
+// Returns the number of blocks created (0 or 1).  Returns 0 without changing
+// anything when both arms are the same block or neither arm needs migrating.
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk)
+{
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+#if 0
+ if (DEBUGME) {
+ errs() << "improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+#endif
+
+ // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
+ // May consider the # landBlk->pred_size() as it represents the number of
+ // assignment initReg = .. needed to insert.
+ // NOTE(review): needMigrateBlock() is called with a possibly NULL block;
+ // presumably it returns false for NULL -- confirm.
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+ // If we need to migrate either trueBlk or falseBlk, also migrate the other
+ // one when it has more than one predecessor. Without doing this, its
+ // predecessor other than headBlk would see an undefined value in initReg.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch }
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+ // if landBlk->pred_size() > 2, put the above if-else inside
+ // if (initReg !=2) {...}
+ //
+ // add initReg = initVal to headBlk
+ unsigned initReg = getRegister(&AMDIL::GPRI32RegClass);
+ // Only needed when exactly one arm is migrated: the migrated arm assigns
+ // initReg itself (see below), so headBlk supplies the value for the arm
+ // that is left in place.
+ if (!migrateTrue || !migrateFalse) {
+ int initVal = migrateTrue ? 0 : 1;
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ // No landing block yet: create one and make it the successor of each arm,
+ // or of headBlk for an arm that does not exist.
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ numNewBlk ++;
+ }
+
+ // More than two predecessors means landBlk is also reached from blocks
+ // other than the two arms of this if.
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ //insert AMDIL::ENDIF to avoid special case "input landBlk == NULL"
+ // Everything below is inserted before this ENDIF, i.e. insertPos marks the
+ // end of the new if/else region in landBlk.
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDIL::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) {
+ // Outer guard: compare initReg with the constant 2 and branch on the
+ // comparison result.
+ unsigned immReg = getRegister(&AMDIL::GPRI32RegClass);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg = getRegister(&AMDIL::GPRI32RegClass);
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDIL::IF_LOGICALZ_i32, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDIL::IF_LOGICALNZ_i32,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+ // need to unconditionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+ // need to unconditionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 0)
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+ //CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+ // put initReg = 2 to other predecessors of landBlk
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+// Creates a new virtual register of class RegClass in the function's register
+// info, records it in vregs, and returns its register number.
+//
+// NOTE(review): the previous comment here described searching the register
+// class for an unused physical register (to stay below the 65K register limit
+// of the IL text format) and using virtual registers only as a fallback. The
+// code below does not do that: it always creates a virtual register.
+// NOTE(review): the return type is int while the register number is held in
+// an unsigned; callers in this file store the result in unsigned / RegiT.
+template<class PassT>
+inline int CFGStructurizer<PassT>::getRegister(
+ const class TargetRegisterClass *RegClass)
+{
+ unsigned reg = funcRep->getRegInfo().createVirtualRegister(RegClass);
+ // Remember every register this pass created.
+ vregs.insert(reg);
+ if (DEBUGME)
+ errs() << "created virtual register "
+ << TargetRegisterInfo::virtReg2Index(reg) << "\n";
+ return reg;
+}
+
+// Handles a loop exit edge exitingBlk -> exitBlk.  When the edge leaves
+// exitingLoop for a different loop exitLoop, a flag register is allocated: it
+// is registered as a break-init register on exitLoop and as a break-on
+// register on exitingLoop and every parent up to (not including) exitLoop.
+// The actual rewrite of the branch is done by mergeLoopbreakBlock().
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+                                             LoopT *exitingLoop,
+                                             BlockT *exitBlk,
+                                             LoopT *exitLoop,
+                                             BlockT *landBlk)
+{
+  if (DEBUGME) {
+    errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+           << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+  }
+
+  RegiT breakFlagReg = INVALIDREGNUM;
+  const bool leavesOtherLoop = (exitingLoop != exitLoop);
+
+  if (leavesOtherLoop) {
+    breakFlagReg = getRegister(&AMDIL::GPRI32RegClass);
+    assert(breakFlagReg != INVALIDREGNUM);
+    addLoopBreakInitReg(exitLoop, breakFlagReg);
+
+    // Climb the loop nest from the exiting loop towards exitLoop.
+    LoopT *walkLoop = exitingLoop;
+    for (; walkLoop && walkLoop != exitLoop;
+         walkLoop = walkLoop->getParentLoop()) {
+      addLoopBreakOnReg(walkLoop, breakFlagReg);
+    }
+    assert(walkLoop == exitLoop);
+  }
+
+  mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, breakFlagReg);
+
+} //handleLoopbreak
+
+// Handles a continue edge contingBlk -> contBlk.  When the edge targets a loop
+// (contLoop) other than the one containing contingBlk (contingLoop), a flag
+// register is allocated and registered as a cont-init register on contLoop.
+// Loops between contingLoop and the direct child of contLoop get it as a
+// break-on register; the direct child of contLoop gets it as a cont-on
+// register.  The branch rewrite is done by settleLoopcontBlock().
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+                                                 LoopT *contingLoop,
+                                                 BlockT *contBlk,
+                                                 LoopT *contLoop)
+{
+  if (DEBUGME) {
+    errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+           << " header = BB" << contBlk->getNumber() << "\n";
+
+    errs() << "Trying to continue loop-depth = "
+           << getLoopDepth(contLoop)
+           << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+  }
+
+  RegiT contFlagReg = INVALIDREGNUM;
+
+  if (contingLoop != contLoop) {
+    contFlagReg = getRegister(&AMDIL::GPRI32RegClass);
+    assert(contFlagReg != INVALIDREGNUM);
+    addLoopContInitReg(contLoop, contFlagReg);
+
+    // Inner loops must be broken out of (hence addLoopBreakOnReg, not
+    // addLoopContOnReg) until the loop directly nested in contLoop is reached.
+    LoopT *walkLoop = contingLoop;
+    for (; walkLoop && walkLoop->getParentLoop() != contLoop;
+         walkLoop = walkLoop->getParentLoop()) {
+      addLoopBreakOnReg(walkLoop, contFlagReg);
+    }
+    assert(walkLoop && walkLoop->getParentLoop() == contLoop);
+    addLoopContOnReg(walkLoop, contFlagReg);
+  }
+
+  settleLoopcontBlock(contingBlk, contBlk, contFlagReg);
+  //contingBlk->removeSuccessor(loopHeader);
+} //handleLoopcontBlock
+
+// Merges srcBlk into dstBlk: srcBlk's instructions (from its first non-debug
+// instruction on) are appended to dstBlk, dstBlk takes over srcBlk's
+// successor list in place of its edge to srcBlk, and srcBlk is retired.
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk)
+{
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ //removeUnconditionalBranch(dstBlk);
+ // Move the instructions to the end of dstBlk.
+ dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
+
+ // Replace the edge dstBlk -> srcBlk by copies of srcBlk's outgoing edges.
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+ // Drop all outgoing edges of srcBlk before retiring it.
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+// Rewrites the conditional branch branchInstr in curBlk into a structured
+// "if / else / endif" sequence inside curBlk, splicing the instructions of
+// trueBlk and falseBlk into the two arms and retiring those blocks.
+//
+// branchInstr - the branch in curBlk being replaced; erased on return
+// curBlk - block that receives the structured code
+// trueBlk - block for the if-arm, may be NULL (empty arm)
+// falseBlk - block for the else-arm, may be NULL (empty arm)
+// landBlk - join block after the if, may be NULL
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk)
+{
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+ // When the branch carries no debug location, fall back to the location of
+ // the last break instruction in curBlk, if there is one.
+ bool badDebugLoc = branchDL == DebugLoc() ? true : false;
+ if (badDebugLoc) {
+ InstrT * brkInstr = getLastBreakInstr(curBlk);
+ if (brkInstr) {
+ branchDL = brkInstr->getDebugLoc();
+ }
+ }
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ // All new instructions are inserted before the old branch, which is erased
+ // at the end.
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ // NOTE(review): the ELSE gets an explicit debug location only in the
+ // fallback case (branch had no location of its own) -- confirm this is
+ // intended rather than inverted.
+ if (badDebugLoc) {
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ELSE, passRep, branchDL);
+ } else {
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ELSE, passRep);
+ }
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+
+ //curBlk->remove(branchInstrPos);
+ branchInstr->eraseFromParent();
+
+ // The edge curBlk -> landBlk is added only when both arms existed; the
+ // code above never removes an existing curBlk -> landBlk edge.
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+// Wraps the contents of dstBlk in WHILELOOP ... ENDLOOP, emits the register
+// initialisations and post-loop break/continue checks recorded in loopLand,
+// then appends the loop's landing block to dstBlk and retires it.
+//
+// dstBlk - block holding the (already collapsed) loop body
+// loopLand - per-loop bookkeeping: landing block plus the register sets
+// contInitRegs, endbranchInitRegs, breakInitRegs, breakOnRegs and
+// contOnRegs
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+                                                LoopLandInfo *loopLand)
+{
+  typedef typename std::set<RegiT>::const_iterator RegSetIter;
+
+  BlockT *landBlk = loopLand->landBlk;
+
+  if (DEBUGME) {
+    errs() << "loopPattern header = BB" << dstBlk->getNumber()
+           << " land = BB" << landBlk->getNumber() << "\n";
+  }
+
+  // Loop contInitRegs are init at the beginning of the loop.
+  for (RegSetIter iter = loopLand->contInitRegs.begin(),
+       iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  // Loop endbranchInitRegs are init after entering the loop.
+  for (RegSetIter iter = loopLand->endbranchInitRegs.begin(),
+       iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  /* We last inserted the DebugLoc in the BREAK_LOGICALZ_i32 or
+   * AMDIL::BREAK_LOGICALNZ statement in the current dstBlk.
+   * Search for the DebugLoc in that statement.
+   * If not found, we have to insert the empty/default DebugLoc. */
+  InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+  DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+  // fogbugz #7310: work-around discussed with Uri regarding do-while loops:
+  // in case the WHILELOOP line number is greater than do.body line numbers,
+  // take the do.body line number instead.
+  // An empty dstBlk has no first instruction to take a location from, so the
+  // iterator is compared against end() before it is dereferenced.
+  MachineBasicBlock::iterator firstInstr = dstBlk->begin();
+  DebugLoc DLBreakDoBody =
+    (firstInstr != dstBlk->end()) ? firstInstr->getDebugLoc() : DebugLoc();
+  DebugLoc DLBreakMin = (DLBreak.getLine() < DLBreakDoBody.getLine()) ? DLBreak : DLBreakDoBody;
+
+  CFGTraits::insertInstrBefore(dstBlk, AMDIL::WHILELOOP, passRep, DLBreakMin);
+  // Loop breakInitRegs are init before entering the loop.
+  for (RegSetIter iter = loopLand->breakInitRegs.begin(),
+       iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  /* We last inserted the DebugLoc in the continue statement in the current
+   * dstBlk.  Search for the DebugLoc in the continue statement.
+   * If not found, we have to insert the empty/default DebugLoc. */
+  InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+  DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+  CFGTraits::insertInstrEnd(dstBlk, AMDIL::ENDLOOP, passRep, DLContinue);
+  // Loop breakOnRegs are checked after the ENDLOOP: break the loop outside
+  // this loop.
+  for (RegSetIter iter = loopLand->breakOnRegs.begin(),
+       iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::BREAK_LOGICALNZ_i32, passRep,
+                                   *iter);
+  }
+
+  // Loop contOnRegs are checked after the ENDLOOP: cont the loop outside this
+  // loop.
+  for (RegSetIter iter = loopLand->contOnRegs.begin(),
+       iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::CONTINUE_LOGICALNZ_i32,
+                                   passRep, *iter);
+  }
+
+  // Append the landing block's instructions and take over its successors.
+  dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+  for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+       iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+    dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+  }
+
+  removeSuccessor(landBlk);
+  retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+
+// Rewrites the loop-exiting conditional branch at the end of exitingBlk into
+// structured break code and removes the edge exitingBlk -> exitBlk.
+//
+// exitingBlk - block inside the loop whose branch leaves the loop
+// exitBlk - target of the exit edge
+// exitLandBlk - landing block of the loop; when exitBlk differs from it,
+// exitBlk's instructions are moved into exitingBlk and exitBlk
+// is retired
+// setReg - register set to 1 before the break, or INVALIDREGNUM for none
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg)
+{
+ // NOTE(review): exitLandBlk is dereferenced here without a NULL check --
+ // confirm callers never pass NULL when DEBUGME is enabled.
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+ // Nothing has to run before leaving the loop, so a single conditional
+ // break is enough. Its polarity depends on whether the exit is the
+ // branch's true target.
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
+ : CFGTraits::getBreakZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ } else {
+ // General form: if (...) { [exitBlk code] [setReg = 1] break } endif
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::BREAK, passRep);
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+ } //if_logical
+
+ //now branchInst can be erase safely
+ //exitingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+
+ //now take care of successors, retire blocks
+ exitingBlk->removeSuccessor(exitBlk);
+ if (exitBlk != exitLandBlk) {
+ // exitBlk's instructions were moved into exitingBlk above, so detach it
+ // from the landing block and retire it.
+ exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+
+// Turns the back edge contingBlk -> contBlk into structured continue code at
+// the end of contingBlk.
+//
+// contingBlk - block that branches back to the loop header
+// contBlk - the continue target (loop header)
+// setReg - when not INVALIDREGNUM, the register is set to 1 and a BREAK
+// is emitted instead of a CONTINUE
+//
+// CFG successor edges are not modified here.
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg)
+{
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
+ // A single conditional-continue instruction suffices only when no
+ // register has to be set and the branch is the last instruction of the
+ // block (nothing follows it that would have to go inside the if).
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+ if (useContinueLogical == false) {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep,
+ DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the break-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, DL);
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode,
+ passRep, DL);
+ }
+
+ //contingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+ } else {
+ /* If we've arrived here then the branch instruction has already been
+ * erased. Use the last debug location found in the basic block for the
+ * new instruction; we've just inserted that reference, so it should be
+ * representative. */
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the break-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep,
+ CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep,
+ CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// The blocks in exitBlkSet lie on break paths of loopRep.  Before their code
+// can be placed inside the loop body, check whether every such path ends in a
+// block that already carries a continue instruction (i.e. was settled earlier
+// as a continue block of the parent loop).  If so:
+//   (1) create a new block holding a single CONTINUE,
+//   (2) erase the continue instruction from each path-end block,
+//   (3) make the new block a successor of each path-end block.
+// Returns the new block, or NULL (with nothing modified) when any path has no
+// single-path end or its end block has no continue instruction.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+                                              LoopT *loopRep,
+                                              std::set<BlockT *> &exitBlkSet,
+                                              BlockT *exitLandBlk)
+{
+  typedef typename std::set<BlockT *>::const_iterator BlockSetIter;
+
+  // Phase 1: collect the end block of every exit path; bail out early if any
+  // of them does not qualify.
+  std::set<BlockT *> pathEnds;
+  BlockSetIter exitIt = exitBlkSet.begin();
+  const BlockSetIter exitItEnd = exitBlkSet.end();
+  while (exitIt != exitItEnd) {
+    BlockT *pathEnd = singlePathEnd(*exitIt, exitLandBlk);
+    if (pathEnd == NULL) {
+      return NULL;
+    }
+    if (CFGTraits::getContinueInstr(pathEnd) == NULL) {
+      return NULL;
+    }
+    pathEnds.insert(pathEnd);
+    ++exitIt;
+  }
+
+  // Phase 2: create the shared continue block.
+  BlockT *contBlk = funcRep->CreateMachineBasicBlock();
+  funcRep->push_back(contBlk); //insert to function
+  CFGTraits::insertInstrEnd(contBlk, AMDIL::CONTINUE, passRep);
+  SHOWNEWBLK(contBlk, "New continue block: ");
+
+  // Phase 3: strip the old continue instructions and reroute to contBlk.
+  BlockSetIter endIt = pathEnds.begin();
+  const BlockSetIter endItEnd = pathEnds.end();
+  while (endIt != endItEnd) {
+    BlockT *pathEnd = *endIt;
+    if (InstrT *oldCont = CFGTraits::getContinueInstr(pathEnd)) {
+      oldCont->eraseFromParent();
+    }
+    pathEnd->addSuccessor(contBlk);
+    if (DEBUGME) {
+      errs() << "Add new continue Block to BB"
+             << pathEnd->getNumber() << " successors\n";
+    }
+    ++endIt;
+  }
+
+  return contBlk;
+} //relocateLoopcontBlock
+
+
+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
+// LoopLandBlock. This BB branches on the loop endBranchInit register to the
+// paths corresponding to the loop exiting branches.
+//
+// loopRep - the loop being given a single landing block
+// exitingBlks - blocks inside the loop that leave it; at least two
+// exitBlks - exitBlks[i] is the block exitingBlks[i] exits to
+//
+// Every exiting block is redirected to the returned new landing block.
+// Exiting block i (i >= 1) stores i in the end-branch register; block 0 relies
+// on the register's initial value. A chain of branch blocks then compares the
+// register against 0, 1, ... and dispatches to the matching exit block.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks)
+{
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ RegiT endBranchReg = getRegister(&AMDIL::GPRI32RegClass);
+ assert(endBranchReg != INVALIDREGNUM);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >=2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ // The first branch block doubles as the new landing block of the loop.
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+ //it is redundant to add reg = 0 to exitingBlks[0]
+
+ // For 1..n th exiting path (the last iteration handles two paths) create the
+ // branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ // Last path: no further comparison is needed, the else-target is the
+ // exit block itself.
+ curBranchBlk = curExitBlk;
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ addToTraversalBlock(curBranchBlk);
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, endBranchReg, i);
+
+ // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
+ // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = getRegister(&AMDIL::GPRI32RegClass);
+ MachineInstr* preValInst
+ = BuildMI(preBranchBlk, DL, tii->get(AMDIL::LOADCONST_i32), preValReg)
+ .addImm(i - 1); //preVal
+ SHOWNEWINSTR(preValInst);
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = getRegister(&AMDIL::GPRI32RegClass);
+ MachineInstr* cmpInst
+ = BuildMI(preBranchBlk, DL, tii->get(AMDIL::IEQ), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+ SHOWNEWINSTR(cmpInst);
+
+ MachineInstr* condBranchInst
+ = BuildMI(preBranchBlk, DL, tii->get(AMDIL::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+ SHOWNEWINSTR(condBranchInst);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
+
+// Follows single-successor edges starting at srcBlk and classifies the walk:
+//   SinglePath_InPath    - dstBlk is srcBlk itself or is reached on the walk
+//   SinglePath_NotInPath - the walk ended in a block without successors
+//                          before dstBlk was seen
+//   Not_SinglePath       - the walk hit a block with several successors, or
+//                          (unless allowSideEntry) a block with more than one
+//                          predecessor, or srcBlk is NULL
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+                                     bool allowSideEntry)
+{
+  assert(dstBlk);
+
+  BlockT *cur = srcBlk;
+  if (cur == dstBlk) {
+    return SinglePath_InPath;
+  }
+
+  for (;;) {
+    if (cur == NULL) {
+      return Not_SinglePath;
+    }
+    if (cur->succ_size() != 1) {
+      break;
+    }
+
+    cur = *cur->succ_begin();
+    if (cur == dstBlk) {
+      return SinglePath_InPath;
+    }
+
+    // A block entered from elsewhere breaks the single path, unless the
+    // caller tolerates side entries.
+    if (!allowSideEntry && cur->pred_size() > 1) {
+      return Not_SinglePath;
+    }
+  }
+
+  // The walk stopped at a block with zero or with several successors.
+  return (cur->succ_size() == 0) ? SinglePath_NotInPath : Not_SinglePath;
+} //singlePathTo
+
+// Follows single-successor edges starting at srcBlk.
+//  - If the walk reaches dstBlk, returns the last block before dstBlk
+//    (srcBlk itself when srcBlk == dstBlk).
+//  - If the walk ends in a block without successors before reaching dstBlk,
+//    returns that last block.
+//  - Otherwise (a block with several successors, a side entry while
+//    allowSideEntry is false, or a NULL srcBlk) returns NULL.
+// dstBlk may be NULL, meaning "no destination": the walk then runs to the end
+// of the path.
+//
+// The successor is compared against dstBlk, not against NULL: a successor
+// taken from the successor list is never NULL, so a NULL comparison could
+// never report the "last block before dstBlk" case.  For dstBlk == NULL both
+// comparisons are the same.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+                                      bool allowSideEntry)
+{
+  //assert(dstBlk);
+
+  if (srcBlk == dstBlk) {
+    return srcBlk;
+  }
+
+  // Guard the dereferences below against a NULL start block.
+  if (srcBlk == NULL) {
+    return NULL;
+  }
+
+  if (srcBlk->succ_size() == 0) {
+    return srcBlk;
+  }
+
+  while (srcBlk && srcBlk->succ_size() == 1) {
+    BlockT *preBlk = srcBlk;
+
+    srcBlk = *srcBlk->succ_begin();
+    // Destination check comes first (same order as in singlePathTo), so a
+    // destination with several predecessors still terminates the path.
+    if (srcBlk == dstBlk) {
+      return preBlk;
+    }
+
+    if (!allowSideEntry && srcBlk->pred_size() > 1) {
+      return NULL;
+    }
+  }
+
+  if (srcBlk && srcBlk->succ_size()==0) {
+    return srcBlk;
+  }
+
+  return NULL;
+
+} //singlePathEnd
+
+// Walks the single-successor chain from srcBlk (a successor of preBlk) up to,
+// but not including, dstBlk.  Every block on the chain that has more than one
+// predecessor is replaced, for this path only, by a private clone so that the
+// path has no side entries.  A NULL dstBlk means the chain is followed to a
+// block without successors.  Returns the number of blocks cloned.
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+                                               BlockT *dstBlk)
+{
+  assert(preBlk->isSuccessor(srcBlk));
+
+  int numCloned = 0;
+  BlockT *prev = preBlk;
+  BlockT *cur = srcBlk;
+
+  while (cur != NULL && cur != dstBlk) {
+    assert(cur->succ_size() == 1 ||
+           (cur->succ_size() == 0 && dstBlk == NULL));
+
+    if (cur->pred_size() > 1) {
+      // Continue the walk from the clone: it has the same successors.
+      cur = cloneBlockForPredecessor(cur, prev);
+      ++numCloned;
+    }
+
+    prev = cur;
+    cur = (cur->succ_size() == 1) ? *cur->succ_begin() : NULL;
+  }
+
+  return numCloned;
+} //cloneOnSideEntryTo
+
+// Gives predBlk its own copy of curBlk: curBlk is cloned, predBlk's branch
+// operands and successor edge are redirected from curBlk to the clone, and
+// the clone receives the same successor list as curBlk.  Returns the clone.
+// curBlk must currently be a successor of predBlk.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+                                                 BlockT *predBlk)
+{
+  assert(predBlk->isSuccessor(curBlk) &&
+         "predBlk is not a predecessor of curBlk");
+
+  BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
+  CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+  //srcBlk, oldBlk, newBlk
+
+  predBlk->removeSuccessor(curBlk);
+  predBlk->addSuccessor(cloneBlk);
+
+  // add all successor to cloneBlk
+  CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+  // Statistic: number of instructions duplicated by cloning.
+  numClonedInstr += curBlk->size();
+
+  if (DEBUGME) {
+    errs() << "Cloned block: " << "BB"
+           << curBlk->getNumber() << " size " << curBlk->size() << "\n";
+  }
+
+  SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+  return cloneBlk;
+} //cloneBlockForPredecessor
+
+// Returns the successor of exitingBlk that lies outside loopRep.  Asserts that
+// there is exactly one such successor.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+                                               BlockT *exitingBlk)
+{
+  BlockT *outsideSucc = NULL;
+
+  typename BlockT::succ_iterator succIt = exitingBlk->succ_begin();
+  const typename BlockT::succ_iterator succItEnd = exitingBlk->succ_end();
+  while (succIt != succItEnd) {
+    BlockT *succ = *succIt;
+    ++succIt;
+
+    if (loopRep->contains(succ)) {
+      continue;
+    }
+
+    // A second successor outside the loop is not expected.
+    assert(outsideSucc == NULL);
+    outsideSucc = succ;
+  }
+
+  assert(outsideSucc != NULL);
+
+  return outsideSucc;
+} //exitingBlock2ExitBlock
+
+// Moves the instructions of srcBlk into dstBlk, inserting them before
+// insertPos.  When srcBlk has a normal (input, non-AMDIL) branch instruction,
+// only the instructions preceding that branch are moved; otherwise the whole
+// block is moved.
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+                                                BlockT *dstBlk,
+                                                InstrIterator insertPos)
+{
+  InstrIterator spliceEnd;
+  //look for the input branchinstr, not the AMDIL branchinstr
+  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+  if (branchInstr == NULL) {
+    if (DEBUGME) {
+      errs() << "migrateInstruction don't see branch instr\n" ;
+    }
+    spliceEnd = srcBlk->end();
+  } else {
+    if (DEBUGME) {
+      errs() << "migrateInstruction see branch instr\n" ;
+      branchInstr->dump();
+    }
+    // Stop right before the branch so it stays behind in srcBlk.
+    spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+  }
+  if (DEBUGME) {
+    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+           << " srcSize = " << srcBlk->size() << "\n";
+  }
+
+  //splice insert before insertPos
+  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+  if (DEBUGME) {
+    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+           << " srcSize = " << srcBlk->size() << "\n";
+  }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit rewrites a loop latch of the form
+//   B1:
+//        uncond_br LoopHeader
+//
+// into
+//   B1:
+//        cond_br 1 LoopHeader dummyExit
+//
+// and returns the newly added dummy exit block.  Returns NULL, leaving the
+// loop untouched, when the loop has no header or latch, or when the latch does
+// not end in an unconditional branch.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep)
+{
+  BlockT *header = LoopRep->getHeader();
+  BlockT *latch = LoopRep->getLoopLatch();
+  if (header == NULL || latch == NULL) {
+    return NULL;
+  }
+
+  InstrT *oldBranch = CFGTraits::getLoopendBlockBranchInstr(latch);
+  if (oldBranch == NULL || !CFGTraits::isUncondBranch(oldBranch)) {
+    return NULL;
+  }
+
+  BlockT *exitBlk = funcRep->CreateMachineBasicBlock();
+  funcRep->push_back(exitBlk); //insert to function
+  SHOWNEWBLK(exitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+  if (DEBUGME) errs() << "Old branch instr: " << *oldBranch << "\n";
+
+  // Build "oneReg = 1; cond_br oneReg, header" in front of the old branch,
+  // then drop the old branch.
+  typename BlockT::iterator where = CFGTraits::getInstrPos(latch, oldBranch);
+  unsigned oneReg = getRegister(&AMDIL::GPRI32RegClass);
+  CFGTraits::insertAssignInstrBefore(where, passRep, oneReg, 1);
+  InstrT *condBranch =
+    CFGTraits::insertInstrBefore(where, AMDIL::BRANCH_COND_i32, passRep);
+  MachineInstrBuilder(condBranch).addMBB(header).addReg(oneReg, false);
+  SHOWNEWINSTR(condBranch);
+  oldBranch->eraseFromParent();
+
+  latch->addSuccessor(exitBlk);
+
+  return exitBlk;
+} //normalizeInfiniteLoopExit
+
+// Erases trailing unconditional branch instructions from srcBlk.  This is a
+// loop because a block with two unconditional branches has been observed
+// (example test_fc_do_while_or.c); once that is fixed upstream a single
+// removal would do.
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk)
+{
+  for (;;) {
+    InstrT *lastBranch = CFGTraits::getLoopendBlockBranchInstr(srcBlk);
+    if (!lastBranch || !CFGTraits::isUncondBranch(lastBranch)) {
+      break;
+    }
+
+    if (DEBUGME) {
+      errs() << "Removing unconditional branch instruction" ;
+      lastBranch->dump();
+    }
+    lastBranch->eraseFromParent();
+  }
+} //removeUnconditionalBranch
+
+// If srcBlk has exactly two successor entries and both name the same block,
+// the conditional branch is pointless: erase it and drop one of the two
+// duplicate successor entries.
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk)
+{
+  if (srcBlk->succ_size() != 2) {
+    return;
+  }
+
+  BlockT *firstSucc = *srcBlk->succ_begin();
+  BlockT *secondSucc = *(srcBlk->succ_begin()+1);
+  if (firstSucc != secondSucc) {
+    return;
+  }
+
+  InstrT *condBranch = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+  assert(condBranch && CFGTraits::isCondBranch(condBranch));
+  if (DEBUGME) {
+    errs() << "Removing unneeded conditional branch instruction" ;
+    condBranch->dump();
+  }
+  condBranch->eraseFromParent();
+
+  SHOWNEWBLK(firstSucc, "Removing redundant successor");
+  srcBlk->removeSuccessor(firstSucc);
+} //removeRedundantConditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+                                               DEFAULT_VEC_SLOTS> &retBlks)
+{
+  // Create one block holding a single RETURN and make every block in retBlks
+  // flow into it, after stripping that block's own trailing RETURN (if any).
+  BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+  funcRep->push_back(dummyExitBlk); //insert to function
+  CFGTraits::insertInstrEnd(dummyExitBlk, AMDIL::RETURN, passRep);
+
+  for (size_t idx = 0, numBlks = retBlks.size(); idx < numBlks; ++idx) {
+    BlockT *curBlk = retBlks[idx];
+    InstrT *retInstr = CFGTraits::getReturnInstr(curBlk);
+    if (retInstr != NULL) {
+      retInstr->eraseFromParent();
+    }
+#if 0
+    if (curBlk->size()==0 && curBlk->pred_size() == 1) {
+      if (DEBUGME) {
+        errs() << "Replace empty block BB" << curBlk->getNumber()
+               << " with dummyExitBlock\n";
+      }
+      BlockT *predb = *curBlk->pred_begin();
+      predb->removeSuccessor(curBlk);
+      curBlk = predb;
+    } //handle empty curBlk
+#endif
+    curBlk->addSuccessor(dummyExitBlk);
+    if (DEBUGME) {
+      errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+             << " successors\n";
+    }
+  } //for
+
+  SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk)
+{
+  // Detach srcBlk from all of its successors, always removing the first
+  // remaining one until none are left.
+  while (srcBlk->succ_size() != 0) {
+    BlockT *firstSucc = *srcBlk->succ_begin();
+    srcBlk->removeSuccessor(firstSucc);
+  }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum)
+{
+  // Store sccNum in srcBlk's BlockInfo, creating the record on first use.
+  BlockInfo *&info = blockInfoMap[srcBlk];
+  if (!info) {
+    info = new BlockInfo();
+  }
+  info->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk)
+{
+  // Returns the SCC number recorded for srcBlk, or INVALIDSCCNUM when no
+  // BlockInfo has been recorded for it.
+  BlockInfo *info = blockInfoMap[srcBlk];
+  if (info == NULL) {
+    return INVALIDSCCNUM;
+  }
+  return info->sccNum;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::addToTraversalBlock(BlockT *srcBlk)
+{
+  // Append srcBlk to addedToTraversalBlks (logged first when DEBUGME is on).
+  if (DEBUGME) {
+    errs() << "AddToTraversal BB" << srcBlk->getNumber() << "\n";
+  }
+  addedToTraversalBlks.push_back(srcBlk);
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk)
+{
+  // Flag srcBlk as retired in blockInfoMap; dstBlk is not used by this body.
+  if (DEBUGME) {
+    errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+  }
+
+  BlockInfo *&info = blockInfoMap[srcBlk];
+  if (!info) {
+    info = new BlockInfo();
+  }
+  info->isRetired = true;
+
+  // A retired block must already be disconnected from the CFG on both sides.
+  assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+         && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk)
+{
+  // True only when srcBlk has a BlockInfo record whose isRetired flag is set.
+  BlockInfo *info = blockInfoMap[srcBlk];
+  if (info == NULL) {
+    return false;
+  }
+  return info->isRetired;
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk)
+{
+  // Walk outwards through every loop that has curBlk as its header. curBlk is
+  // an active loop head if any of them has no land info yet, or has a landing
+  // block that is not retired.
+  for (LoopT *loop = loopInfo->getLoopFor(curBlk);
+       loop != NULL && loop->getHeader() == curBlk;
+       loop = loop->getParentLoop()) {
+    LoopLandInfo *landInfo = getLoopLandInfo(loop);
+    if (landInfo == NULL) {
+      return true;
+    }
+
+    BlockT *landBlk = landInfo->landBlk;
+    assert(landBlk);
+    if (!isRetiredBlock(landBlk)) {
+      return true;
+    }
+  }
+
+  return false;
+} //isActiveLoophead
+
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk)
+{
+  // A block is worth migrating only when it has more than one predecessor,
+  // holds more than blockSizeThreshold instructions, and would need more than
+  // cloneInstrThreshold instructions across the (pred_size - 1) extra copies.
+  const unsigned blockSizeThreshold = 30;
+  const unsigned cloneInstrThreshold = 100;
+
+  if (blk == NULL || blk->pred_size() <= 1) {
+    return false;
+  }
+
+  unsigned blkSize = blk->size();
+  if (blkSize <= blockSizeThreshold) {
+    return false;
+  }
+  return blkSize * (blk->pred_size() - 1) > cloneInstrThreshold;
+} //needMigrateBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+                                            BlockTSmallerVector &exitBlks,
+                                            std::set<BlockT *> &exitBlkSet)
+{
+  // Predecessors of landBlk that are inside the loop or are exit blocks.
+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+  typename BlockT::pred_iterator predIter = landBlk->pred_begin();
+  typename BlockT::pred_iterator predIterEnd = landBlk->pred_end();
+  while (predIter != predIterEnd) {
+    BlockT *predBlk = *predIter;
+    if (loopRep->contains(predBlk) || exitBlkSet.count(predBlk)) {
+      inpathBlks.push_back(predBlk);
+    }
+    ++predIter;
+  }
+
+  // Every predecessor is on an exit path: landBlk itself is the landing block.
+  if (inpathBlks.size() == landBlk->pred_size()) {
+    setLoopLandBlock(loopRep, landBlk);
+    return landBlk;
+  }
+
+  //landBlk has predecessors that are not in the given loop, so create a new
+  //block in front of it and route the exit-path predecessors through that.
+  BlockT *newLandBlk = funcRep->CreateMachineBasicBlock();
+  funcRep->push_back(newLandBlk); //insert to function
+  newLandBlk->addSuccessor(landBlk);
+
+  for (size_t idx = 0, numIn = inpathBlks.size(); idx < numIn; ++idx) {
+    BlockT *inBlk = inpathBlks[idx];
+    //srcBlk, oldBlk, newBlk
+    CFGTraits::replaceInstrUseOfBlockWith(inBlk, landBlk, newLandBlk);
+    inBlk->removeSuccessor(landBlk);
+    inBlk->addSuccessor(newLandBlk);
+  }
+
+  // landBlk may itself be listed as an exit block; retarget those entries.
+  for (size_t idx = 0, numExits = exitBlks.size(); idx < numExits; ++idx) {
+    if (exitBlks[idx] == landBlk) {
+      exitBlks[idx] = newLandBlk;
+    }
+  }
+  SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+
+  setLoopLandBlock(loopRep, newLandBlk);
+  return newLandBlk;
+} // recordLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk)
+{
+  // Record blk as the landing block of loopRep. The landing block may only be
+  // set once per loop. A NULL blk means a fresh empty block is created and
+  // used instead.
+  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+  if (!theEntry) {
+    theEntry = new LoopLandInfo();
+  }
+  assert(theEntry->landBlk == NULL);
+
+  if (!blk) {
+    blk = funcRep->CreateMachineBasicBlock();
+    funcRep->push_back(blk); //insert to function
+    SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+  }
+  theEntry->landBlk = blk;
+
+  if (DEBUGME) {
+    errs() << "setLoopLandBlock loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " landing-block = BB" << blk->getNumber() << "\n";
+  }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum)
+{
+  // Add regNum to the breakOnRegs set of loopRep's LoopLandInfo, creating the
+  // info record on first use.
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+  landInfo->breakOnRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopBreakOnReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum)
+{
+  // Add regNum to the contOnRegs set of loopRep's LoopLandInfo, creating the
+  // info record on first use.
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+  landInfo->contOnRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopContOnReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum)
+{
+  // Add regNum to the breakInitRegs set of loopRep's LoopLandInfo, creating
+  // the info record on first use.
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+  landInfo->breakInitRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopBreakInitReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum)
+{
+  // Add regNum to the contInitRegs set of loopRep's LoopLandInfo, creating the
+  // info record on first use.
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+  landInfo->contInitRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopContInitReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+                                                     RegiT regNum)
+{
+  // Add regNum to the endbranchInitRegs set of loopRep's LoopLandInfo,
+  // creating the info record on first use.
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+  landInfo->endbranchInitRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopEndbranchInitReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep)
+{
+  // Returns whatever loopLandInfoMap holds for loopRep (looked up with
+  // operator[], exactly as the recording functions do).
+  return loopLandInfoMap[loopRep];
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep)
+{
+  // Returns the landing block recorded for loopRep, or NULL when the loop has
+  // no LoopLandInfo record.
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (landInfo == NULL) {
+    return NULL;
+  }
+  return landInfo->landBlk;
+} // getLoopLandBlock
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk)
+{
+  // curBlk has a back edge when it belongs to a loop and the header of that
+  // loop is one of its successors.
+  LoopT *enclosingLoop = loopInfo->getLoopFor(curBlk);
+  if (!enclosingLoop) {
+    return false;
+  }
+  return curBlk->isSuccessor(enclosingLoop->getHeader());
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep)
+{
+  // NULL-tolerant wrapper: a missing loop counts as depth 0.
+  if (loopRep == NULL) {
+    return 0;
+  }
+  return loopRep->getLoopDepth();
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd)
+{
+  // Number of blocks in [iterStart, iterEnd) that are not retired.
+  int numActive = 0;
+  for (; iterStart != iterEnd; ++iterStart) {
+    if (isRetiredBlock(*iterStart)) {
+      continue;
+    }
+    ++numActive;
+  }
+  return numActive;
+} //countActiveBlock
+
+// This is a workaround for findNearestCommonDominator not being available for
+// the post-dominator tree; a proper fix should go into Dominators.h.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2)
+{
+  // Trivial cases: one block already post-dominates the other.
+  if (postDomTree->dominates(blk1, blk2)) {
+    return blk1;
+  }
+  if (postDomTree->dominates(blk2, blk1)) {
+    return blk2;
+  }
+
+  DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+  DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+  // Handle newly cloned node: a block without a tree node but with a single
+  // successor is answered through that successor.
+  if (!node1 && blk1->succ_size() == 1) {
+    return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+  }
+  if (!node2 && blk2->succ_size() == 1) {
+    return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+  }
+
+  if (!node1 || !node2) {
+    return NULL;
+  }
+
+  // Climb blk1's immediate post-dominators until one also dominates blk2.
+  for (node1 = node1->getIDom(); node1; node1 = node1->getIDom()) {
+    if (postDomTree->dominates(node1, node2)) {
+      return node1->getBlock();
+    }
+  }
+
+  return NULL;
+}
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks)
+{
+  // Folds the two-block findNearestCommonPostDom over every block in blks.
+  // Returns NULL when blks is empty or as soon as some pair has no common
+  // post dominator.
+  BlockT *commonDom = NULL;
+  typename std::set<BlockT *>::const_iterator iter = blks.begin();
+  typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+
+  // Only dereference the first element when there is one; dereferencing
+  // begin() of an empty set is undefined behaviour.
+  if (iter != iterEnd) {
+    commonDom = *iter;
+    for (++iter; iter != iterEnd && commonDom != NULL; ++iter) {
+      BlockT *curBlk = *iter;
+      // commonDom may have become one of the later set members already.
+      if (curBlk != commonDom) {
+        commonDom = findNearestCommonPostDom(curBlk, commonDom);
+      }
+    }
+  }
+
+  if (DEBUGME) {
+    errs() << "Common post dominator for exit blocks is ";
+    if (commonDom) {
+      errs() << "BB" << commonDom->getNumber() << "\n";
+    } else {
+      errs() << "NULL\n";
+    }
+  }
+
+  return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDIL
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+// Abstract base shared by the AMDIL CFG passes. It names the machine-level
+// types the structurizer templates work on and carries the TargetInstrInfo
+// pointer. There is no pass ID here: the constructor receives it from the
+// concrete subclass.
+class AMDILCFGStructurizer : public MachineFunctionPass
+{
+public:
+  typedef MachineInstr              InstructionType;
+  typedef MachineFunction           FunctionType;
+  typedef MachineBasicBlock         BlockType;
+  typedef MachineLoopInfo           LoopinfoType;
+  typedef MachineDominatorTree      DominatortreeType;
+  typedef MachinePostDominatorTree  PostDominatortreeType;
+  typedef MachineDomTreeNode        DomTreeNodeType;
+  typedef MachineLoop               LoopType;
+
+  // Publicly accessible, as in the original layout; read through
+  // getTargetInstrInfo().
+  const TargetInstrInfo *TII;
+
+  AMDILCFGStructurizer(char &pid);
+  const TargetInstrInfo *getTargetInstrInfo() const;
+  // this is abstract base class
+  virtual bool runOnMachineFunction(MachineFunction &F) = 0;
+}; //end of class AMDILCFGStructurizer
+
+} //end of namespace llvm
+AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid)
+  : MachineFunctionPass(pid),
+    TII(NULL)
+{
+  // Forwards the subclass' pass ID to MachineFunctionPass; TII starts NULL.
+}
+
+const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const
+{
+  // Plain accessor for the TII member.
+  return this->TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+
+void initializeAMDILCFGPreparePass(PassRegistry&);
+
+// Concrete pass whose runOnMachineFunction() drives the "prepare" entry point
+// of the CFG structurizer.
+class AMDILCFGPrepare : public AMDILCFGStructurizer
+{
+public:
+  static char ID;
+
+  AMDILCFGPrepare();
+
+  virtual const char *getPassName() const;
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  bool runOnMachineFunction(MachineFunction &F);
+}; //end of class AMDILCFGPrepare
+
+char AMDILCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDILCFGPrepare::AMDILCFGPrepare()
+  : AMDILCFGStructurizer(ID)
+{
+  // Make sure this pass is known to the global pass registry.
+  PassRegistry &registry = *PassRegistry::getPassRegistry();
+  initializeAMDILCFGPreparePass(registry);
+}
+const char *AMDILCFGPrepare::getPassName() const
+{
+  // Human-readable pass name.
+  static const char passName[] = "AMD IL Control Flow Graph Preparation Pass";
+  return passName;
+}
+
+void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  // Analyses this pass needs; the relative order of the addRequired calls is
+  // kept as in the original.
+  AU.addRequired<MachineFunctionAnalysis>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<MachinePostDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
+
+  // The only analysis declared as preserved.
+  AU.addPreserved<MachineFunctionAnalysis>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+
+void initializeAMDILCFGPerformPass(PassRegistry&);
+
+// Concrete pass whose runOnMachineFunction() drives the "run" entry point of
+// the CFG structurizer.
+class AMDILCFGPerform : public AMDILCFGStructurizer
+{
+public:
+  static char ID;
+
+  AMDILCFGPerform();
+
+  virtual const char *getPassName() const;
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  bool runOnMachineFunction(MachineFunction &F);
+}; //end of class AMDILCFGPerform
+
+char AMDILCFGPerform::ID = 0;
+} //end of namespace llvm
+
+AMDILCFGPerform::AMDILCFGPerform()
+  : AMDILCFGStructurizer(ID)
+{
+  // Make sure this pass is known to the global pass registry.
+  PassRegistry &registry = *PassRegistry::getPassRegistry();
+  initializeAMDILCFGPerformPass(registry);
+}
+
+const char *AMDILCFGPerform::getPassName() const
+{
+  // Human-readable pass name.
+  static const char passName[] = "AMD IL Control Flow Graph structurizer Pass";
+  return passName;
+}
+
+void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  // Analyses this pass needs; the relative order of the addRequired calls is
+  // kept as in the original.
+  AU.addRequired<MachineFunctionAnalysis>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<MachinePostDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
+
+  // The only analysis declared as preserved.
+  AU.addPreserved<MachineFunctionAnalysis>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDILCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// Traits specialization that adapts the generic CFGStructurizer to the AMDIL
+// backend: opcode mapping, branch queries, and helpers that create, insert
+// and clone machine instructions and blocks.
+template<>
+struct CFGStructTraits<AMDILCFGStructurizer> {
+  typedef int RegiT;
+
+  // Opcode mapping helpers. Each maps a scalar BRANCH_COND opcode to the
+  // matching structured opcode; any other opcode asserts and, in builds
+  // without assertions, yields -1.
+
+  static int getBreakNzeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALNZ);
+    default:
+      assert(0 && "internal error");
+    }
+    return -1;
+  }
+
+  static int getBreakZeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALZ);
+    default:
+      assert(0 && "internal error");
+    }
+    return -1;
+  }
+
+  static int getBranchNzeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALNZ);
+    default:
+      assert(0 && "internal error");
+    }
+    return -1;
+  }
+
+  static int getBranchZeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALZ);
+    default:
+      assert(0 && "internal error");
+    }
+    return -1;
+  }
+
+  static int getContinueNzeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALNZ);
+    default:
+      assert(0 && "internal error");
+    }
+    return -1;
+  }
+
+  static int getContinueZeroOpcode(int oldOpcode) {
+    switch(oldOpcode) {
+      ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALZ);
+    default:
+      assert(0 && "internal error");
+    }
+    return -1;
+  }
+
+// the explicitly represented branch target is the true branch target
+#define getExplicitBranch getTrueBranch
+#define setExplicitBranch setTrueBranch
+
+  // The true target is stored as operand 0 of the branch instruction.
+  static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+    return instr->getOperand(0).getMBB();
+  }
+
+  static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+    instr->getOperand(0).setMBB(blk);
+  }
+
+  // The false target is not an operand: it is whichever of blk's two
+  // successors is not the true target.
+  static MachineBasicBlock *
+  getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+    assert(blk->succ_size() == 2);
+    MachineBasicBlock *trueBranch = getTrueBranch(instr);
+    MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+    MachineBasicBlock::succ_iterator iterNext = iter;
+    ++iterNext;
+
+    return (*iter == trueBranch) ? *iterNext : *iter;
+  }
+
+  static bool isCondBranch(MachineInstr *instr) {
+    switch (instr->getOpcode()) {
+      ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+      break;
+    default:
+      return false;
+    }
+    return true;
+  }
+
+  static bool isUncondBranch(MachineInstr *instr) {
+    switch (instr->getOpcode()) {
+    case AMDIL::BRANCH:
+      break;
+    default:
+      return false;
+    }
+    return true;
+  }
+
+  static bool isPhimove(MachineInstr *instr) {
+    switch (instr->getOpcode()) {
+      ExpandCaseToAllTypes(AMDIL::MOVE);
+      break;
+    default:
+      return false;
+    }
+    return true;
+  }
+
+  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+    //get DebugLoc from the last MachineBasicBlock instruction with debug info;
+    //a default-constructed DebugLoc is returned when no instruction has one
+    DebugLoc DL;
+    for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+      MachineInstr *instr = &(*iter);
+      if (instr->getDebugLoc().isUnknown() == false) {
+        DL = instr->getDebugLoc();
+      }
+    }
+    return DL;
+  }
+
+  // Returns the last instruction of blk if it is a conditional or
+  // unconditional branch, otherwise NULL. An empty block yields NULL.
+  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+    if (iter == blk->rend()) {
+      return NULL; // nothing to dereference in an empty block
+    }
+    MachineInstr *instr = &*iter;
+    if (isCondBranch(instr) || isUncondBranch(instr)) {
+      return instr;
+    }
+    return NULL;
+  }
+
+  // The correct naming for this is getPossibleLoopendBlockBranchInstr.
+  //
+  // BB with backward-edge could have move instructions after the branch
+  // instruction. Such move instruction "belong to" the loop backward-edge.
+  //
+  // Scans backwards over trailing moves and returns the first branch found,
+  // or NULL if a non-move, non-branch instruction (or the block start) is
+  // reached first.
+  static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+    for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+         iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+      MachineInstr *instr = &*iter;
+      if (isCondBranch(instr) || isUncondBranch(instr)) {
+        return instr;
+      }
+      if (!isPhimove(instr)) {
+        break;
+      }
+    }
+    return NULL;
+  }
+
+  // Returns the last instruction of blk if it is a RETURN, otherwise NULL.
+  static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+    if (iter != blk->rend()) {
+      MachineInstr *instr = &(*iter);
+      if (instr->getOpcode() == AMDIL::RETURN) {
+        return instr;
+      }
+    }
+    return NULL;
+  }
+
+  // Returns the last instruction of blk if it is a CONTINUE, otherwise NULL.
+  static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+    if (iter != blk->rend()) {
+      MachineInstr *instr = &(*iter);
+      if (instr->getOpcode() == AMDIL::CONTINUE) {
+        return instr;
+      }
+    }
+    return NULL;
+  }
+
+  // Returns the first i32 BREAK_LOGICALNZ / BREAK_LOGICALZ in blk, or NULL.
+  static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+    for (MachineBasicBlock::iterator iter = blk->begin();
+         iter != blk->end(); ++iter) {
+      MachineInstr *instr = &(*iter);
+      if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZ_i32)
+          || (instr->getOpcode() == AMDIL::BREAK_LOGICALZ_i32)) {
+        return instr;
+      }
+    }
+    return NULL;
+  }
+
+  // A block counts as a return block when it has no successors, whether or
+  // not it actually ends in a RETURN instruction.
+  static bool isReturnBlock(MachineBasicBlock *blk) {
+    MachineInstr *instr = getReturnInstr(blk);
+    bool isReturn = (blk->succ_size() == 0);
+    if (instr) {
+      assert(isReturn);
+    } else if (isReturn) {
+      if (DEBUGME) {
+        errs() << "BB" << blk->getNumber()
+               <<" is return block without RETURN instr\n";
+      }
+    }
+
+    return isReturn;
+  }
+
+  // Returns the iterator in blk that points at instr; instr must be in blk.
+  static MachineBasicBlock::iterator
+  getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+    assert(instr->getParent() == blk && "instruction doesn't belong to block");
+    MachineBasicBlock::iterator iter = blk->begin();
+    MachineBasicBlock::iterator iterEnd = blk->end();
+    // Test for the end first so the end iterator is never dereferenced.
+    while (iter != iterEnd && &(*iter) != instr) {
+      ++iter;
+    }
+
+    assert(iter != iterEnd);
+    return iter;
+  }//getInstrPos
+
+  // Creates an operand-less instruction with newOpcode at the start of blk.
+  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+                                         AMDILCFGStructurizer *passRep) {
+    return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
+  } //insertInstrBefore
+
+  // Same as above, with an explicit debug location.
+  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+                                         AMDILCFGStructurizer *passRep, DebugLoc DL) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+    if (blk->begin() != blk->end()) {
+      blk->insert(blk->begin(), newInstr);
+    } else {
+      blk->push_back(newInstr);
+    }
+
+    SHOWNEWINSTR(newInstr);
+
+    return newInstr;
+  } //insertInstrBefore
+
+  // Appends an operand-less instruction with newOpcode to blk.
+  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+                             AMDILCFGStructurizer *passRep) {
+    insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
+  } //insertInstrEnd
+
+  // Same as above, with an explicit debug location.
+  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+                             AMDILCFGStructurizer *passRep, DebugLoc DL) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineInstr *newInstr = blk->getParent()
+                             ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+    blk->push_back(newInstr);
+    //assume the instruction doesn't take any reg operand ...
+
+    SHOWNEWINSTR(newInstr);
+  } //insertInstrEnd
+
+  // Creates an operand-less instruction with newOpcode right before instrPos,
+  // which must point at an existing instruction (it is dereferenced).
+  static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+                                         int newOpcode,
+                                         AMDILCFGStructurizer *passRep,
+                                         DebugLoc DL) {
+    MachineInstr *oldInstr = &(*instrPos);
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineBasicBlock *blk = oldInstr->getParent();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+    blk->insert(instrPos, newInstr);
+    //assume the instruction doesn't take any reg operand ...
+
+    SHOWNEWINSTR(newInstr);
+    return newInstr;
+  } //insertInstrBefore
+
+  static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+                                         int newOpcode,
+                                         AMDILCFGStructurizer *passRep) {
+    return insertInstrBefore(instrPos, newOpcode, passRep, DebugLoc());
+  } //insertInstrBefore
+
+  // Inserts a newOpcode instruction before instrPos whose register operand is
+  // copied from operand 1 of the instruction at instrPos. The old instruction
+  // is left in place for the caller to erase.
+  static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+                                     int newOpcode,
+                                     AMDILCFGStructurizer *passRep,
+                                     DebugLoc DL) {
+    MachineInstr *oldInstr = &(*instrPos);
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineBasicBlock *blk = oldInstr->getParent();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+                                           DL);
+
+    blk->insert(instrPos, newInstr);
+    MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
+                                         false);
+
+    SHOWNEWINSTR(newInstr);
+    //erase later oldInstr->eraseFromParent();
+  } //insertCondBranchBefore
+
+  // Inserts a newOpcode instruction using regNum before insertPos in blk.
+  static void insertCondBranchBefore(MachineBasicBlock *blk,
+                                     MachineBasicBlock::iterator insertPos,
+                                     int newOpcode,
+                                     AMDILCFGStructurizer *passRep,
+                                     RegiT regNum,
+                                     DebugLoc DL) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+    //insert before
+    blk->insert(insertPos, newInstr);
+    MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+    SHOWNEWINSTR(newInstr);
+  } //insertCondBranchBefore
+
+  // Appends a newOpcode instruction using regNum to blk.
+  static void insertCondBranchEnd(MachineBasicBlock *blk,
+                                  int newOpcode,
+                                  AMDILCFGStructurizer *passRep,
+                                  RegiT regNum) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+    blk->push_back(newInstr);
+    MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+    SHOWNEWINSTR(newInstr);
+  } //insertCondBranchEnd
+
+
+  // Inserts "LOADCONST_i32 regNum, regVal" right before instrPos, which must
+  // point at an existing instruction (it is dereferenced).
+  static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+                                      AMDILCFGStructurizer *passRep,
+                                      RegiT regNum, int regVal) {
+    MachineInstr *oldInstr = &(*instrPos);
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineBasicBlock *blk = oldInstr->getParent();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+                                           DebugLoc());
+    MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
+    MachineInstrBuilder(newInstr).addImm(regVal); //set src value
+
+    blk->insert(instrPos, newInstr);
+
+    SHOWNEWINSTR(newInstr);
+  } //insertAssignInstrBefore
+
+  // Inserts "LOADCONST_i32 regNum, regVal" at the start of blk.
+  static void insertAssignInstrBefore(MachineBasicBlock *blk,
+                                      AMDILCFGStructurizer *passRep,
+                                      RegiT regNum, int regVal) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+                                           DebugLoc());
+    MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
+    MachineInstrBuilder(newInstr).addImm(regVal); //set src value
+
+    if (blk->begin() != blk->end()) {
+      blk->insert(blk->begin(), newInstr);
+    } else {
+      blk->push_back(newInstr);
+    }
+
+    SHOWNEWINSTR(newInstr);
+
+  } //insertAssignInstrBefore
+
+  // Inserts "IEQ dstReg, src1Reg, src2Reg" before instrPos in blk.
+  static void insertCompareInstrBefore(MachineBasicBlock *blk,
+                                       MachineBasicBlock::iterator instrPos,
+                                       AMDILCFGStructurizer *passRep,
+                                       RegiT dstReg, RegiT src1Reg,
+                                       RegiT src2Reg) {
+    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+    MachineInstr *newInstr =
+      blk->getParent()->CreateMachineInstr(tii->get(AMDIL::IEQ), DebugLoc());
+
+    MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
+    MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
+    MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
+
+    blk->insert(instrPos, newInstr);
+    SHOWNEWINSTR(newInstr);
+
+  } //insertCompareInstrBefore
+
+  // Gives dstBlk every successor that srcBlk has.
+  static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+                                 MachineBasicBlock *srcBlk) {
+    for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+         iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+      dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
+    }
+  } //cloneSuccessorList
+
+  // Appends a new block to srcBlk's function and fills it with clones of all
+  // instructions of srcBlk. Successors are not copied here.
+  static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+    MachineFunction *func = srcBlk->getParent();
+    MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+    func->push_back(newBlk);  //insert to function
+    //newBlk->setNumber(srcBlk->getNumber());
+    for (MachineBasicBlock::const_instr_iterator iter = srcBlk->instr_begin(),
+         iterEnd = srcBlk->instr_end();
+         iter != iterEnd; ++iter) {
+      MachineInstr *instr = func->CloneMachineInstr(iter);
+      // This is a workaround for LLVM bugzilla 8420 because CloneMachineInstr
+      // does not clone the AsmPrinterFlags.
+      instr->setAsmPrinterFlag(
+        (llvm::MachineInstr::CommentFlag)iter->getAsmPrinterFlags());
+      newBlk->push_back(instr);
+    }
+    return newBlk;
+  }
+
+  //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
+  //the AMDIL instruction is not recognized as terminator fix this and retire
+  //this routine
+  //
+  //Only the explicit target of a conditional loop-end branch is retargeted.
+  static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+                                         MachineBasicBlock *oldBlk,
+                                         MachineBasicBlock *newBlk) {
+    MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+    if (branchInstr && isCondBranch(branchInstr) &&
+        getExplicitBranch(branchInstr) == oldBlk) {
+      setExplicitBranch(branchInstr, newBlk);
+    }
+  }
+
+  // Final cleanup on entryBlk: erases every CONTINUE that is immediately
+  // followed by an ENDLOOP. Asserts that the function has no jump table.
+  static void wrapup(MachineBasicBlock *entryBlk) {
+    assert((!entryBlk->getParent()->getJumpTableInfo()
+            || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+           && "found a jump table");
+
+    //collect continue right before endloop
+    SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+    MachineBasicBlock::iterator pre = entryBlk->begin();
+    MachineBasicBlock::iterator iterEnd = entryBlk->end();
+    MachineBasicBlock::iterator iter = pre;
+    // "pre" trails "iter" by one instruction (both start on the first one).
+    while (iter != iterEnd) {
+      if (pre->getOpcode() == AMDIL::CONTINUE
+          && iter->getOpcode() == AMDIL::ENDLOOP) {
+        contInstr.push_back(pre);
+      }
+      pre = iter;
+      ++iter;
+    } //end while
+
+    //delete continue right before endloop; done after the scan so the
+    //iterators above are never invalidated
+    for (unsigned i = 0; i < contInstr.size(); ++i) {
+      contInstr[i]->eraseFromParent();
+    }
+
+    // TODO to fix up jump table so later phase won't be confused.  if
+    // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
+    // there isn't such an interface yet.  alternatively, replace all the other
+    // blocks in the jump table with the entryBlk //}
+
+  } //wrapup
+
+  // Accessors for the analyses the pass has requested.
+
+  static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
+    return &pass.getAnalysis<MachineDominatorTree>();
+  }
+
+  static MachinePostDominatorTree*
+  getPostDominatorTree(AMDILCFGStructurizer &pass) {
+    return &pass.getAnalysis<MachinePostDominatorTree>();
+  }
+
+  static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
+    return &pass.getAnalysis<MachineLoopInfo>();
+  }
+}; // template class CFGStructTraits
+} //end of namespace llvmCFGStruct
+
+using namespace llvm;
+
+// Pass registration for AMDILCFGPrepare, listing the analyses it depends on.
+INITIALIZE_PASS_BEGIN(AMDILCFGPrepare, "amdcfgprepare",
+                      "AMD IL Control Flow Graph Preparation Pass",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(AMDILCFGPrepare, "amdcfgprepare",
+                    "AMD IL Control Flow Graph Preparation Pass",
+                    false, false)
+
+// Pass registration for AMDILCFGPerform, listing the analyses it depends on.
+INITIALIZE_PASS_BEGIN(AMDILCFGPerform, "amdcfgperform",
+                      "AMD IL Control Flow Graph structurizer Pass",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(AMDILCFGPerform, "amdcfgperform",
+                    "AMD IL Control Flow Graph structurizer Pass",
+                    false, false)
+
+namespace llvm
+{
+FunctionPass *createAMDILCFGPreparationPass(); // returns a new AMDILCFGPrepare
+FunctionPass *createAMDILCFGStructurizerPass(); // returns a new AMDILCFGPerform
+}
+
+// createAMDILCFGPreparationPass - Returns a newly allocated AMDILCFGPrepare
+// pass.
+FunctionPass *llvm::createAMDILCFGPreparationPass()
+{
+  FunctionPass *preparePass = new AMDILCFGPrepare();
+  return preparePass;
+}
+
+bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func)
+{
+  // Cache the target's instruction info before the structurizer runs; the
+  // traits helpers read it back through getTargetInstrInfo().
+  TII = func.getTarget().getInstrInfo();
+
+  typedef llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer> StructurizerT;
+  return StructurizerT().prepare(func, *this);
+}
+
+// createAMDILCFGStructurizerPass - Returns a newly allocated AMDILCFGPerform
+// pass.
+FunctionPass *llvm::createAMDILCFGStructurizerPass()
+{
+  FunctionPass *performPass = new AMDILCFGPerform();
+  return performPass;
+}
+
+bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func)
+{
+  // Cache the target's instruction info before the structurizer runs; the
+  // traits helpers read it back through getTargetInstrInfo().
+  TII = func.getTarget().getInstrInfo();
+
+  typedef llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer> StructurizerT;
+  return StructurizerT().run(func, *this);
+}
+
+//end of file newline goes below
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCallingConv.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,50 @@
+//===-- AMDILCallingConv.td -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMDIL architectures.
+//
+//===----------------------------------------------------------------------===//
+
+def RetCC_AMDIL32 : CallingConv<[ // return-value convention; same rule list as CC_AMDIL32 in this file
+ // Since IL has no return values, all values can be emulated on the stack
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL
+
+ CCIfType<[i1, i8, i16, i32, f32], CCAssignToReg< // scalar types up to 32 bits
+ [ include "AMDILRegisterUsesScalar.td" ]> >, // register list comes from the included file
+
+ CCIfType<[v2i32, v2f32, v2i8, v2i16, f64, i64], CCAssignToReg< // two-element vectors plus the 64-bit scalars f64/i64
+ [ include "AMDILRegisterUsesV2.td" ]> >,
+
+ CCIfType<[v4i32, v4f32, v4i8, v4i16, v2f64, v2i64], CCAssignToReg< // four-element vectors plus v2f64/v2i64
+ [ include "AMDILRegisterUsesV4.td" ]> >,
+
+ CCAssignToStack<16, 16> // fallback: stack slot of size 16 with alignment 16
+ ]>;
+
+// AMDIL 32-bit C Calling convention.
+def CC_AMDIL32 : CallingConv<[ // argument convention; same rule list as RetCC_AMDIL32 in this file
+ // CCIfByVal<CCPassByVal<4, 4>>,
+ // Since IL has parameter values, all values can be emulated on the stack
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL
+
+ CCIfType<[i1, i8, i16, i32, f32], CCAssignToReg< // scalar types up to 32 bits
+ [ include "AMDILRegisterUsesScalar.td" ]> >, // register list comes from the included file
+
+ CCIfType<[v2i32, v2f32, v2i8, v2i16, f64, i64], CCAssignToReg< // two-element vectors plus the 64-bit scalars f64/i64
+ [ include "AMDILRegisterUsesV2.td" ]> >,
+
+ CCIfType<[v4i32, v4f32, v4i8, v4i16, v2f64, v2i64], CCAssignToReg< // four-element vectors plus v2f64/v2i64
+ [ include "AMDILRegisterUsesV4.td" ]> >,
+
+ CCAssignToStack<16, 16> // fallback: stack slot of size 16 with alignment 16
+ ]>;
+
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,87 @@
+//===-- AMDILCompilerErrors.h ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Error codes and the matching message strings reported by the AMDIL backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_COMPILER_ERRORS_H_
+#define _AMDIL_COMPILER_ERRORS_H_
+// Compiler errors generated by the backend that will cause
+// the runtime to abort compilation. These are mainly for
+// device constraint violations or invalid code.
+namespace amd // NOTE: the #define codes below are preprocessor macros, so this namespace does not scope them
+{
+
+#define INVALID_COMPUTE 0 // each code is the index of its message in CompilerErrorMessage
+#define GENERIC_ERROR 1
+#define INTERNAL_ERROR 2
+#define MISSING_FUNCTION_CALL 3
+#define RESERVED_FUNCTION 4
+#define BYTE_STORE_ERROR 5
+#define UNKNOWN_TYPE_NAME 6
+#define NO_IMAGE_SUPPORT 7
+#define NO_ATOMIC_32 8
+#define NO_ATOMIC_64 9
+#define IRREDUCIBLE_CF 10
+#define INSUFFICIENT_RESOURCES 11
+#define INSUFFICIENT_LOCAL_RESOURCES 12
+#define INSUFFICIENT_PRIVATE_RESOURCES 13
+#define INSUFFICIENT_IMAGE_RESOURCES 14
+#define DOUBLE_NOT_SUPPORTED 15
+#define INVALID_CONSTANT_WRITE 16
+#define INSUFFICIENT_CONSTANT_RESOURCES 17
+#define INSUFFICIENT_COUNTER_RESOURCES 18
+#define INSUFFICIENT_REGION_RESOURCES 19
+#define REGION_MEMORY_ERROR 20
+#define MEMOP_NO_ALLOCATION 21
+#define RECURSIVE_FUNCTION 22
+#define INCORRECT_COUNTER_USAGE 23
+#define INVALID_INTRINSIC_USAGE 24
+#define INSUFFICIENT_SEMAPHORE_RESOURCES 25
+#define NO_SEMAPHORE_SUPPORT 26
+#define INVALID_INIT_VALUE 27
+#define NO_FLAT_SUPPORT 28
+#define NUM_ERROR_MESSAGES 29 // count of the codes above (0..28); sizes the table below
+
+static const char *CompilerErrorMessage[NUM_ERROR_MESSAGES] = { // static in a header: every including translation unit gets its own copy
+ "E000:Compute Shader Not Supported! ", // entry N starts with the prefix ENNN: matching code N
+ "E001:Generic Compiler Error Message! ",
+ "E002:Internal Compiler Error Message!",
+ "E003:Missing Function Call Detected! ",
+ "E004:Reserved Function Call Detected!",
+ "E005:Byte Addressable Stores Invalid!",
+ "E006:Kernel Arg Type Name Is Invalid!",
+ "E007:Image Extension Unsupported! ",
+ "E008:32bit Atomic Op are Unsupported!",
+ "E009:64bit Atomic Op are Unsupported!",
+ "E010:Irreducible ControlFlow Detected",
+ "E011:Insufficient Resources Detected!",
+ "E012:Insufficient Local Resources! ",
+ "E013:Insufficient Private Resources! ",
+ "E014:Images not currently supported! ", // NOTE(review): code 14 is INSUFFICIENT_IMAGE_RESOURCES but the text says "not supported" - confirm wording
+ "E015:Double precision not supported! ",
+ "E016:Invalid Constant Memory Write! ",
+ "E017:Max number Constant Ptr reached!",
+ "E018:Max number of Counters reached! ",
+ "E019:Insufficient Region Resources! ",
+ "E020:Region address space invalid! ",
+ "E021:MemOp with no memory allocated! ",
+ "E022:Recursive Function detected! ",
+ "E023:Illegal Inc+Dec to same counter!",
+ "E024:Illegal usage of intrinsic inst!",
+ "E025:Insufficient Semaphore Resources",
+ "E026:Semaphores not supported! ",
+ "E027:Semaphore init value is invalid!",
+ "E028:Flat address is not supported! "
+};
+
+}
+
+#endif // _AMDIL_COMPILER_ERRORS_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,37 @@
+//===-- AMDILCompilerWarnings.h -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Warning codes and the matching message strings reported by the AMDIL backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_COMPILER_WARNINGS_H_
+#define _AMDIL_COMPILER_WARNINGS_H_
+/// Compiler backend generated warnings that might cause
+/// issues with compilation. These warnings become errors if
+/// -Werror is specified on the command line.
+namespace amd // NOTE: the #define codes below are preprocessor macros, so this namespace does not scope them
+{
+
+#define LIMIT_BARRIER 0 // each code is the index of its message in CompilerWarningMessage
+#define BAD_BARRIER_OPT 1
+#define RECOVERABLE_ERROR 2
+#define NUM_WARN_MESSAGES 3 // count of the codes above (0..2); sizes the table below
+
+static const char *CompilerWarningMessage[NUM_WARN_MESSAGES] = { // static in a header: every including translation unit gets its own copy
+ /// All warnings must be prefixed with the W token or they might be
+ /// treated as errors.
+ "W000:Barrier caused limited groupsize", // entry N starts with the prefix WNNN: matching code N
+ "W001:Dangerous Barrier Opt Detected! ",
+ "W002:Recoverable BE Error Detected! "
+
+};
+}
+
+#endif // _AMDIL_COMPILER_WARNINGS_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1043 @@
+//===-- AMDILConversions.td -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Selection patterns for integer extension/truncation and int<->float conversions.
+//
+//===----------------------------------------------------------------------===//
+
+def actos_i16:Pat < (i16 (anyext GPRI8:$src)), // anyext i8 -> i16; expanded exactly like the zext pattern uctos_i16
+(IL_ASSHORT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def uctos_i16:Pat < (i16 (zext GPRI8:$src)), // zext i8 -> i16: SHL 24 then USHR 24 on the 32-bit view (presumably a logical shift)
+(IL_ASSHORT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def sctos_i16:Pat < (i16 (sext GPRI8:$src)), // sext i8 -> i16: SHL 24 then SHR 24 on the 32-bit view (presumably an arithmetic shift)
+(IL_ASSHORT_i32
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def actoi_i32:Pat < (i32 (anyext GPRI8:$src)), // anyext i8 -> i32; expanded exactly like the zext pattern uctoi_i32
+(IL_ASINT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def uctoi_i32:Pat < (i32 (zext GPRI8:$src)), // zext i8 -> i32: SHL 24 then USHR 24
+(IL_ASINT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def sctoi_i32:Pat < (i32 (sext GPRI8:$src)), // sext i8 -> i32: SHL 24 then SHR 24
+(IL_ASINT_i32
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def actol_i64:Pat < (i64 (anyext GPRI8:$src)), // anyext i8 -> i64; expanded exactly like the zext pattern uctol_i64
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 0)) >;
+
+
+def uctol_i64:Pat < (i64 (zext GPRI8:$src)), // zext i8 -> i64: LCREATE pairs the zero-extended 32-bit value with a constant 0
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 0)) >; // second LCREATE operand: constant 0 (presumably the upper half)
+
+
+def sctol_i64:Pat < (i64 (sext GPRI8:$src)), // sext i8 -> i64: two 32-bit halves combined by LCREATE
+(LCREATE
+ (SHR_i32 // first operand: the byte sign-extended to 32 bits (SHL 24, SHR 24)
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)),
+ (SHR_i32 // second operand: SHL 24 then SHR 31, i.e. the byte's sign bit shifted across the word
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 31))) >;
+
+
+def astoi_i32:Pat < (i32 (anyext GPRI16:$src)), // anyext i16 -> i32; expanded exactly like the zext pattern ustoi_i32
+(IL_ASINT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))) >;
+
+
+def ustoi_i32:Pat < (i32 (zext GPRI16:$src)), // zext i16 -> i32: SHL 16 then USHR 16
+(IL_ASINT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))) >;
+
+
+def sstoi_i32:Pat < (i32 (sext GPRI16:$src)), // sext i16 -> i32: SHL 16 then SHR 16
+(IL_ASINT_i32
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))) >;
+
+
+def astol_i64:Pat < (i64 (anyext GPRI16:$src)), // anyext i16 -> i64; expanded exactly like the zext pattern ustol_i64
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 0)) >;
+
+
+def ustol_i64:Pat < (i64 (zext GPRI16:$src)), // zext i16 -> i64: LCREATE pairs the zero-extended 32-bit value with a constant 0
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 0)) >;
+
+
+def sstol_i64:Pat < (i64 (sext GPRI16:$src)), // sext i16 -> i64: two 32-bit halves combined by LCREATE
+(LCREATE
+ (SHR_i32 // first operand: the short sign-extended to 32 bits (SHL 16, SHR 16)
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)),
+ (SHR_i32 // second operand: SHL 16 then SHR 31, i.e. the short's sign bit shifted across the word
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 31))) >;
+
+
+def aitol_i64:Pat < (i64 (anyext GPRI32:$src)), // anyext i32 -> i64; expanded exactly like the zext pattern uitol_i64
+(LCREATE
+(IL_ASINT_i32 GPRI32:$src),
+ (LOADCONST_i32 0)) >;
+
+
+def uitol_i64:Pat < (i64 (zext GPRI32:$src)), // zext i32 -> i64: source word paired with a constant 0 via LCREATE
+(LCREATE
+(IL_ASINT_i32 GPRI32:$src),
+ (LOADCONST_i32 0)) >;
+
+
+def sitol_i64:Pat < (i64 (sext GPRI32:$src)), // sext i32 -> i64: source word paired with its sign fill via LCREATE
+(LCREATE
+(IL_ASINT_i32 GPRI32:$src),
+ (SHR_i32 // second operand: source shifted right by 31
+ (SHL_i32
+(IL_ASINT_i32 GPRI32:$src),
+ (LOADCONST_i32 0)), // NOTE(review): shift-left by 0 looks redundant - confirm it is only here for symmetry with the i8/i16 patterns
+ (LOADCONST_i32 31))) >;
+
+
+
+def sctof_f32:Pat < (f32 (sint_to_fp GPRI8:$src)), // signed i8 -> f32: sign-extend (SHL 24, SHR 24) then ITOF
+(f32
+ (ITOF
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)))) >;
+
+
+def uctof_f32:Pat < (f32 (uint_to_fp GPRI8:$src)), // unsigned i8 -> f32: zero-extend (SHL 24, USHR 24) then UTOF
+(f32
+ (UTOF
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)))) >;
+
+
+def ftosc_i8:Pat < (i8 (fp_to_sint GPRF32:$src)), // f32 -> signed i8: FTOI, AND with 0xFF, retype via IL_ASCHAR_i32
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32
+(FTOI GPRF32:$src),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def ftouc_i8:Pat < (i8 (fp_to_uint GPRF32:$src)), // f32 -> unsigned i8: FTOU, AND with 0xFF, retype via IL_ASCHAR_i32
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32
+(FTOU GPRF32:$src),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def sctod_f64:Pat < (f64 (sint_to_fp GPRI8:$src)), // signed i8 -> f64 via f32: sign-extend, ITOF, then FTOD
+(f64 (FTOD
+ (ITOF
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))))) >;
+
+
+def uctod_f64:Pat < (f64 (uint_to_fp GPRI8:$src)), // unsigned i8 -> f64 via f32: zero-extend, UTOF, then FTOD
+(f64 (FTOD
+ (UTOF
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))))) >;
+
+
+def dtosc_i8:Pat < (i8 (fp_to_sint GPRF64:$src)), // f64 -> signed i8 via f32: DTOF, FTOI, AND with 0xFF
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32
+(FTOI (DTOF GPRF64:$src)),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def dtouc_i8:Pat < (i8 (fp_to_uint GPRF64:$src)), // f64 -> unsigned i8 via f32: DTOF, FTOU, AND with 0xFF
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32
+(FTOU (DTOF GPRF64:$src)),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def sstof_f32:Pat < (f32 (sint_to_fp GPRI16:$src)), // signed i16 -> f32: sign-extend (SHL 16, SHR 16) then ITOF
+(f32
+ (ITOF
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)))) >;
+
+
+def ustof_f32:Pat < (f32 (uint_to_fp GPRI16:$src)), // unsigned i16 -> f32: zero-extend (SHL 16, USHR 16) then UTOF
+(f32
+ (UTOF
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)))) >;
+
+
+def ftoss_i16:Pat < (i16 (fp_to_sint GPRF32:$src)), // f32 -> signed i16: FTOI, AND with 0xFFFF, retype via IL_ASSHORT_i32
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32
+(FTOI GPRF32:$src),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def ftous_i16:Pat < (i16 (fp_to_uint GPRF32:$src)), // f32 -> unsigned i16: FTOU, AND with 0xFFFF, retype via IL_ASSHORT_i32
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32
+(FTOU GPRF32:$src),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def sstod_f64:Pat < (f64 (sint_to_fp GPRI16:$src)), // signed i16 -> f64 via f32: sign-extend, ITOF, then FTOD
+(f64 (FTOD
+ (ITOF
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))))) >;
+
+
+def ustod_f64:Pat < (f64 (uint_to_fp GPRI16:$src)), // unsigned i16 -> f64 via f32: zero-extend, UTOF, then FTOD
+(f64 (FTOD
+ (UTOF
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))))) >;
+
+
+def dtoss_i16:Pat < (i16 (fp_to_sint GPRF64:$src)), // f64 -> signed i16 via f32: DTOF, FTOI, AND with 0xFFFF
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32
+(FTOI (DTOF GPRF64:$src)),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def dtous_i16:Pat < (i16 (fp_to_uint GPRF64:$src)), // f64 -> unsigned i16 via f32: DTOF, FTOU, AND with 0xFFFF
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32
+(FTOU (DTOF GPRF64:$src)),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+
+
+
+def stoc_i8:Pat < (i8 (trunc GPRI16:$src)), // trunc i16 -> i8: AND with 0xFF, then SHL/SHR pair, retype via IL_ASCHAR_i32
+(IL_ASCHAR_i32
+ (SHR_i32
+ (SHL_i32
+ (IL_ASINT_i16
+(BINARY_AND_i16 GPRI16:$src,
+ (LOADCONST_i16 0x000000FF))),
+ (LOADCONST_i32 16)), // NOTE(review): shift amount is 16 here but 24 in itoc_i8, ltoc_i8, stoc_v2i8 and stoc_v4i8 - confirm which is intended
+ (LOADCONST_i32 16))
+ )>;
+
+
+def itoc_i8:Pat < (i8 (trunc GPRI32:$src)), // trunc i32 -> i8: AND with 0xFF, SHL 24, SHR 24, retype via IL_ASCHAR_i32
+(IL_ASCHAR_i32
+ (SHR_i32
+ (SHL_i32
+ (IL_ASINT_i32
+(BINARY_AND_i32 GPRI32:$src,
+ (LOADCONST_i32 0x000000FF)))
+ , (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))
+ ) >;
+
+
+def itos_i16:Pat < (i16 (trunc GPRI32:$src)), // trunc i32 -> i16: AND with 0xFFFF, SHL 16, SHR 16, retype via IL_ASSHORT_i32
+(IL_ASSHORT_i32
+ (SHR_i32
+ (SHL_i32
+ (IL_ASINT_i32
+(BINARY_AND_i32 GPRI32:$src,
+ (LOADCONST_i32 0x0000FFFF)))
+ , (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))
+ ) >;
+
+
+def ltoc_i8:Pat < (i8 (trunc GPRI64:$src)), // trunc i64 -> i8: take LLO of the source, AND with 0xFF, SHL 24, SHR 24
+(IL_ASCHAR_i32
+ (SHR_i32
+ (SHL_i32
+ (BINARY_AND_i32
+(LLO GPRI64:$src),
+ (LOADCONST_i32 0x000000FF))
+ , (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))
+ ) >;
+
+
+def ltos_i16:Pat < (i16 (trunc GPRI64:$src)), // trunc i64 -> i16: take LLO of the source, AND with 0xFFFF, SHL 16, SHR 16
+(IL_ASSHORT_i32
+ (SHR_i32
+ (SHL_i32
+ (BINARY_AND_i32
+(LLO GPRI64:$src),
+ (LOADCONST_i32 0x0000FFFF))
+ , (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))
+ ) >;
+
+
+def ltoi_i32:Pat < (i32 (trunc GPRI64:$src)), (LLO GPRI64:$src) >; // trunc i64 -> i32 is just LLO of the source
+
+def actos_v2i16:Pat < (v2i16 (anyext GPRV2I8:$src)), // anyext v2i8 -> v2i16; expanded exactly like the zext pattern uctos_v2i16
+(IL_ASV2SHORT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def uctos_v2i16:Pat < (v2i16 (zext GPRV2I8:$src)), // zext v2i8 -> v2i16: SHLVEC then USHRVEC by a VCREATE'd constant 24
+(IL_ASV2SHORT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def sctos_v2i16:Pat < (v2i16 (sext GPRV2I8:$src)), // sext v2i8 -> v2i16: SHLVEC then SHRVEC by a VCREATE'd constant 24
+(IL_ASV2SHORT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def actoi_v2i32:Pat < (v2i32 (anyext GPRV2I8:$src)), // anyext v2i8 -> v2i32; expanded exactly like the zext pattern uctoi_v2i32
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def uctoi_v2i32:Pat < (v2i32 (zext GPRV2I8:$src)), // zext v2i8 -> v2i32: SHLVEC 24 then USHRVEC 24
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def sctoi_v2i32:Pat < (v2i32 (sext GPRV2I8:$src)), // sext v2i8 -> v2i32: SHLVEC 24 then SHRVEC 24
+(IL_ASV2INT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def actol_v2i64:Pat < (v2i64 (anyext GPRV2I8:$src)), // anyext v2i8 -> v2i64; expanded exactly like the zext pattern uctol_v2i64
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def uctol_v2i64:Pat < (v2i64 (zext GPRV2I8:$src)), // zext v2i8 -> v2i64: LCREATE_v2i64 pairs the zero-extended lanes with a vector of 0
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def sctol_v2i64:Pat < (v2i64 (sext GPRV2I8:$src)), // sext v2i8 -> v2i64: two v2i32 halves combined by LCREATE_v2i64
+(LCREATE_v2i64
+ (SHRVEC_v2i32 // first operand: lanes sign-extended to 32 bits (SHLVEC 24, SHRVEC 24)
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (SHRVEC_v2i32 // second operand: SHLVEC 24 then SHRVEC 31, i.e. each lane's sign bit shifted across the word
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+def astoi_v2i32:Pat < (v2i32 (anyext GPRV2I16:$src)), // anyext v2i16 -> v2i32; expanded exactly like the zext pattern ustoi_v2i32
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+
+def ustoi_v2i32:Pat < (v2i32 (zext GPRV2I16:$src)), // zext v2i16 -> v2i32: SHLVEC 16 then USHRVEC 16
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+
+def sstoi_v2i32:Pat < (v2i32 (sext GPRV2I16:$src)), // sext v2i16 -> v2i32: SHLVEC 16 then SHRVEC 16
+(IL_ASV2INT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+
+def astol_v2i64:Pat < (v2i64 (anyext GPRV2I16:$src)), // anyext v2i16 -> v2i64; expanded exactly like the zext pattern ustol_v2i64
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def ustol_v2i64:Pat < (v2i64 (zext GPRV2I16:$src)), // zext v2i16 -> v2i64: LCREATE_v2i64 pairs the zero-extended lanes with a vector of 0
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def sstol_v2i64:Pat < (v2i64 (sext GPRV2I16:$src)), // sext v2i16 -> v2i64: two v2i32 halves combined by LCREATE_v2i64
+(LCREATE_v2i64
+ (SHRVEC_v2i32 // first operand: lanes sign-extended to 32 bits (SHLVEC 16, SHRVEC 16)
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (SHRVEC_v2i32 // second operand: SHLVEC 16 then SHRVEC 31, i.e. each lane's sign bit shifted across the word
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+def aitol_v2i64:Pat < (v2i64 (anyext GPRV2I32:$src)), // anyext v2i32 -> v2i64; expanded exactly like the zext pattern uitol_v2i64
+(LCREATE_v2i64
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def uitol_v2i64:Pat < (v2i64 (zext GPRV2I32:$src)), // zext v2i32 -> v2i64: source lanes paired with a vector of 0 via LCREATE_v2i64
+(LCREATE_v2i64
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def sitol_v2i64:Pat < (v2i64 (sext GPRV2I32:$src)), // sext v2i32 -> v2i64: source lanes paired with their sign fill via LCREATE_v2i64
+(LCREATE_v2i64
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (SHRVEC_v2i32 // second operand: source shifted right by 31
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))), // NOTE(review): shift-left by 0 looks redundant - confirm it is only here for symmetry
+ (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+
+def sctof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I8:$src)), // signed v2i8 -> v2f32: sign-extend lanes (24/24) then ITOF_v2f32
+(v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+
+
+def uctof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)), // unsigned v2i8 -> v2f32: zero-extend lanes (24/24) then UTOF_v2f32
+(v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+
+
+def ftosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F32:$src)), // v2f32 -> signed v2i8: FTOI_v2i32, AND each lane with 0xFF
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def ftouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F32:$src)), // v2f32 -> unsigned v2i8: FTOU_v2i32, AND each lane with 0xFF
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+def sctod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I8:$src)), // signed v2i8 -> v2f64, one component at a time through f32
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64 // base vector built from the FTOD of the component extracted with index 1
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 1)
+ )),
+ (FTOD // value inserted: the FTOD of the component extracted with index 2
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 2)
+ ), 1, 256) // NOTE(review): meaning of the trailing VINSERT operands 1 and 256 is not visible here - see the VINSERT definition
+ ) >;
+
+def uctod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)), // unsigned v2i8 -> v2f64, one component at a time through f32
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64 // base vector built from the FTOD of the component extracted with index 1
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 1)
+ )),
+ (FTOD // value inserted: the FTOD of the component extracted with index 2
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 2)
+ ), 1, 256) // NOTE(review): meaning of the trailing VINSERT operands 1 and 256 is not visible here - see the VINSERT definition
+ ) >;
+
+
+def dtosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F64:$src)), // v2f64 -> signed v2i8: DTOF each component, FTOI_v2i32, AND with 0xFF
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 (VINSERT_v2f32 // v2f32 rebuilt from the two DTOF results before the integer conversion
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def dtouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F64:$src)), // v2f64 -> unsigned v2i8: DTOF each component, FTOU_v2i32, AND with 0xFF
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 (VINSERT_v2f32 // v2f32 rebuilt from the two DTOF results before the integer conversion
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def sstof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I16:$src)), // signed v2i16 -> v2f32: sign-extend lanes (16/16) then ITOF_v2f32
+(v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+
+
+def ustof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)), // unsigned v2i16 -> v2f32: zero-extend lanes (16/16) then UTOF_v2f32
+(v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+
+
+def ftoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F32:$src)), // v2f32 -> signed v2i16: FTOI_v2i32, AND each lane with 0xFFFF
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def ftous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F32:$src)), // v2f32 -> unsigned v2i16: FTOU_v2i32, AND each lane with 0xFFFF
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def sstod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I16:$src)), // signed v2i16 -> v2f64, one component at a time through f32
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64 // base vector built from the FTOD of the component extracted with index 1
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 1)
+ )),
+ (FTOD // value inserted: the FTOD of the component extracted with index 2
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 2)
+ ), 1, 256) // NOTE(review): meaning of the trailing VINSERT operands 1 and 256 is not visible here - see the VINSERT definition
+ ) >;
+
+def ustod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I16:$src)), // unsigned v2i16 -> v2f64, one component at a time through f32
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64 // base vector built from the FTOD of the component extracted with index 1
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 1)
+ )),
+ (FTOD // value inserted: the FTOD of the component extracted with index 2
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 2)
+ ), 1, 256) // NOTE(review): meaning of the trailing VINSERT operands 1 and 256 is not visible here - see the VINSERT definition
+ ) >;
+
+
+def dtoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F64:$src)), // v2f64 -> signed v2i16: DTOF each component, FTOI_v2i32, AND with 0xFFFF
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 (VINSERT_v2f32 // v2f32 rebuilt from the two DTOF results before the integer conversion
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def dtous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F64:$src)), // v2f64 -> unsigned v2i16: DTOF each component, FTOU_v2i32, AND with 0xFFFF
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 (VINSERT_v2f32 // v2f32 rebuilt from the two DTOF results before the integer conversion
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+def stoc_v2i8:Pat < (v2i8 (trunc GPRV2I16:$src)), // trunc v2i16 -> v2i8: AND lanes with 0xFF, SHLVEC 24, SHRVEC 24
+(IL_ASV2CHAR_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+ (IL_ASV2INT_v2i16
+(BINARY_AND_v2i16 GPRV2I16:$src,
+ (VCREATE_v2i16 (LOADCONST_i16 0x000000FF))))
+ , (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))
+ ) >;
+
+
+def itoc_v2i8:Pat < (v2i8 (trunc GPRV2I32:$src)), // trunc v2i32 -> v2i8: AND lanes with 0xFF, SHLVEC 24, SHRVEC 24
+(IL_ASV2CHAR_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+ (IL_ASV2INT_v2i32
+(BINARY_AND_v2i32 GPRV2I32:$src,
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))
+ , (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))
+ ) >;
+
+
+def itos_v2i16:Pat < (v2i16 (trunc GPRV2I32:$src)), // trunc v2i32 -> v2i16: AND lanes with 0xFFFF, SHLVEC 16, SHRVEC 16
+(IL_ASV2SHORT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+ (IL_ASV2INT_v2i32
+(BINARY_AND_v2i32 GPRV2I32:$src,
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))
+ , (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))
+ ) >;
+
+
+def ltoc_v2i8:Pat < (v2i8 (trunc GPRV2I64:$src)), // trunc v2i64 -> v2i8: LLO_v2i64 of the source, AND with 0xFF, SHLVEC 24, SHRVEC 24
+(IL_ASV2CHAR_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+ (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF)))
+ , (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))
+ ) >;
+
+
+def ltos_v2i16:Pat < (v2i16 (trunc GPRV2I64:$src)), // trunc v2i64 -> v2i16: LLO_v2i64 of the source, AND with 0xFFFF, SHLVEC 16, SHRVEC 16
+(IL_ASV2SHORT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+ (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF)))
+ , (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))
+ ) >;
+
+
+def ltoi_v2i32:Pat < (v2i32 (trunc GPRV2I64:$src)), (LLO_v2i64 GPRV2I64:$src)>; // trunc v2i64 -> v2i32 is just LLO_v2i64 of the source
+
+
+
+def actos_v4i16:Pat < (v4i16 (anyext GPRV4I8:$src)), // anyext v4i8 -> v4i16; expanded exactly like the zext pattern uctos_v4i16
+(IL_ASV4SHORT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def uctos_v4i16:Pat < (v4i16 (zext GPRV4I8:$src)), // zext v4i8 -> v4i16: SHLVEC 24 then USHRVEC 24
+(IL_ASV4SHORT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def sctos_v4i16:Pat < (v4i16 (sext GPRV4I8:$src)), // sext v4i8 -> v4i16: SHLVEC 24 then SHRVEC 24
+(IL_ASV4SHORT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def actoi_v4i32:Pat < (v4i32 (anyext GPRV4I8:$src)), // anyext v4i8 -> v4i32; expanded exactly like the zext pattern uctoi_v4i32
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def uctoi_v4i32:Pat < (v4i32 (zext GPRV4I8:$src)), // zext v4i8 -> v4i32: SHLVEC 24 then USHRVEC 24
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def sctoi_v4i32:Pat < (v4i32 (sext GPRV4I8:$src)), // sext v4i8 -> v4i32: SHLVEC 24 then SHRVEC 24
+(IL_ASV4INT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def astoi_v4i32:Pat < (v4i32 (anyext GPRV4I16:$src)), // anyext v4i16 -> v4i32; expanded exactly like the zext pattern ustoi_v4i32
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+def ustoi_v4i32:Pat < (v4i32 (zext GPRV4I16:$src)), // zext v4i16 -> v4i32: SHLVEC 16 then USHRVEC 16
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+def sstoi_v4i32:Pat < (v4i32 (sext GPRV4I16:$src)), // sext v4i16 -> v4i32: SHLVEC 16 then SHRVEC 16
+(IL_ASV4INT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+
+def sctof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I8:$src)), // signed v4i8 -> v4f32: sign-extend lanes (24/24) then ITOF_v4f32
+(v4f32
+ (ITOF_v4f32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+
+
+def uctof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)), // unsigned v4i8 -> v4f32: zero-extend lanes (24/24) then UTOF_v4f32
+(v4f32
+ (UTOF_v4f32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+
+
+def ftosc_v4i8:Pat < (v4i8 (fp_to_sint GPRV4F32:$src)), // v4f32 -> signed v4i8: FTOI_v4i32, AND each lane with 0xFF
+(v4i8
+ (IL_ASV4CHAR_v4i32
+ (BINARY_AND_v4i32
+(FTOI_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def ftouc_v4i8:Pat < (v4i8 (fp_to_uint GPRV4F32:$src)), // v4f32 -> unsigned v4i8: FTOU_v4i32, AND each lane with 0xFF
+(v4i8
+ (IL_ASV4CHAR_v4i32
+ (BINARY_AND_v4i32
+(FTOU_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def sstof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I16:$src)), // signed v4i16 -> v4f32: sign-extend lanes (16/16) then ITOF_v4f32
+(v4f32
+ (ITOF_v4f32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+
+
+def ustof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I16:$src)), // unsigned v4i16 -> v4f32: zero-extend lanes (16/16) then UTOF_v4f32
+(v4f32
+ (UTOF_v4f32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+
+
+def ftoss_v4i16:Pat < (v4i16 (fp_to_sint GPRV4F32:$src)), // v4f32 -> signed v4i16: FTOI_v4i32, AND each lane with 0xFFFF
+(v4i16
+ (IL_ASV4SHORT_v4i32
+ (BINARY_AND_v4i32
+(FTOI_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def ftous_v4i16:Pat < (v4i16 (fp_to_uint GPRV4F32:$src)), // v4f32 -> unsigned v4i16: FTOU_v4i32, AND each lane with 0xFFFF
+(v4i16
+ (IL_ASV4SHORT_v4i32
+ (BINARY_AND_v4i32
+(FTOU_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+
+
+
+def stoc_v4i8:Pat < (v4i8 (trunc GPRV4I16:$src)), // trunc v4i16 -> v4i8: AND lanes with 0xFF, SHLVEC 24, SHRVEC 24
+(IL_ASV4CHAR_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+ (IL_ASV4INT_v4i16
+(BINARY_AND_v4i16 GPRV4I16:$src,
+ (VCREATE_v4i16 (LOADCONST_i16 0x000000FF))))
+ , (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))
+ ) >;
+
+
+def itoc_v4i8:Pat < (v4i8 (trunc GPRV4I32:$src)), // trunc v4i32 -> v4i8: AND lanes with 0xFF, SHLVEC 24, SHRVEC 24
+(IL_ASV4CHAR_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+ (IL_ASV4INT_v4i32
+(BINARY_AND_v4i32 GPRV4I32:$src,
+ (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))
+ , (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))
+ ) >;
+
+
+def itos_v4i16:Pat < (v4i16 (trunc GPRV4I32:$src)), // trunc v4i32 -> v4i16: AND lanes with 0xFFFF, SHLVEC 16, SHRVEC 16
+(IL_ASV4SHORT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+ (IL_ASV4INT_v4i32
+(BINARY_AND_v4i32 GPRV4I32:$src,
+ (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))
+ , (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))
+ ) >;
+
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,149 @@
+//===-- AMDILDevice.cpp ---------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+using namespace llvm;
+// Default implementation for all of the classes.
+// Builds the base device: remembers the subtarget, sizes both capability
+// bit vectors to AMDILDeviceInfo::MaxNumberCapabilities, installs the
+// default capabilities and tags the device with OCL_DEVICE_ALL.
+AMDILDevice::AMDILDevice(AMDILSubtarget *ST)
+  : mSTM(ST), mDeviceFlag(OCL_DEVICE_ALL)
+{
+  const unsigned NumCaps = AMDILDeviceInfo::MaxNumberCapabilities;
+  mHWBits.resize(NumCaps);
+  mSWBits.resize(NumCaps);
+  // NOTE(review): setCaps() is virtual; when called from this constructor
+  // it runs AMDILDevice::setCaps(), never a derived override.
+  setCaps();
+}
+
+// Empties both capability bit vectors before the object goes away.
+AMDILDevice::~AMDILDevice()
+{
+  mSWBits.clear();
+  mHWBits.clear();
+}
+
+// Default GDS size in bytes: the base device reports none.
+size_t
+AMDILDevice::getMaxGDSSize() const
+{
+  const size_t NoGDS = 0;
+  return NoGDS;
+}
+
+// Returns the device flag stored in mDeviceFlag.
+uint32_t AMDILDevice::getDeviceFlag() const
+{
+  return this->mDeviceFlag;
+}
+
+// Number of hardware constant buffers: HW_MAX_NUM_CB when constant memory
+// runs in hardware, otherwise zero.
+size_t
+AMDILDevice::getMaxNumCBs() const
+{
+  if (!usesHardware(AMDILDeviceInfo::ConstantMem))
+    return 0;
+
+  return HW_MAX_NUM_CB;
+}
+
+// Size of one hardware constant buffer: MAX_CB_SIZE when constant memory
+// runs in hardware, otherwise zero.
+size_t
+AMDILDevice::getMaxCBSize() const
+{
+  if (!usesHardware(AMDILDeviceInfo::ConstantMem))
+    return 0;
+
+  return MAX_CB_SIZE;
+}
+
+// Default scratch buffer limit reported by the base device (65536).
+size_t
+AMDILDevice::getMaxScratchSize() const
+{
+  const size_t DefaultScratchSize = 65536;
+  return DefaultScratchSize;
+}
+
+// Default stack alignment reported by the base device (16).
+uint32_t
+AMDILDevice::getStackAlignment() const
+{
+  const uint32_t DefaultStackAlignment = 16;
+  return DefaultStackAlignment;
+}
+
+// Installs the default capability set of the base device.  Bits go into
+// mSWBits or mHWBits; several of them depend on subtarget overrides.
+void
+AMDILDevice::setCaps()
+{
+  // Capabilities that are always recorded in the software set.
+  mSWBits.set(AMDILDeviceInfo::HalfOps);
+  mSWBits.set(AMDILDeviceInfo::ByteOps);
+  mSWBits.set(AMDILDeviceInfo::ShortOps);
+  mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+
+  // Capabilities that are only recorded when the subtarget overrides them.
+  if (mSTM->isOverride(AMDILDeviceInfo::NoInline))
+    mSWBits.set(AMDILDeviceInfo::NoInline);
+  if (mSTM->isOverride(AMDILDeviceInfo::MacroDB))
+    mSWBits.set(AMDILDeviceInfo::MacroDB);
+  if (mSTM->isOverride(AMDILDeviceInfo::NoAlias))
+    mSWBits.set(AMDILDeviceInfo::NoAlias);
+
+  // Constant memory goes to the software set on Apple or when the Debug
+  // override is active, and to the hardware set otherwise.  The Debug
+  // override is only queried when the subtarget is not Apple.
+  if (mSTM->isApple() || mSTM->isOverride(AMDILDeviceInfo::Debug))
+    mSWBits.set(AMDILDeviceInfo::ConstantMem);
+  else
+    mHWBits.set(AMDILDeviceInfo::ConstantMem);
+
+  // Private memory: software set under the Debug override, else hardware.
+  if (!mSTM->isOverride(AMDILDeviceInfo::Debug))
+    mHWBits.set(AMDILDeviceInfo::PrivateMem);
+  else
+    mSWBits.set(AMDILDeviceInfo::PrivateMem);
+
+  if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect))
+    mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+
+  mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
+  mSWBits.set(AMDILDeviceInfo::ByteGDSOps);
+  mSWBits.set(AMDILDeviceInfo::LongOps);
+}
+
+// Maps a capability to its execution mode: Hardware when its bit is set in
+// mHWBits, Software when it is set in mSWBits, Unsupported when neither is
+// set.  Asserts that a capability is never present in both sets.
+AMDILDeviceInfo::ExecutionMode
+AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const
+{
+  if (mHWBits[Caps]) {
+    assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+    return AMDILDeviceInfo::Hardware;
+  }
+
+  if (!mSWBits[Caps])
+    return AMDILDeviceInfo::Unsupported;
+
+  assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+  return AMDILDeviceInfo::Software;
+}
+
+// True when the capability has any execution mode other than Unsupported.
+bool
+AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const
+{
+  const AMDILDeviceInfo::ExecutionMode How = getExecutionMode(Mode);
+  return How != AMDILDeviceInfo::Unsupported;
+}
+
+// True when the capability's execution mode is Hardware.
+bool
+AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const
+{
+  const AMDILDeviceInfo::ExecutionMode How = getExecutionMode(Mode);
+  return How == AMDILDeviceInfo::Hardware;
+}
+
+// True when the capability's execution mode is Software.
+bool
+AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const
+{
+  const AMDILDeviceInfo::ExecutionMode How = getExecutionMode(Mode);
+  return How == AMDILDeviceInfo::Software;
+}
+
+// Returns the data layout description string used by the base device.
+std::string AMDILDevice::getDataLayout() const
+{
+  // The adjacent literals are concatenated into a single string.
+  static const char Layout[] =
+    "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+    "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+    "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+    "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+    "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+    "-n8:16:32:64";
+  return std::string(Layout);
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,130 @@
+//===-- AMDILDevice.h -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILDEVICEIMPL_H_
+#define _AMDILDEVICEIMPL_H_
+#include "AMDIL.h"
+#include "AMDILLLVMPC.h"
+#include "llvm/ADT/BitVector.h"
+namespace llvm
+{
+class AMDILSubtarget;
+class AMDILAsmPrinter;
+class AMDILIOExpansion;
+class AMDILPointerManager;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device.
+//
+// Pure virtual members must be supplied by each concrete device; the other
+// virtual members have default implementations in AMDILDevice.cpp.
+//===----------------------------------------------------------------------===//
+class AMDILDevice
+{
+public:
+ // Creates the device for the given subtarget and installs the default
+ // capabilities.
+ AMDILDevice(AMDILSubtarget *ST);
+ virtual ~AMDILDevice();
+
+ // Enum values for the various memory types.
+ // NOTE(review): "IO_TYPE_IDS" after the closing brace declares a data
+ // member of this unnamed enum type, not a name for the type itself;
+ // confirm that this is intended.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ } IO_TYPE_IDS;
+
+ // Returns the max LDS size that the hardware supports. Size is in
+ // bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ // Returns the max GDS size that the hardware supports if the GDS is
+ // supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ // Returns the max number of hardware constant address spaces that
+ // are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ // Returns the max number of bytes a single hardware constant buffer
+ // can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ // Returns the max number of bytes allowed by the hardware scratch
+ // buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ // Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ // Returns the number of work-items that exist in a single hardware
+ // wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ // Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ // Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ // Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+ // Get the max number of UAVs for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+ // Interface to get the IO Expansion pass for each device.
+ virtual FunctionPass*
+ getIOExpansion(TargetMachine&, CodeGenOpt::Level) const = 0;
+
+ // Interface to get the Asm printer for each device.
+ virtual AsmPrinter*
+ getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const = 0;
+
+ // Interface to get the Pointer manager pass for each device.
+ virtual FunctionPass*
+ getPointerManager(TargetMachine&, CodeGenOpt::Level) const = 0;
+
+
+ // API utilizing more detailed capabilities of each family of
+ // cards. If a capability is supported, then either usesHardware or
+ // usesSoftware returns true. If usesHardware returns true, then
+ // usesSoftware must return false for the same capability. Hardware
+ // execution means that the feature is done natively by the hardware
+ // and is not emulated by the software. Software execution means
+ // that the feature could be done in the hardware, but there is
+ // software that emulates it, possibly using the hardware for
+ // support, since the hardware does not fully comply with OpenCL
+ // specs.
+ bool isSupported(AMDILDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
+ // Returns the data layout string for this device.
+ virtual std::string getDataLayout() const;
+ // Size and wavefront constants shared by the device classes
+ // (presumably the 700/800 suffixes name hardware families and the
+ // sizes are in bytes -- TODO confirm).
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int MAX_GDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ // Fills mHWBits/mSWBits with the capabilities of the device.
+ virtual void setCaps();
+ // Capabilities executed in hardware / emulated in software; a
+ // capability must never be set in both.
+ llvm::BitVector mHWBits;
+ llvm::BitVector mSWBits;
+ // Subtarget this device was created for.
+ AMDILSubtarget *mSTM;
+ // Flag returned by getDeviceFlag().
+ uint32_t mDeviceFlag;
+private:
+ // Resolves a capability to Hardware, Software or Unsupported.
+ AMDILDeviceInfo::ExecutionMode
+ getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
+}; // AMDILDevice
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,132 @@
+//===-- AMDILDeviceInfo.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILDevices.h"
+#include "AMDILSubtarget.h"
+#include <string>
+using namespace llvm;
+namespace llvm
+{
+// Creates the device object that matches a device name.
+//
+// @param deviceName   name of the device to create
+// @param ptr          subtarget handed to the device constructor
+// @param is64bit      true when 64bit pointers are requested
+// @param is64on32bit  true when 64bit on 32bit pointers are requested
+// @return a newly allocated device; the caller owns it.  Names that are
+//         not recognized yield an AMDIL7XXDevice.
+AMDILDevice*
+getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
+{
+  // 7XX family: recognized by a '7' as third character, the fourth
+  // character selects the exact device.  The length is checked first so
+  // that short (or empty) names are never indexed past their end.
+  if (deviceName.size() > 2 && deviceName[2] == '7') {
+    const char model = deviceName.size() > 3 ? deviceName[3] : '\0';
+    switch (model) {
+    case '1':
+      return new AMDIL710Device(ptr);
+    case '7':
+      return new AMDIL770Device(ptr);
+    default:
+      return new AMDIL7XXDevice(ptr);
+    }
+  }
+
+  // Every remaining device performs the same pointer mode checks, so they
+  // are done once up front.
+  // NOTE(review): the checks also run for tahiti/pitcairn/capeverde even
+  // though those devices select a 64bit device class below when is64bit
+  // is set; confirm whether the is64bit check should apply to them.
+#if DEBUG
+  assert(!is64bit && "This device does not support 64bit pointers!");
+  assert(!is64on32bit && "This device does not support 64bit"
+         " on 32bit pointers!");
+#endif
+
+  if (deviceName == "cypress") {
+    return new AMDILCypressDevice(ptr);
+  }
+  if (deviceName == "juniper") {
+    return new AMDILEvergreenDevice(ptr);
+  }
+  if (deviceName == "redwood") {
+    return new AMDILRedwoodDevice(ptr);
+  }
+  if (deviceName == "cedar") {
+    return new AMDILCedarDevice(ptr);
+  }
+  if (deviceName == "barts"
+      || deviceName == "turks"
+      || deviceName == "caicos") {
+    return new AMDILNIDevice(ptr);
+  }
+  if (deviceName == "cayman"
+      || deviceName == "kauai") {
+    return new AMDILCaymanDevice(ptr);
+  }
+  if (deviceName == "trinity") {
+    return new AMDILTrinityDevice(ptr);
+  }
+  if (deviceName == "tahiti"
+      || deviceName == "pitcairn"
+      || deviceName == "capeverde") {
+    if (is64bit) {
+      return new AMDILSIDevice64(ptr);
+    }
+    return new AMDILSIDevice32(ptr);
+  }
+
+  // Unknown name: fall back to the generic 7XX device.
+  return new AMDIL7XXDevice(ptr);
+}
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,94 @@
+//===-- AMDILDeviceInfo.h -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILDEVICEINFO_H_
+#define _AMDILDEVICEINFO_H_
+#include <string>
+namespace llvm
+{
+class AMDILDevice;
+class AMDILSubtarget;
+namespace AMDILDeviceInfo
+{
+// Each capability can be executed using a hardware instruction,
+// emulated with a sequence of software instructions, or not
+// supported at all.
+enum ExecutionMode {
+ Unsupported = 0, // Unsupported feature on the card (default value)
+ Software, // This is the execution mode that is set if the
+ // feature is emulated in software
+ Hardware // This execution mode is set if the feature exists
+ // natively in hardware
+};
+
+// Any changes to this need to have a corresponding update to the
+// twiki page GPUMetadataABI.
+// NOTE(review): the value 0x1E is not assigned to any capability
+// (PrivateUAV is 0x1D, ByteGDSOps is 0x1F); confirm this gap is intended.
+enum Caps {
+ HalfOps = 0x1, // Half float is supported or not.
+ DoubleOps = 0x2, // Double is supported or not.
+ ByteOps = 0x3, // Byte(char) is supported or not.
+ ShortOps = 0x4, // Short is supported or not.
+ LongOps = 0x5, // Long is supported or not.
+ Images = 0x6, // Images are supported or not.
+ ByteStores = 0x7, // ByteStores available(!HD4XXX).
+ ConstantMem = 0x8, // Constant/CB memory.
+ LocalMem = 0x9, // Local/LDS memory.
+ PrivateMem = 0xA, // Scratch/Private/Stack memory.
+ RegionMem = 0xB, // OCL GDS Memory Extension.
+ FMA = 0xC, // Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
+ PPAMode = 0xF, // UAV Per Pointer Allocation Mode capability
+ NoAlias = 0x10, // Cached loads.
+ Signed24BitOps = 0x11, // Peephole Optimization.
+ // Debug mode implies that no hardware features or optimizations
+ // are performed and that all memory accesses go through a single
+ // uav (Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12, // Debug mode is enabled.
+ CachedMem = 0x13, // Cached mem is available or not.
+ BarrierDetect = 0x14, // Detect duplicate barriers.
+ Semaphore = 0x15, // Flag to specify that semaphores are supported.
+ ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, // Flag to specify if Tmr register is supported.
+ NoInline = 0x19, // Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
+ ByteGDSOps = 0x1F, // Flag to specify if byte GDS ops are available.
+ FlatMem = 0x20, // Flag to specify if device supports flat addressing.
+ // If more capabilities are required, then
+ // this number needs to be increased.
+ // All capabilities must come before this
+ // number.
+ MaxNumberCapabilities = 0x30
+};
+// These have to be in order with the older generations
+// having the lower number enumerations.
+enum Generation {
+ HD4XXX = 0, // 7XX based devices.
+ HD5XXX, // Evergreen based devices.
+ HD6XXX, // NI/Evergreen+ based devices.
+ HD7XXX, // SI based devices.
+ HD8XXX, // CI based devices.
+ HDTEST, // Experimental feature testing device.
+ HDNUMGEN
+};
+
+
+} // namespace AMDILDeviceInfo
+// Creates the device object that matches the given device name; the two
+// flags describe the requested pointer mode and default to false.
+llvm::AMDILDevice*
+getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
+} // namespace llvm
+#endif // _AMDILDEVICEINFO_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,24 @@
+//===-- AMDILDevices.h ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __AMDIL_DEVICES_H_
+#define __AMDIL_DEVICES_H_
+// Include all of the device specific header files
+// This file is for Internal use only!
+#include "AMDILDevice.h"
+#include "AMDIL7XXDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILTNDevice.h"
+#include "AMDILSIDevice.h"
+#endif // __AMDIL_DEVICES_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,231 @@
+//===-- AMDILEGAsmPrinter.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILEGAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+
+// TODO: Add support for verbose.
+// Forwards all constructor arguments to the AMDILAsmPrinter base class;
+// no additional state is set up here.
+AMDILEGAsmPrinter::AMDILEGAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+  : AMDILAsmPrinter(ASM_PRINTER_ARGUMENTS)
+{
+}
+
+// Nothing to release in this class.
+AMDILEGAsmPrinter::~AMDILEGAsmPrinter()
+{
+}
+//
+// @param name  function name to strip
+// @brief strips the "__OpenCL_" prefix and the "_kernel" suffix from the
+// name and returns what is left between them.  The name is returned
+// unchanged unless it really begins with the prefix and ends with the
+// suffix, so short or differently shaped names can neither underflow the
+// length computation nor be cut at the wrong place.
+//
+static
+std::string Strip(const std::string &name)
+{
+  static const char prefix[] = "__OpenCL_";
+  static const char suffix[] = "_kernel";
+  const size_t prefixLen = sizeof(prefix) - 1;
+  const size_t suffixLen = sizeof(suffix) - 1;
+
+  // Too short to hold both tokens.
+  if (name.length() < prefixLen + suffixLen) {
+    return name;
+  }
+  if (name.compare(0, prefixLen, prefix) != 0
+      || name.compare(name.length() - suffixLen, suffixLen, suffix) != 0) {
+    return name;
+  }
+  return name.substr(prefixLen, name.length() - prefixLen - suffixLen);
+}
+// Emits the macro call for MI.  The macro name is the name of the global
+// in operand 0 ("unknown" when operand 0 is not a global); a name that
+// starts with "__fma_f32" is replaced by "__hwfma_f32" when the device
+// runs FMA in hardware.
+void
+AMDILEGAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+                                 OSTREAM_TYPE &O)
+{
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  const MachineOperand &callee = MI->getOperand(0);
+
+  const char *name = "unknown";
+  if (callee.isGlobal()) {
+    name = callee.getGlobal()->getName().data();
+  }
+
+  const bool isFma = ::strncmp(name, "__fma_f32", 9) == 0;
+  if (isFma && curTarget->device()->usesHardware(AMDILDeviceInfo::FMA)) {
+    name = "__hwfma_f32";
+  }
+  emitMCallInst(MI, O, name);
+}
+
+// Prints one machine function: records the per-function state, derives
+// the stripped and the full kernel name, then emits the function header
+// and body.  Always returns false.
+bool
+AMDILEGAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+  // Per-function state consulted while printing.
+  this->MF = &lMF;
+  mMeta->setMF(&lMF);
+  mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+  mAMI = &(lMF.getMMI().getObjFileInfo<AMDILModuleInfo>());
+
+  SetupMachineFunction(lMF);
+
+  const std::string fullName = MF->getFunction()->getName();
+  mName = Strip(fullName);
+  mKernelName = fullName;
+
+  EmitFunctionHeader();
+  EmitFunctionBody();
+  return false;
+}
+// Emits the text for a single machine instruction.  The text is built in a
+// local string stream and handed to OutStreamer.EmitRawText() at the end.
+// Three cases are handled:
+//  - macro functions, which are delegated to emitMacroFunc();
+//  - macro calls, which are printed inline as an "mcall" followed by a
+//    "mov" that copies the result into the real destination register;
+//  - everything else, printed by printInstruction() unless the metadata
+//    manager asks for the compiler-write sequence.
+void
+AMDILEGAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+ std::string FunStr;
+ raw_string_ostream OFunStr(FunStr);
+ formatted_raw_ostream O(OFunStr);
+ const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+ // In debug mode the instruction itself is printed first, behind a ";".
+ if (mDebugMode) {
+ O << ";" ;
+ II->print(O);
+ }
+ if (isMacroFunc(II)) {
+ emitMacroFunc(II, O);
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+ return;
+ }
+ if (isMacroCall(II)) {
+ unsigned reg = 0;
+ unsigned newDst = 0;
+ OpSwizzle opSwiz, oldSwiz;
+ // The macro name is the opcode name without its first five characters
+ // (presumably a fixed prefix of the macro opcodes -- TODO confirm).
+ const char *name = mTM->getInstrInfo()->getName(II->getOpcode()) + 5;
+ // Switch to the hardware FMA macro when the device runs FMA in hardware.
+ if (!::strncmp(name, "__fma_f32", 9)
+ && curTarget->device()->usesHardware(
+ AMDILDeviceInfo::FMA)) {
+ name = "__hwfma_f32";
+ }
+ //II->dump();
+ //assert(0 &&
+ //"Found a macro that is still in use!");
+ int macronum = amd::MacroDBFindMacro(name);
+ O << "\t;"<< name<<"\n";
+ O << "\tmcall("<<macronum<<") ";
+ reg = II->getOperand(0).getReg();
+ // The mcall writes into register 1000 (or the component variant that
+ // matches the real destination); oldSwiz keeps the original swizzle
+ // for the mov that is emitted after the call.
+ newDst = AMDIL::R1000;
+ oldSwiz.u8all = opSwiz.u8all =
+ II->getOperand(0).getTargetFlags();
+ if (isXComponentReg(reg)) {
+ newDst = AMDIL::Rx1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isYComponentReg(reg)) {
+ newDst = AMDIL::Ry1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isZComponentReg(reg)) {
+ newDst = AMDIL::Rz1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isWComponentReg(reg)) {
+ newDst = AMDIL::Rw1000;
+ opSwiz.bits.swizzle = AMDIL_DST_X___;
+ } else if (isXYComponentReg(reg)) {
+ newDst = AMDIL::Rxy1000;
+ opSwiz.bits.swizzle = AMDIL_DST_XY__;
+ } else if (isZWComponentReg(reg)) {
+ newDst = AMDIL::Rzw1000;
+ opSwiz.bits.swizzle = AMDIL_DST_XY__;
+ } else {
+ opSwiz.bits.swizzle = AMDIL_DST_DFLT;
+ }
+ // Print "(dst), (src, src, ...)": operand 0 is replaced by newDst, the
+ // remaining operands are printed as they are.
+ for (unsigned x = 0, y = II->getNumOperands(); x < y; ++x) {
+ if (!x) {
+ O << "(";
+ O << getRegisterName(newDst);
+ O << getDstSwizzle(opSwiz.bits.swizzle);
+ } else {
+ printOperand(II, x
+ , O
+ );
+ }
+ if (!x) {
+ O << "), (";
+ } else if (x != y - 1) {
+ O << ", ";
+ } else {
+ O << ")\n";
+ }
+ }
+ // Copy the call result from newDst into the real destination register;
+ // the source swizzle is chosen by the destination's component kind.
+ O << "\tmov " << getRegisterName(reg) << getDstSwizzle(oldSwiz.bits.swizzle)
+ << ", " << getRegisterName(newDst);
+ if (isXComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_X000);
+ } else if (isYComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_0X00);
+ } else if (isZComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_00X0);
+ } else if (isWComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_000X);
+ } else if (isXYComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_XY00);
+ } else if (isZWComponentReg(reg)) {
+ O << getSrcSwizzle(AMDIL_SRC_00XY);
+ } else {
+ O << getSrcSwizzle(AMDIL_SRC_DFLT);
+ }
+ O << "\n";
+ // Record the macro: in mMacroIDs when the device supports MacroDB,
+ // otherwise as a called intrinsic of the current function.
+ if (curTarget->device()->isSupported(
+ AMDILDeviceInfo::MacroDB)) {
+ mMacroIDs.insert(macronum);
+ } else {
+ mMFI->addCalledIntr(macronum);
+ }
+ } else {
+
+ // Print the assembly for the instruction.
+ // We want to make sure that we do HW constants
+ // before we do arena segment
+ // TODO: This is a hack to get around some
+ // conformance failures.
+ if (mMeta->useCompilerWrite(II)) {
+ // Emit a store wrapped in "if_logicalz cb0[0].x ... endif"; a raw UAV
+ // store is used when the function uses the raw UAV, an arena UAV
+ // store otherwise.
+ O << "\tif_logicalz cb0[0].x\n";
+ if (mMFI->usesMem(AMDILDevice::RAW_UAV_ID)) {
+ O << "\tuav_raw_store_id("
+ << curTarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+ << ") ";
+ O << "mem0.x___, cb0[3].x, r0.0\n";
+ } else {
+ O << "\tuav_arena_store_id("
+ << curTarget->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)
+ << ")_size(dword) ";
+ O << "cb0[3].x, r0.0\n";
+ }
+ O << "\tendif\n";
+ mMFI->addMetadata(";memory:compilerwrite");
+ } else {
+ printInstruction(II, O);
+ }
+ }
+ // Flush the formatted stream into FunStr and emit it as raw text.
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,61 @@
+//===-- AMDILEGAsmPrinter.h -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Asm Printer class for the Evergreen and N. Islands generations of cards.
+// This class handles all of the items that are unique to these devices and
+// that must be handled by the AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_EG_ASM_PRINTER_H_
+#define _AMDIL_EG_ASM_PRINTER_H_
+#include "AMDILAsmPrinter.h"
+
+namespace llvm
+{
+// AsmPrinter specialization that overrides instruction emission, machine
+// function processing and macro function emission.
+class LLVM_LIBRARY_VISIBILITY AMDILEGAsmPrinter : public AMDILAsmPrinter
+{
+public:
+  // Constructor for the AMDIL EG specific AsmPrinter class.  The argument
+  // list is defined by AMDIL_ASM_PRINTER_ARGUMENTS and is forwarded to the
+  // AMDILAsmPrinter base class.
+  AMDILEGAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+  // Destructor for the EG Asm Printer class.
+  virtual ~AMDILEGAsmPrinter();
+
+  // @param MI Machine instruction to print
+  // @brief emits the assembly text for a single machine instruction
+  void EmitInstruction(const MachineInstr *MI);
+
+  // @param F MachineFunction to print the assembly for
+  // @brief parse the specified machine function and print
+  // out the assembly for all the instructions in the function
+  bool runOnMachineFunction(MachineFunction &F);
+
+protected:
+  // @param MI Machine instruction to emit the macro code for
+  // @param O  stream the macro code is written to
+  // @brief emits a fully functional macro function that uses the
+  // argument registers as the macro arguments
+  virtual void emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
+
+}; // AMDILEGAsmPrinter
+} // end of llvm namespace
+#endif // _AMDIL_EG_ASM_PRINTER_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1288 @@
+//===-- AMDILEGIOExpansion.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of IO expansion class for evergreen and NI devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+using namespace llvm;
+// Constructs the EG/NI IO expansion pass; both arguments are handed
+// unchanged to the AMDILImageExpansion base class.
+AMDILEGIOExpansion::AMDILEGIOExpansion(TargetMachine &tm,
+                                       CodeGenOpt::Level OptLevel)
+  : AMDILImageExpansion(tm, OptLevel)
+{
+}
+
+// Destructor; this class has nothing of its own to release.
+AMDILEGIOExpansion::~AMDILEGIOExpansion()
+{
+}
+// Returns the human-readable name of this pass.
+const char *
+AMDILEGIOExpansion::getPassName() const
+{
+  return "AMDIL EG/NI IO Expansion Pass";
+}
+//
+// @param MI Machine instruction to inspect
+// @brief Returns true when operand 0 of MI is a global whose name is
+// "__amdil_" followed by "sample_data", "write_image" or "get_image"
+// (anything may follow the matched prefix). Returns false otherwise,
+// including when operand 0 is not a global.
+//
+bool
+AMDILEGIOExpansion::isImageIO(MachineInstr *MI)
+{
+  if (!MI->getOperand(0).isGlobal()) {
+    return false;
+  }
+  // Compare through StringRef so every check is bounded by the length of
+  // the name; StringRef::data() is not guaranteed to be NUL-terminated,
+  // so C string functions must not be used on it.
+  llvm::StringRef name = MI->getOperand(0).getGlobal()->getName();
+  if (!name.startswith("__amdil_")) {
+    return false;
+  }
+  // Drop the 8-character "__amdil_" prefix and test what follows.
+  name = name.substr(8);
+  return name.startswith("sample_data")
+         || name.startswith("write_image")
+         || name.startswith("get_image");
+}
+//
+// @param MI Machine instruction to classify (may be null)
+// @brief Returns true for the image read/write/info opcodes listed below.
+// Any other opcode is classified by AMDILIOExpansion::isIOInstruction.
+// A null MI is never an IO instruction.
+//
+bool
+AMDILEGIOExpansion::isIOInstruction(MachineInstr *MI)
+{
+  if (MI == 0) {
+    return false;
+  }
+  bool isImageOp;
+  switch (MI->getOpcode()) {
+  // 1D images
+  case AMDIL::IMAGE1D_READ:
+  case AMDIL::IMAGE1D_READ_UNNORM:
+  case AMDIL::IMAGE1D_WRITE:
+  case AMDIL::IMAGE1D_INFO0:
+  case AMDIL::IMAGE1D_INFO1:
+  // 1D image arrays
+  case AMDIL::IMAGE1DA_READ:
+  case AMDIL::IMAGE1DA_READ_UNNORM:
+  case AMDIL::IMAGE1DA_WRITE:
+  case AMDIL::IMAGE1DA_INFO0:
+  case AMDIL::IMAGE1DA_INFO1:
+  // 1D image buffers
+  case AMDIL::IMAGE1DB_TXLD:
+  case AMDIL::IMAGE1DB_READ:
+  case AMDIL::IMAGE1DB_READ_UNNORM:
+  case AMDIL::IMAGE1DB_WRITE:
+  case AMDIL::IMAGE1DB_INFO0:
+  case AMDIL::IMAGE1DB_INFO1:
+  // 2D images
+  case AMDIL::IMAGE2D_READ:
+  case AMDIL::IMAGE2D_READ_UNNORM:
+  case AMDIL::IMAGE2D_WRITE:
+  case AMDIL::IMAGE2D_INFO0:
+  case AMDIL::IMAGE2D_INFO1:
+  // 2D image arrays
+  case AMDIL::IMAGE2DA_READ:
+  case AMDIL::IMAGE2DA_READ_UNNORM:
+  case AMDIL::IMAGE2DA_WRITE:
+  case AMDIL::IMAGE2DA_INFO0:
+  case AMDIL::IMAGE2DA_INFO1:
+  // 3D images
+  case AMDIL::IMAGE3D_READ:
+  case AMDIL::IMAGE3D_READ_UNNORM:
+  case AMDIL::IMAGE3D_WRITE:
+  case AMDIL::IMAGE3D_INFO0:
+  case AMDIL::IMAGE3D_INFO1:
+    isImageOp = true;
+    break;
+  default:
+    isImageOp = false;
+    break;
+  }
+  // Short-circuit keeps the base-class query to the non-image opcodes only.
+  return isImageOp || AMDILIOExpansion::isIOInstruction(MI);
+}
+//
+// @param MI IO instruction to expand; must satisfy isIOInstruction(MI)
+// @brief Routes image reads to expandImageLoad, image writes to
+// expandImageStore and image info queries to expandImageParam. Every
+// other opcode is passed to AMDILIOExpansion::expandIOInstruction.
+//
+void
+AMDILEGIOExpansion::expandIOInstruction(MachineInstr *MI)
+{
+  assert(isIOInstruction(MI)
+         && "Must be an IO instruction to be passed to this function!");
+  switch (MI->getOpcode()) {
+  // Image reads, normalized and unnormalized.
+  case AMDIL::IMAGE1D_READ:
+  case AMDIL::IMAGE1D_READ_UNNORM:
+  case AMDIL::IMAGE1DA_READ:
+  case AMDIL::IMAGE1DA_READ_UNNORM:
+  case AMDIL::IMAGE1DB_TXLD:
+  case AMDIL::IMAGE1DB_READ:
+  case AMDIL::IMAGE1DB_READ_UNNORM:
+  case AMDIL::IMAGE2D_READ:
+  case AMDIL::IMAGE2D_READ_UNNORM:
+  case AMDIL::IMAGE2DA_READ:
+  case AMDIL::IMAGE2DA_READ_UNNORM:
+  case AMDIL::IMAGE3D_READ:
+  case AMDIL::IMAGE3D_READ_UNNORM:
+    expandImageLoad(mBB, MI);
+    break;
+  // Image writes.
+  case AMDIL::IMAGE1D_WRITE:
+  case AMDIL::IMAGE1DA_WRITE:
+  case AMDIL::IMAGE1DB_WRITE:
+  case AMDIL::IMAGE2D_WRITE:
+  case AMDIL::IMAGE2DA_WRITE:
+  case AMDIL::IMAGE3D_WRITE:
+    expandImageStore(mBB, MI);
+    break;
+  // Image info queries.
+  case AMDIL::IMAGE1D_INFO0:
+  case AMDIL::IMAGE1D_INFO1:
+  case AMDIL::IMAGE1DA_INFO0:
+  case AMDIL::IMAGE1DA_INFO1:
+  case AMDIL::IMAGE1DB_INFO0:
+  case AMDIL::IMAGE1DB_INFO1:
+  case AMDIL::IMAGE2D_INFO0:
+  case AMDIL::IMAGE2D_INFO1:
+  case AMDIL::IMAGE2DA_INFO0:
+  case AMDIL::IMAGE2DA_INFO1:
+  case AMDIL::IMAGE3D_INFO0:
+  case AMDIL::IMAGE3D_INFO1:
+    expandImageParam(mBB, MI);
+    break;
+  // Everything else is handled by the generic IO expansion.
+  default:
+    AMDILIOExpansion::expandIOInstruction(MI);
+    break;
+  }
+}
+//
+// @param MI Machine instruction whose resource encoding is queried
+// @brief Returns the CacheableRead flag of MI's asm-printer flags, but
+// only when the encoded resource ID is 11; false for any other resource.
+//
+bool
+AMDILEGIOExpansion::isCacheableOp(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc resEnc;
+  getAsmPrinterFlags(MI, resEnc);
+  // We only support caching on UAV11 - JeffG
+  return resEnc.bits.ResourceID == 11 && resEnc.bits.CacheableRead;
+}
+//
+// @param MI Machine instruction whose resource encoding is queried
+// @brief Returns true when MI's encoded resource ID equals the device's
+// ARENA_UAV_ID resource, or is at least ARENA_SEGMENT_RESERVED_UAVS.
+//
+bool
+AMDILEGIOExpansion::isArenaOp(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc resEnc;
+  getAsmPrinterFlags(MI, resEnc);
+  if (resEnc.bits.ResourceID
+      == mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+    return true;
+  }
+  return resEnc.bits.ResourceID >= ARENA_SEGMENT_RESERVED_UAVS;
+}
+//
+// @param MI Machine instruction whose data may be packed
+// @brief Emits, ahead of MI, the instructions that handle the packed-data
+// id reported by getPackedID(MI). Nothing is emitted when
+// isPackedData(MI) is false. CAL versions below CAL_VERSION_SC_137 and
+// all UNPACK_* ids are forwarded to AMDIL789IOExpansion::expandPackedData.
+//
+void
+AMDILEGIOExpansion::expandPackedData(MachineInstr *MI)
+{
+ if (!isPackedData(MI)) {
+ return;
+ }
+ // There is a bug in the CAL compiler that incorrectly
+ // errors when the UBIT_INSERT instruction is used.
+ if (mSTM->calVersion() < CAL_VERSION_SC_137) {
+ AMDIL789IOExpansion::expandPackedData(MI);
+ return;
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ // If we have packed data, then the shift size is no longer
+ // the same as the load size and we need to adjust accordingly
+ switch(getPackedID(MI)) {
+ default:
+ break;
+ case PACK_V2I8: {
+ // One UBIT_INSERT_i32 with both literals equal to 8, taking Ry1011 and
+ // Rx1011 as inputs and writing Rx1011.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(8)).addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::Ry1011).addReg(AMDIL::Rx1011);
+ }
+ break;
+ case PACK_V4I8: {
+ // Split R1011 into Rxy1012 (LHI) and Rxy1011 (LLO), combine the two
+ // halves with a v2i32 insert using literal 8 per lane, then combine
+ // Ry1011 and Rx1011 with a scalar insert using literal 16.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::Rxy1012)
+
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32),
+ AMDIL::Rxy1011)
+
+ .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
+ .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
+ .addReg(AMDIL::Rxy1012).addReg(AMDIL::Rxy1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::Ry1011).addReg(AMDIL::Rx1011);
+ }
+ break;
+ case PACK_V2I16: {
+ // Same shape as PACK_V2I8 but with both literals equal to 16.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::Ry1011).addReg(AMDIL::Rx1011);
+ }
+ break;
+ case PACK_V4I16: {
+ // Split R1011 into Rxy1012 (LHI) and Rxy1011 (LLO) and combine them
+ // with a v2i32 insert using literal 16 per lane; result in Rxy1011.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::Rxy1012)
+
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::Rxy1011)
+
+ .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
+ .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
+ .addReg(AMDIL::Rxy1012).addReg(AMDIL::Rxy1011);
+ }
+ break;
+ // Unpacking has no EG-specific sequence here; use the shared 7XX/8XX/9XX
+ // implementation.
+ case UNPACK_V2I8:
+ case UNPACK_V4I8:
+ case UNPACK_V2I16:
+ case UNPACK_V4I16:
+ AMDIL789IOExpansion::expandPackedData(MI);
+ break;
+ };
+}
+//
+// @param MI Machine instruction to inspect
+// @brief Returns true when MI has no memory operands, or when the
+// alignment of its first memory operand has none of the bits of
+// (size - 1) set, where size is that operand's size.
+//
+static bool
+isAlignedInst(MachineInstr *MI)
+{
+  if (MI->memoperands_empty()) {
+    return true;
+  }
+  // Only the first memory operand is consulted.
+  MachineInstr::mmo_iterator firstMemOp = MI->memoperands_begin();
+  return ((*firstMemOp)->getAlignment()
+          & ((*firstMemOp)->getSize() - 1)) == 0;
+}
+
+//
+// @param MI Global load instruction to expand
+// @brief Emits, ahead of MI, the UAV load sequence for a global-memory
+// load. The sequence is selected by getMemorySize(MI) (1, 2, 4, 8, or
+// anything else for the 4-component case) and by whether the access is an
+// arena op, a cacheable op, and (for 8 bytes and up) an aligned op.
+// Addresses are read from register 1010 and the loaded data is written to
+// register 1011; registers 1006-1008 and 1012 are used as temporaries.
+// After the load, packed data and extending loads are handled, a move of
+// the data register into MI's original operand 0 is emitted, and MI's
+// operand 0 is rewritten to the data register.
+//
+void
+AMDILEGIOExpansion::expandGlobalLoad(MachineInstr *MI)
+{
+ bool usesArena = isArenaOp(MI);
+ bool cacheable = isCacheableOp(MI);
+ // The aligned cached opcodes are only used from CAL_CACHED_ALIGNED_UAVS on.
+ bool aligned = mSTM->calVersion() >= CAL_CACHED_ALIGNED_UAVS
+ && isAlignedInst(MI);
+ uint32_t ID = getPointerID(MI);
+ mKM->setOutputInst();
+ // These instructions are generated before the current MI.
+ expandLoadStartCode(MI);
+ expandArenaSetup(MI);
+ DebugLoc DL = MI->getDebugLoc();
+ if (getMemorySize(MI) == 1) {
+ if (usesArena) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i8), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ } else {
+ // Non-arena byte load: keep the low two address bits in Rx1008, clear
+ // them in the address, derive a shift amount (0, 8, 16 or 24) from the
+ // low bits, load the containing dword and shift it right.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ // Adds (0, -1, -2, -3) per component so that the component matching the
+ // byte offset becomes zero, then compares each component against zero.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0));
+ // Chain of conditional moves on the x, y and z compare results that
+ // leaves the shift amount in Rx1008.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1012)
+ .addImm(mMFI->addi32Literal(0))
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Ry1012)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::Rx1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rz1012)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::Rx1008);
+ if (cacheable) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+ }
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_v4i8), AMDIL::R1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ }
+ } else if (getMemorySize(MI) == 2) {
+ if (usesArena) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i16), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ } else {
+ // Non-arena short load: bit 1 of the address selects a shift of 16 or
+ // 0; the containing dword is loaded from the dword-aligned address and
+ // shifted right by that amount.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ if (cacheable) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+ }
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ }
+ } else if (getMemorySize(MI) == 4) {
+ // Dword load: a single arena, cached or raw load into Rx1011.
+ if (usesArena) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ } else {
+ if (cacheable) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+ }
+ }
+ } else if (getMemorySize(MI) == 8) {
+ if (usesArena) {
+ // Arena loads are issued one dword at a time; the first dword always
+ // comes from the address in Rx1010.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Ry1011)
+ .addReg(AMDIL::Ry1010)
+ .addImm(ID);
+ } else {
+ // Without hardware arena vectors the second address is extracted into
+ // a scalar temporary, loaded, and merged with the first dword.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+ .addReg(AMDIL::R1010)
+ .addImm(2);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1007)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ }
+ } else {
+ if (cacheable) {
+ if (aligned) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHEDALIGNED_v2i32),
+ AMDIL::Rxy1011).addReg(AMDIL::Rx1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v2i32),
+ AMDIL::Rxy1011).addReg(AMDIL::Rx1010).addImm(ID);
+ }
+
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32),
+ AMDIL::Rxy1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+ }
+ }
+ } else {
+ // Any other size is treated as a full four-component load.
+ if (usesArena) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Ry1011)
+ .addReg(AMDIL::Ry1010)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rz1011)
+ .addReg(AMDIL::Rz1010)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rw1011)
+ .addReg(AMDIL::Rw1010)
+ .addImm(ID);
+ } else {
+ // Second dword: extract address, load, merge with the first into Rxy1011.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+ .addReg(AMDIL::R1010)
+ .addImm(2);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1007)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ // Third dword goes to Rx1008, fourth dword to Rx1006.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+ .addReg(AMDIL::R1010)
+ .addImm(3);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1007)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+ .addReg(AMDIL::R1010)
+ .addImm(4);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1006)
+ .addReg(AMDIL::Rx1007)
+ .addImm(ID);
+ // NOTE(review): the operands here are (fourth, third) while the first
+ // LCREATE above passes (first, second) - confirm the order is intended.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rzw1011)
+ .addReg(AMDIL::Rx1006)
+ .addReg(AMDIL::Rx1008);
+ }
+ } else {
+ if (cacheable) {
+ if (aligned) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHEDALIGNED_v4i32),
+ AMDIL::R1011).addReg(AMDIL::Rx1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v4i32),
+ AMDIL::R1011).addReg(AMDIL::Rx1010).addImm(ID);
+ }
+
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32),
+ AMDIL::R1011).addReg(AMDIL::Rx1010).addImm(ID);
+
+ }
+ }
+ }
+ expandPackedData(MI);
+ // expandExtendLoad returns 0 when it did not pick a register; fall back
+ // to the instruction's data register in that case.
+ unsigned dataReg = expandExtendLoad(MI);
+ if (!dataReg) {
+ dataReg = getDataReg(MI);
+ }
+ // Move the loaded data into MI's original destination, then retarget
+ // MI's operand 0 to the data register.
+ BuildMI(*mBB, MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(dataReg);
+ MI->getOperand(0).setReg(dataReg);
+}
+
+//
+// @param MI Region load instruction to expand
+// @brief Emits, ahead of MI, the GDS load sequence for a region-memory
+// load. An error message is recorded and nothing is emitted when the
+// device does not support region memory. When the device does not use
+// hardware region memory, or MI is not a hardware region access, the load
+// is expanded as a global load instead. Addresses are read from register
+// 1010 and the loaded data is written to register 1011; Rx1008 is used as
+// a temporary. After the load, packed data and extending loads are
+// handled, a move of the data register into MI's original operand 0 is
+// emitted, and MI's operand 0 is rewritten to the data register.
+//
+void
+AMDILEGIOExpansion::expandRegionLoad(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+ return;
+ }
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesGDS() && mMFI->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned mulOp = 0;
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n");
+ // With asserts compiled out, recover by using the device's GDS resource
+ // id and recording a message.
+ if (!gID) {
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ // These instructions are generated before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // Four-component load: build the four dword addresses by adding
+ // (0, 1, 2, 3) to the base address, then load each component.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+ .addReg(AMDIL::Ry1010)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rz1011)
+ .addReg(AMDIL::Rz1010)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rw1011)
+ .addReg(AMDIL::Rw1010)
+
+ .addImm(gID);
+ break;
+ case 1:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
+ // No byte GDS ops: compute the bit offset (low two address bits times
+ // 8) in Rx1008, load the containing dword from the dword-aligned
+ // address and extract 8 bits at that offset.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1011);
+ } else {
+ // Byte GDS ops available: pick the signed or unsigned byte load.
+ if (isSWSExtLoadInst(MI)) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_i8), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(gID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_u8), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(gID);
+ }
+ }
+ break;
+ case 2:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
+ // Same scheme as the byte case, extracting 16 bits instead of 8.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1011);
+ } else {
+ if (isSWSExtLoadInst(MI)) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_i16), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(gID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_u16), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+ .addImm(gID);
+ }
+ }
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(gID);
+ break;
+ case 8:
+ // Two-component load: addresses are base + (0, 1), one load each.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+ .addReg(AMDIL::Ry1010)
+
+ .addImm(gID);
+ break;
+ };
+ expandPackedData(MI);
+ // expandExtendLoad returns 0 when it did not pick a register; fall back
+ // to the instruction's data register in that case.
+ unsigned dataReg = expandExtendLoad(MI);
+ if (!dataReg) {
+ dataReg = getDataReg(MI);
+ }
+ // Move the loaded data into MI's original destination, then retarget
+ // MI's operand 0 to the data register.
+ BuildMI(*mBB, MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(dataReg);
+ MI->getOperand(0).setReg(dataReg);
+}
+//
+// @param MI Local load instruction to expand
+// @brief Emits, ahead of MI, the LDS load sequence for a local-memory
+// load. When the device does not use hardware local memory, or MI is not
+// a hardware local access, the load is expanded as a global load instead.
+// Addresses are read from Rx1010 and the loaded data is written to
+// register 1011; Rx1008 is used as a temporary. After the load, packed
+// data and extending loads are handled, a move of the data register into
+// MI's original operand 0 is emitted, and MI's operand 0 is rewritten to
+// the data register.
+//
+void
+AMDILEGIOExpansion::expandLocalLoad(MachineInstr *MI)
+{
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesLDS() && mMFI->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t lID = getPointerID(MI);
+ assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n");
+ // With asserts compiled out, recover by using the device's LDS resource
+ // id and recording a message.
+ if (!lID) {
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned mulOp = 0;
+ // These instructions are generated before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // Any size not listed below uses the four-component vector load.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC_v4i32), AMDIL::R1011)
+
+ .addReg(AMDIL::Rx1010)
+ .addImm(lID);
+ break;
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC_v2i32), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(lID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(lID);
+ break;
+ case 1:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ // No byte LDS ops: compute the bit offset (low two address bits times
+ // 8) in Rx1008, load the containing dword from the dword-aligned
+ // address and extract 8 bits at that offset.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(lID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1011);
+ } else {
+ // Byte LDS ops available: pick the signed or unsigned byte load.
+ if (isSWSExtLoadInst(MI)) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_i8), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_u8), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(lID);
+ }
+ }
+ break;
+ case 2:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ // Same scheme as the byte case, extracting 16 bits instead of 8.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(lID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1011);
+ } else {
+ if (isSWSExtLoadInst(MI)) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_i16), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_u16), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(lID);
+ }
+ }
+ break;
+ }
+ expandPackedData(MI);
+ // expandExtendLoad returns 0 when it did not pick a register; fall back
+ // to the instruction's data register in that case.
+ unsigned dataReg = expandExtendLoad(MI);
+ if (!dataReg) {
+ dataReg = getDataReg(MI);
+ }
+ // Move the loaded data into MI's original destination, then retarget
+ // MI's operand 0 to the data register.
+ BuildMI(*mBB, MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(dataReg);
+ MI->getOperand(0).setReg(dataReg);
+}
+//
+// @param MI Global store instruction to expand
+// @brief Emits, ahead of MI, the UAV store sequence for a global-memory
+// store. The sequence is selected by getMemorySize(MI) (1, 2, 4, 8, or
+// anything else for the four-component case) and by whether the access is
+// an arena op. Addresses are read from register 1010 and the data to
+// store from register 1011; Rx1007 and Rx1008 are used as temporaries.
+//
+void
+AMDILEGIOExpansion::expandGlobalStore(MachineInstr *MI)
+{
+ bool usesArena = isArenaOp(MI);
+ uint32_t ID = getPointerID(MI);
+ mKM->setOutputInst();
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions are expanded before the current MI.
+ expandStoreSetupCode(MI);
+ expandArenaSetup(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ if (usesArena) {
+ // Arena stores are issued one dword at a time; the first dword always
+ // uses the address in Rx1010 and the data in Rx1011.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Ry1010)
+ .addReg(AMDIL::Ry1011)
+
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rz1010)
+ .addReg(AMDIL::Rz1011)
+
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rw1010)
+ .addReg(AMDIL::Rw1011)
+
+ .addImm(ID);
+ } else {
+ // Without hardware arena vectors, each remaining component's address
+ // and data are extracted into Rx1007 / Rx1008 before being stored.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+ .addReg(AMDIL::R1010)
+ .addImm(2);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
+
+ .addReg(AMDIL::R1011)
+ .addImm(2);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+ .addReg(AMDIL::R1010)
+ .addImm(3);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
+
+ .addReg(AMDIL::R1011)
+ .addImm(3);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+
+ .addReg(AMDIL::R1010)
+ .addImm(4);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
+
+ .addReg(AMDIL::R1011)
+ .addImm(4);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(ID);
+ }
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
+ .addReg(AMDIL::Rx1010)
+
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ }
+ break;
+ case 1:
+ if (usesArena) {
+ // Mask the data to its low 8 bits before the arena byte store.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i8), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(ID);
+ } else {
+ // NOTE(review): the non-arena byte path emits a full i32 raw store;
+ // presumably expandStoreSetupCode prepares the dword - confirm.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+ .addReg(AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(ID);
+ }
+ break;
+ case 2:
+ if (usesArena) {
+ // Mask the data to its low 16 bits before the arena short store.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i16), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+ .addReg(AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(ID);
+ }
+ break;
+ case 4:
+ if (usesArena) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+ .addReg(AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(ID);
+ }
+ break;
+ case 8:
+ if (usesArena) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Ry1010)
+ .addReg(AMDIL::Ry1011)
+
+ .addImm(ID);
+ } else {
+ // Second component: extract its address and data, then store.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rxy1010)
+
+ .addImm(2);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rxy1011)
+
+ .addImm(2);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(ID);
+ }
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEMxy)
+ .addReg(AMDIL::Rx1010)
+ .addReg(AMDIL::Rxy1011)
+
+ .addImm(ID);
+ }
+ break;
+ };
+}
+//
+// @param MI Region store instruction to expand
+// @brief Emits, ahead of MI, the GDS store sequence for a region-memory
+// store. When the device does not use hardware region memory, or MI is
+// not a hardware region access, the store is expanded as a global store
+// instead. Addresses are read from register 1010 and the data to store
+// from register 1011; registers 1006-1008 and 1012 are used as
+// temporaries by the sub-dword paths.
+//
+void
+AMDILEGIOExpansion::expandRegionStore(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalStore(MI);
+ }
+ mKM->setOutputInst();
+ if (!mMFI->usesGDS() && mMFI->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n");
+ // With asserts compiled out, recover by using the device's GDS resource
+ // id and recording a message.
+ if (!gID) {
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ // The original selected this with a ternary on HWRegion whose two arms
+ // were both UMUL24_i32 (and HWRegion is always true past the early
+ // return above), so the opcode is a constant.
+ unsigned mulOp = AMDIL::UMUL24_i32;
+ // These instructions are expanded before the current MI.
+ expandStoreSetupCode(MI);
+ expandArenaSetup(MI);
+ switch (getMemorySize(MI)) {
+ default:
+
+ // Four-component store: build the four dword addresses by adding
+ // (0, 1, 2, 3) to the base address, then store each component.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
+ .addReg(AMDIL::Ry1011)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rz1010)
+ .addReg(AMDIL::Rz1011)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rw1010)
+ .addReg(AMDIL::Rw1011)
+
+ .addImm(gID);
+ break;
+ case 1:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
+ // No byte GDS ops: mask the data to 8 bits, then build a mask and a
+ // shifted value and write them with a masked-OR atomic.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1012)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(3));
+
+ // NOTE(review): Rx1008 is read here but is not written earlier in this
+ // function (the `& 3` above goes to Rx1012), and Rx1006 below is never
+ // read afterwards - confirm the register assignments in this sequence.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::Rx1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1006)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(mMFI->addi32Literal(0xFFFFFF00))
+ .addImm(mMFI->addi32Literal(0x00FFFFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Ry1007)
+ .addReg(AMDIL::Ry1008)
+ .addReg(AMDIL::Rx1007)
+
+ .addImm(mMFI->addi32Literal(0xFF00FFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rz1012)
+ .addReg(AMDIL::Rz1008)
+ .addReg(AMDIL::Rx1007)
+
+ .addImm(mMFI->addi32Literal(0xFFFF00FF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1007);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_R_MSKOR_NORET))
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0))
+ .addReg(AMDIL::Rx1012)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(gID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_i8), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(gID);
+ }
+ break;
+ case 2:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
+ // No byte GDS ops: mask the data to 16 bits, use bit 1 of the address
+ // to pick the half-word mask (Rx1012) and shift (Rx1008), shift the
+ // data into place and write it with a masked-OR atomic.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(mMFI->addi32Literal(0x0000FFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(mMFI->addi32Literal(0x0000FFFF))
+ .addImm(mMFI->addi32Literal(0xFFFF0000));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1008)
+
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_R_MSKOR_NORET))
+ .addReg(AMDIL::Rx1010)
+ .addImm(mMFI->addi32Literal(0))
+ .addReg(AMDIL::Rx1012)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(gID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_i16), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(gID);
+ }
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(gID);
+ break;
+ case 8:
+ // Two-component store: addresses are base + (0, 1), one store each.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+ .addReg(AMDIL::Rx1010)
+
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ .addReg(AMDIL::Rx1011)
+
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
+ .addReg(AMDIL::Ry1011)
+
+ .addImm(gID);
+ break;
+ };
+}
+
+/// Expand a store to local memory (LDS) into IL instructions that are
+/// inserted in front of \p MI.
+///
+/// When the device has no hardware local memory, or \p MI is not a
+/// hardware-local access, the store is handed to expandGlobalStore() instead.
+/// Otherwise the value returned by getMemorySize() picks the sequence:
+///   - 1 / 2: a direct LDSSTORE_i8 / LDSSTORE_i16 when the device reports
+///            ByteLDSOps, else a mask-and-merge sequence built from atomics;
+///   - 4:     LDSSTORE;
+///   - 8:     LDSSTOREVEC_v2i32;
+///   - other: LDSSTOREVEC_v4i32.
+///
+/// A zero pointer ID asserts in debug builds; in builds without assertions it
+/// is replaced by the device's LDS_ID resource and a message is recorded.
+void
+AMDILEGIOExpansion::expandLocalStore(MachineInstr *MI)
+{
+  bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+  if (!HWLocal || !isHardwareLocal(MI)) {
+    return expandGlobalStore(MI);
+  }
+  DebugLoc DL = MI->getDebugLoc();
+  if (!mMFI->usesLDS() && mMFI->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  uint32_t lID = getPointerID(MI);
+  assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n");
+  if (!lID) {
+    // Recovery path for builds where the assert above is compiled out.
+    lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+    mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+  }
+  // HWLocal is always true past the guard above, and both arms of the former
+  // "HWLocal ? UMUL24_i32 : UMUL24_i32" named the same opcode anyway.
+  const unsigned mulOp = AMDIL::UMUL24_i32;
+  // These instructions are expanded before the current MI.
+  expandStoreSetupCode(MI);
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC_v4i32), AMDIL::MEM)
+      .addReg(AMDIL::Rx1010)
+      .addReg(AMDIL::R1011)
+      .addImm(lID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC_v2i32), AMDIL::MEMxy)
+      .addReg(AMDIL::Rx1010)
+      .addReg(AMDIL::Rxy1011)
+      .addImm(lID);
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTORE), AMDIL::Rx1010)
+      .addReg(AMDIL::Rx1011)
+      .addImm(lID);
+    break;
+  case 1:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+      // Keep only the low byte of the value being stored.
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1011)
+        .addImm(mMFI->addi32Literal(0xFF));
+      // Low two bits of the address register.
+      // NOTE(review): the result lands in Rx1012, but the ADD below reads
+      // Rx1008 (the 16-bit path uses Rx1008 for both) -- confirm which
+      // register is intended.
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1012)
+        .addReg(AMDIL::Rx1010)
+        .addImm(mMFI->addi32Literal(3));
+      // Add the 128-bit literal whose 32-bit pieces are 0, 0xFFFFFFFF,
+      // 0xFFFFFFFE and 0xFFFFFFFD (presumably 0/-1/-2/-3 per component).
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+        .addReg(AMDIL::Rx1008)
+        .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+                                     (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+      // Rx1006 = Rx1008 * 8; used as the shift amount for the value below.
+      BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1006)
+        .addReg(AMDIL::Rx1008)
+        .addImm(mMFI->addi32Literal(8));
+      // Chain of conditional moves that selects one of four byte masks.
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+        .addReg(AMDIL::Rx1008)
+        .addImm(mMFI->addi32Literal(0xFFFFFF00))
+        .addImm(mMFI->addi32Literal(0x00FFFFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+        .addReg(AMDIL::Ry1008)
+        .addReg(AMDIL::Rx1007)
+        .addImm(mMFI->addi32Literal(0xFF00FFFF));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+        .addReg(AMDIL::Rz1008)
+        .addReg(AMDIL::Rx1007)
+        .addImm(mMFI->addi32Literal(0xFFFF00FF));
+      // Shift the masked byte by the amount computed in Rx1006.
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1006);
+
+      if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
+        // Single masked-or atomic taking the mask (Rx1012) and value (Rx1011).
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_MSKOR_NORET))
+          .addReg(AMDIL::Rx1010)
+          .addImm(mMFI->addi32Literal(0))
+          .addReg(AMDIL::Rx1012)
+          .addReg(AMDIL::Rx1011)
+          .addImm(lID);
+      } else {
+        // Two-step fallback: apply the mask, then OR in the value.
+        // NOTE(review): the mask is applied with ATOM_L_ADD_NORET; an AND is
+        // the usual way to clear bits -- confirm ADD is intended.
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
+                AMDIL::Rx1010)
+          .addReg(AMDIL::Rx1012)
+          .addImm(lID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_OR_NORET),
+                AMDIL::Rx1010)
+          .addReg(AMDIL::Rx1011)
+          .addImm(lID);
+      }
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTORE_i8), AMDIL::Rx1010)
+        .addReg(AMDIL::Rx1011)
+        .addImm(lID);
+    }
+    break;
+  case 2:
+    if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+      // Keep only the low 16 bits of the value being stored.
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1011)
+        .addImm(mMFI->addi32Literal(0x0000FFFF));
+      // Rx1008 = (address & 3) >> 1.
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+        .addReg(AMDIL::Rx1010)
+        .addImm(mMFI->addi32Literal(3));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+        .addReg(AMDIL::Rx1008)
+        .addImm(mMFI->addi32Literal(1));
+      // Select the half-word mask (Rx1012) and the shift amount (Rx1008).
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+        .addReg(AMDIL::Rx1008)
+        .addImm(mMFI->addi32Literal(0x0000FFFF))
+        .addImm(mMFI->addi32Literal(0xFFFF0000));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+        .addReg(AMDIL::Rx1008)
+        .addImm(mMFI->addi32Literal(16))
+        .addImm(mMFI->addi32Literal(0));
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1011)
+        .addReg(AMDIL::Rx1008);
+
+      if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
+        // Single masked-or atomic taking the mask (Rx1012) and value (Rx1011).
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_MSKOR_NORET))
+          .addReg(AMDIL::Rx1010)
+          .addImm(mMFI->addi32Literal(0))
+          .addReg(AMDIL::Rx1012)
+          .addReg(AMDIL::Rx1011)
+          .addImm(lID);
+      } else {
+        // Two-step fallback: apply the mask, then OR in the value.
+        // NOTE(review): the mask is applied with ATOM_L_ADD_NORET; an AND is
+        // the usual way to clear bits -- confirm ADD is intended.
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
+                AMDIL::Rx1010)
+          .addReg(AMDIL::Rx1012)
+          .addImm(lID);
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_OR_NORET),
+                AMDIL::Rx1010)
+          .addReg(AMDIL::Rx1011)
+          .addImm(lID);
+      }
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTORE_i16), AMDIL::Rx1010)
+        .addReg(AMDIL::Rx1011)
+        .addImm(lID);
+    }
+    break;
+  }
+}
+
+
+// Store setup for this expansion class: no extra work is done here, the call
+// is passed straight through to AMDIL789IOExpansion::expandStoreSetupCode().
+void
+AMDILEGIOExpansion::expandStoreSetupCode(MachineInstr *MI)
+{
+ AMDIL789IOExpansion::expandStoreSetupCode(MI);
+}
+/// Emit the address set-up that precedes an arena access, in front of \p MI.
+///
+/// Nothing is emitted unless isArenaOp(MI) holds. The register class of
+/// operand 0 then decides what is built:
+///   - the 8/16/32-bit scalar and small-vector classes listed first need no
+///     set-up;
+///   - the classes in the second group get an ADD_v2i32 into Rxy1010 with the
+///     64-bit literal (4ULL << 32);
+///   - every other class gets an ADD_v4i32 into R1010 with the 128-bit literal
+///     (4ULL << 32, 8ULL | (12ULL << 32)).
+/// Presumably the literals spread Rx1010 into per-component addresses that
+/// are 4 bytes apart -- confirm against the arena load/store expansion.
+void
+AMDILEGIOExpansion::expandArenaSetup(MachineInstr *MI)
+{
+  if (!isArenaOp(MI)) {
+    return;
+  }
+  const unsigned short dstRegClass = MI->getDesc().OpInfo[0].RegClass;
+  DebugLoc dbgLoc = MI->getDebugLoc();
+  switch (dstRegClass) {
+  // No set-up instruction for these classes.
+  case AMDIL::GPRI8RegClassID:
+  case AMDIL::GPRV2I8RegClassID:
+  case AMDIL::GPRI16RegClassID:
+  case AMDIL::GPRV2I16RegClassID:
+  case AMDIL::GPRV4I8RegClassID:
+  case AMDIL::GPRI32RegClassID:
+  case AMDIL::GPRF32RegClassID:
+    break;
+  // Two-component set-up.
+  case AMDIL::GPRV4I16RegClassID:
+  case AMDIL::GPRI64RegClassID:
+  case AMDIL::GPRF64RegClassID:
+  case AMDIL::GPRV2I32RegClassID:
+  case AMDIL::GPRV2F32RegClassID:
+    BuildMI(*mBB, MI, dbgLoc, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
+      .addReg(AMDIL::Rx1010)
+      .addImm(mMFI->addi64Literal(4ULL << 32));
+    break;
+  // Four-component set-up for everything else.
+  default:
+    BuildMI(*mBB, MI, dbgLoc, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+      .addReg(AMDIL::Rx1010)
+      .addImm(mMFI->addi128Literal(4ULL << 32, 8ULL | (12ULL << 32)));
+    break;
+  }
+}
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,79 @@
+//===-- AMDILELFWriterInfo.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL.h"
+#include "AMDILELFWriterInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the AMDILELFWriterInfo class
+//===----------------------------------------------------------------------===//
+// Passes both flags unchanged to the TargetELFWriterInfo base constructor;
+// the body is empty, so no AMDIL-specific state is initialised here.
+AMDILELFWriterInfo::AMDILELFWriterInfo(bool is64bit, bool endian)
+ : TargetELFWriterInfo(is64bit, endian)
+{
+}
+
+// Empty destructor body: this class releases nothing of its own.
+AMDILELFWriterInfo::~AMDILELFWriterInfo()
+{
+}
+
+// Unimplemented placeholder: asserts unconditionally and ignores
+// MachineRelTy. With assertions compiled out it returns 0.
+unsigned AMDILELFWriterInfo::getRelocationType(unsigned MachineRelTy) const
+{
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+// Unimplemented placeholder: asserts unconditionally. With assertions
+// compiled out it returns false.
+bool AMDILELFWriterInfo::hasRelocationAddend() const
+{
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return false;
+}
+
+// Unimplemented placeholder: asserts unconditionally and ignores both
+// arguments. With assertions compiled out it returns 0.
+long int AMDILELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const
+{
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+// Unimplemented placeholder: asserts unconditionally and ignores RelTy.
+// With assertions compiled out it returns 0.
+unsigned AMDILELFWriterInfo::getRelocationTySize(unsigned RelTy) const
+{
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+// Unimplemented placeholder: asserts unconditionally and ignores RelTy.
+// With assertions compiled out it returns false.
+bool AMDILELFWriterInfo::isPCRelativeRel(unsigned RelTy) const
+{
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return false;
+}
+
+// Unimplemented placeholder: asserts unconditionally. With assertions
+// compiled out it returns 0.
+unsigned AMDILELFWriterInfo::getAbsoluteLabelMachineRelTy() const
+{
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+// Unimplemented placeholder: asserts unconditionally and ignores all three
+// arguments. With assertions compiled out it returns 0.
+long int AMDILELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const
+{
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,57 @@
+//===-- AMDILELFWriterInfo.h ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_ELF_WRITER_INFO_H_
+#define _AMDIL_ELF_WRITER_INFO_H_
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm
+{
+/// AMDILELFWriterInfo - AMDIL subclass of TargetELFWriterInfo. It declares a
+/// constructor, a virtual destructor and the relocation query methods below.
+class AMDILELFWriterInfo : public TargetELFWriterInfo
+{
+public:
+ /// Both flags are handed to the TargetELFWriterInfo base class.
+ AMDILELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+ virtual ~AMDILELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const;
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelocationTySize - Returns the size of the relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const;
+};
+} // namespace llvm
+#endif // _AMDIL_ELF_WRITER_INFO_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,591 @@
+//===-- AMDILEnumeratedTypes.td -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ILEnumeratedTypes.td - The IL Enumerated Types - Micah Villmow - 9-3-2008.
+//
+//===----------------------------------------------------------------------===//
+
+// IL shader type: each record stores its 8-bit encoding in Value.
+class ILShader<bits<8> val> {
+ bits<8> Value = val;
+}
+// Table 5-1
+def IL_SHADER_PIXEL : ILShader<0>;
+def IL_SHADER_COMPUTE : ILShader<1>;
+
+// Section 5.2 IL RegType
+// Each record stores a 6-bit register-type encoding in Value.
+class ILRegType<bits<6> val> {
+ bits<6> Value = val;
+}
+// Table 5-2
+def IL_REGTYPE_TEMP : ILRegType<0>;
+def IL_REGTYPE_WINCOORD : ILRegType<1>;
+def IL_REGTYPE_CONST_BUF : ILRegType<2>;
+def IL_REGTYPE_LITERAL : ILRegType<3>;
+def IL_REGTYPE_ITEMP : ILRegType<4>;
+def IL_REGTYPE_GLOBAL : ILRegType<5>;
+
+// Section 5.3 IL Component Select
+// Each record pairs a 3-bit encoding (Value) with the string stored in Text.
+class ILComponentSelect<bits<3> val, string text> {
+ bits<3> Value = val;
+ string Text = text;
+}
+// Table 5-3
+def IL_COMPSEL_X : ILComponentSelect<0, "x">;
+def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
+def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
+def IL_COMPSEL_W : ILComponentSelect<3, "w">;
+def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
+def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
+
+// Section 5.4 IL Mod Dst Comp
+// Each record pairs a 2-bit encoding (Value) with the string stored in Text.
+class ILModDstComp<bits<2> val, string text> {
+ bits<2> Value = val;
+ string Text = text;
+}
+// Table 5-4
+// The four WRITE records share Value 1 and differ only in Text.
+// NOTE(review): WRITE_X is upper-case while WRITE_y/z/w are lower-case;
+// confirm the mixed naming is intended before relying on a pattern.
+def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
+def IL_MODCOMP_WRITE_X : ILModDstComp<1, "x">;
+def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
+def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
+def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
+def IL_MODCOMP_0 : ILModDstComp<2, "0">;
+def IL_MODCOMP_1 : ILModDstComp<3, "1">;
+
+// Section 5.5 IL Import Usage
+// Each record pairs a 1-bit encoding (Value) with the string stored in Text.
+class ILImportUsage<bits<1> val, string usage> {
+ bits<1> Value = val;
+ string Text = usage;
+}
+// Table 5-5
+def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
+
+// Section 5.6 IL Shift Scale
+// Each record pairs a 4-bit encoding (Value) with the string stored in Text;
+// IL_SHIFT_NONE uses the empty string.
+class ILShiftScale<bits<4> val, string scale> {
+ bits<4> Value = val;
+ string Text = scale;
+}
+
+// Table 5-6
+def IL_SHIFT_NONE : ILShiftScale<0, "">;
+def IL_SHIFT_X2 : ILShiftScale<1, "_x2">;
+def IL_SHIFT_X4 : ILShiftScale<2, "_x4">;
+def IL_SHIFT_X8 : ILShiftScale<3, "_x8">;
+def IL_SHIFT_D2 : ILShiftScale<4, "_d2">;
+def IL_SHIFT_D4 : ILShiftScale<5, "_d4">;
+def IL_SHIFT_D8 : ILShiftScale<6, "_d8">;
+
+// Section 5.7 IL Divide Component
+// Each record pairs a 3-bit encoding (Value) with the string stored in Text.
+class ILDivComp<bits<3> val, string divcomp> {
+ bits<3> Value = val;
+ string Text = divcomp;
+}
+
+// Table 5-7
+def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
+def IL_DIVCOMP_Y : ILDivComp<1, "_divcomp(y)">;
+def IL_DIVCOMP_Z : ILDivComp<2, "_divcomp(z)">;
+def IL_DIVCOMP_W : ILDivComp<3, "_divcomp(w)">;
+// The "unknown" entry (Value 4) is deliberately left commented out.
+//def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
+
+// Section 5.8 IL Relational Op
+// Each record pairs a 3-bit encoding (Value) with the string stored in Text.
+class ILRelOp<bits<3> val, string op> {
+ bits<3> Value = val;
+ string Text = op;
+}
+
+// Table 5-8
+def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
+def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
+def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
+def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
+def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
+def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
+
+// Section 5.9 IL Zero Op
+// Each record pairs a 3-bit encoding (Value) with the string stored in Text.
+class ILZeroOp<bits<3> val, string behavior> {
+ bits<3> Value = val;
+ string Text = behavior;
+}
+
+// Table 5-9
+def IL_ZEROOP_FLTMAX : ILZeroOp<0, "_zeroop(fltmax)">;
+def IL_ZEROOP_0 : ILZeroOp<1, "_zeroop(zero)">;
+def IL_ZEROOP_INFINITY : ILZeroOp<2, "_zeroop(infinity)">;
+def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
+
+// Section 5.10 IL Cmp Value
+// Each record pairs a 3-bit encoding (Value) with the numeric literal, kept
+// as a string, in Text.
+class ILCmpValue<bits<3> val, string num> {
+ bits<3> Value = val;
+ string Text = num;
+}
+
+// Table 5-10
+def IL_CMPVAL_0_0 : ILCmpValue<0, "0.0">;
+def IL_CMPVAL_0_5 : ILCmpValue<1, "0.5">;
+def IL_CMPVAL_1_0 : ILCmpValue<2, "1.0">;
+def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
+def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
+
+// Section 5.11 IL Addressing
+// Each record stores a 3-bit addressing-mode encoding in Value.
+class ILAddressing<bits<3> val> {
+ bits<3> Value = val;
+}
+
+// Table 5-11
+def IL_ADDR_ABSOLUTE : ILAddressing<0>;
+def IL_ADDR_RELATIVE : ILAddressing<1>;
+def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
+
+// Section 5.11 IL Element Format
+// NOTE(review): this reuses the section and table number 5.11 already used
+// for IL Addressing in this file -- confirm the numbering against the IL
+// specification.
+// Each record stores a 5-bit element-format encoding in Value.
+class ILElementFormat<bits<5> val> {
+ bits<5> Value = val;
+}
+
+// Table 5-11
+def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
+def IL_ELEMENTFORMAT_SNORM : ILElementFormat<1>;
+def IL_ELEMENTFORMAT_UNORM : ILElementFormat<2>;
+def IL_ELEMENTFORMAT_SINT : ILElementFormat<3>;
+def IL_ELEMENTFORMAT_UINT : ILElementFormat<4>;
+def IL_ELEMENTFORMAT_FLOAT : ILElementFormat<5>;
+def IL_ELEMENTFORMAT_SRGB : ILElementFormat<6>;
+def IL_ELEMENTFORMAT_MIXED : ILElementFormat<7>;
+def IL_ELEMENTFORMAT_Last : ILElementFormat<8>;
+
+// Section 5.12 IL Op Code
+// Each record pairs a 16-bit opcode number (Value) with the string stored in
+// Text. 'val' defaults to -1 while 'cmd' has no default; every def visible in
+// this file passes both arguments explicitly.
+class ILOpCode<bits<16> val = -1, string cmd> {
+ bits<16> Value = val;
+ string Text = cmd;
+}
+
+// Table 5-12
+def IL_DCL_CONST_BUFFER : ILOpCode<0, "dcl_cb">;
+def IL_DCL_INDEXED_TEMP_ARRAY : ILOpCode<1, "dcl_index_temp_array">;
+def IL_DCL_INPUT : ILOpCode<2, "dcl_input">;
+def IL_DCL_LITERAL : ILOpCode<3, "dcl_literal">;
+def IL_DCL_OUTPUT : ILOpCode<4, "dcl_output">;
+def IL_DCL_RESOURCE : ILOpCode<5, "dcl_resource">;
+def IL_OP_ABS : ILOpCode<6, "abs">;
+def IL_OP_ADD : ILOpCode<7, "add">;
+def IL_OP_AND : ILOpCode<8, "iand">;
+def IL_OP_BREAK : ILOpCode<9, "break">;
+def IL_OP_BREAK_LOGICALNZ : ILOpCode<10, "break_logicalnz">;
+def IL_OP_BREAK_LOGICALZ : ILOpCode<11, "break_logicalz">;
+def IL_OP_BREAKC : ILOpCode<12, "breakc">;
+def IL_OP_CALL : ILOpCode<13, "call">;
+def IL_OP_CALL_LOGICALNZ : ILOpCode<14, "call_logicalnz">;
+def IL_OP_CALL_LOGICALZ : ILOpCode<15, "call_logicalz">;
+def IL_OP_CASE : ILOpCode<16, "case">;
+def IL_OP_CLG : ILOpCode<17, "clg">;
+def IL_OP_CMOV : ILOpCode<18, "cmov">;
+def IL_OP_CMOV_LOGICAL : ILOpCode<19, "cmov_logical">;
+def IL_OP_CMP : ILOpCode<20, "cmp">;
+def IL_OP_CONTINUE : ILOpCode<21, "continue">;
+def IL_OP_CONTINUE_LOGICALNZ : ILOpCode<22, "continue_logicalnz">;
+def IL_OP_CONTINUE_LOGICALZ : ILOpCode<23, "continue_logicalz">;
+def IL_OP_CONTINUEC : ILOpCode<24, "continuec">;
+def IL_OP_COS : ILOpCode<25, "cos">;
+def IL_OP_COS_VEC : ILOpCode<26, "cos_vec">;
+def IL_OP_D_2_F : ILOpCode<27, "d2f">;
+def IL_OP_D_ADD : ILOpCode<28, "dadd">;
+def IL_OP_D_EQ : ILOpCode<29, "deq">;
+def IL_OP_D_FRC : ILOpCode<30, "dfrac">;
+def IL_OP_D_FREXP : ILOpCode<31, "dfrexp">;
+def IL_OP_D_GE : ILOpCode<32, "dge">;
+def IL_OP_D_LDEXP : ILOpCode<33, "dldexp">;
+def IL_OP_D_LT : ILOpCode<34, "dlt">;
+def IL_OP_D_MAD : ILOpCode<35, "dmad">;
+def IL_OP_D_MUL : ILOpCode<36, "dmul">;
+def IL_OP_D_NE : ILOpCode<37, "dne">;
+def IL_OP_DEFAULT : ILOpCode<38, "default">;
+def IL_OP_DISCARD_LOGICALNZ : ILOpCode<39, "discard_logicalnz">;
+def IL_OP_DISCARD_LOGICALZ : ILOpCode<40, "discard_logicalz">;
+def IL_OP_DIV : ILOpCode<41, "div_zeroop(infinity)">;
+def IL_OP_DP2 : ILOpCode<42, "dp2">;
+def IL_OP_DP3 : ILOpCode<43, "dp3">;
+def IL_OP_DP4 : ILOpCode<44, "dp4">;
+def IL_OP_ELSE : ILOpCode<45, "else">;
+def IL_OP_END : ILOpCode<46, "end">;
+def IL_OP_ENDFUNC : ILOpCode<47, "endfunc">;
+def IL_OP_ENDIF : ILOpCode<48, "endif">;
+def IL_OP_ENDLOOP : ILOpCode<49, "endloop">;
+def IL_OP_ENDMAIN : ILOpCode<50, "endmain">;
+def IL_OP_ENDSWITCH : ILOpCode<51, "endswitch">;
+def IL_OP_EQ : ILOpCode<52, "eq">;
+def IL_OP_EXP : ILOpCode<53, "exp">;
+def IL_OP_EXP_VEC : ILOpCode<54, "exp_vec">;
+def IL_OP_F_2_D : ILOpCode<55, "f2d">;
+def IL_OP_FLR : ILOpCode<56, "flr">;
+def IL_OP_FRC : ILOpCode<57, "frc">;
+def IL_OP_FTOI : ILOpCode<58, "ftoi">;
+def IL_OP_FTOU : ILOpCode<59, "ftou">;
+def IL_OP_FUNC : ILOpCode<60, "func">;
+def IL_OP_GE : ILOpCode<61, "ge">;
+def IL_OP_I_ADD : ILOpCode<62, "iadd">;
+def IL_OP_I_EQ : ILOpCode<63, "ieq">;
+def IL_OP_I_GE : ILOpCode<64, "ige">;
+def IL_OP_I_LT : ILOpCode<65, "ilt">;
+def IL_OP_I_MAD : ILOpCode<66, "imad">;
+def IL_OP_I_MAX : ILOpCode<67, "imax">;
+def IL_OP_I_MIN : ILOpCode<68, "imin">;
+def IL_OP_I_MUL : ILOpCode<69, "imul">;
+def IL_OP_I_MUL_HIGH : ILOpCode<70, "imul_high">;
+def IL_OP_I_NE : ILOpCode<71, "ine">;
+def IL_OP_I_NEGATE : ILOpCode<72, "inegate">;
+def IL_OP_I_NOT : ILOpCode<73, "inot">;
+def IL_OP_I_OR : ILOpCode<74, "ior">;
+def IL_OP_I_SHL : ILOpCode<75, "ishl">;
+def IL_OP_I_SHR : ILOpCode<76, "ishr">;
+def IL_OP_I_XOR : ILOpCode<77, "ixor">;
+def IL_OP_IF_LOGICALNZ : ILOpCode<78, "if_logicalnz">;
+def IL_OP_IF_LOGICALZ : ILOpCode<79, "if_logicalz">;
+def IL_OP_IFC : ILOpCode<80, "ifc">;
+def IL_OP_ITOF : ILOpCode<81, "itof">;
+def IL_OP_LN : ILOpCode<82, "ln">;
+def IL_OP_LOG : ILOpCode<83, "log">;
+def IL_OP_LOG_VEC : ILOpCode<84, "log_vec">;
+def IL_OP_LOOP : ILOpCode<85, "loop">;
+def IL_OP_LT : ILOpCode<86, "lt">;
+def IL_OP_MAD : ILOpCode<87, "mad_ieee">;
+def IL_OP_MAX : ILOpCode<88, "max_ieee">;
+def IL_OP_MIN : ILOpCode<89, "min_ieee">;
+def IL_OP_MOD : ILOpCode<90, "mod_ieee">;
+def IL_OP_MOV : ILOpCode<91, "mov">;
+def IL_OP_MUL_IEEE : ILOpCode<92, "mul_ieee">;
+def IL_OP_NE : ILOpCode<93, "ne">;
+def IL_OP_NRM : ILOpCode<94, "nrm_nrm4_zeroop(zero)">;
+def IL_OP_POW : ILOpCode<95, "pow">;
+def IL_OP_RCP : ILOpCode<96, "rcp">;
+def IL_OP_RET : ILOpCode<97, "ret">;
+def IL_OP_RET_DYN : ILOpCode<98, "ret_dyn">;
+def IL_OP_RET_LOGICALNZ : ILOpCode<99, "ret_logicalnz">;
+def IL_OP_RET_LOGICALZ : ILOpCode<100, "ret_logicalz">;
+def IL_OP_RND : ILOpCode<101, "rnd">;
+def IL_OP_ROUND_NEAR : ILOpCode<102, "round_nearest">;
+def IL_OP_ROUND_NEG_INF : ILOpCode<103, "round_neginf">;
+def IL_OP_ROUND_POS_INF : ILOpCode<104, "round_plusinf">;
+def IL_OP_ROUND_ZERO : ILOpCode<105, "round_z">;
+def IL_OP_RSQ : ILOpCode<106, "rsq">;
+def IL_OP_RSQ_VEC : ILOpCode<107, "rsq_vec">;
+def IL_OP_SAMPLE : ILOpCode<108, "sample">;
+def IL_OP_SAMPLE_L : ILOpCode<109, "sample_l">;
+def IL_OP_SET : ILOpCode<110, "set">;
+def IL_OP_SGN : ILOpCode<111, "sgn">;
+def IL_OP_SIN : ILOpCode<112, "sin">;
+def IL_OP_SIN_VEC : ILOpCode<113, "sin_vec">;
+def IL_OP_SUB : ILOpCode<114, "sub">;
+def IL_OP_SWITCH : ILOpCode<115, "switch">;
+def IL_OP_TRC : ILOpCode<116, "trc">;
+def IL_OP_U_DIV : ILOpCode<117, "udiv">;
+def IL_OP_U_GE : ILOpCode<118, "uge">;
+def IL_OP_U_LT : ILOpCode<119, "ult">;
+def IL_OP_U_MAD : ILOpCode<120, "umad">;
+def IL_OP_U_MAX : ILOpCode<121, "umax">;
+def IL_OP_U_MIN : ILOpCode<122, "umin">;
+def IL_OP_U_MOD : ILOpCode<123, "umod">;
+def IL_OP_U_MUL : ILOpCode<124, "umul">;
+def IL_OP_U_MUL_HIGH : ILOpCode<125, "umul_high">;
+def IL_OP_U_SHR : ILOpCode<126, "ushr">;
+def IL_OP_UTOF : ILOpCode<127, "utof">;
+def IL_OP_WHILE : ILOpCode<128, "whileloop">;
+// SC IL instructions that are not in CAL IL
+def IL_OP_ACOS : ILOpCode<129, "acos">;
+def IL_OP_ASIN : ILOpCode<130, "asin">;
+def IL_OP_EXN : ILOpCode<131, "exn">;
+def IL_OP_UBIT_REVERSE : ILOpCode<132, "ubit_reverse">;
+def IL_OP_UBIT_EXTRACT : ILOpCode<133, "ubit_extract">;
+def IL_OP_IBIT_EXTRACT : ILOpCode<134, "ibit_extract">;
+def IL_OP_SQRT : ILOpCode<135, "sqrt">;
+def IL_OP_SQRT_VEC : ILOpCode<136, "sqrt_vec">;
+// NOTE(review): IL_OP_ATAN and IL_OP_TAN are both given Value 137, so the two
+// records cannot be told apart by Value -- confirm whether one of them was
+// meant to have its own number.
+def IL_OP_ATAN : ILOpCode<137, "atan">;
+def IL_OP_TAN : ILOpCode<137, "tan">;
+def IL_OP_D_DIV : ILOpCode<138, "ddiv">;
+def IL_OP_F_NEG : ILOpCode<139, "mov">;
+def IL_OP_GT : ILOpCode<140, "gt">;
+// NOTE(review): IL_OP_LE carries the text "lt", the same string as IL_OP_LT
+// (Value 86) -- confirm this is intentional and not a typo for another
+// mnemonic.
+def IL_OP_LE : ILOpCode<141, "lt">;
+def IL_OP_DIST : ILOpCode<142, "dist">;
+def IL_OP_LEN : ILOpCode<143, "len">;
+def IL_OP_MACRO : ILOpCode<144, "mcall">;
+def IL_OP_INTR : ILOpCode<145, "call">;
+def IL_OP_I_FFB_HI : ILOpCode<146, "ffb_hi">;
+def IL_OP_I_FFB_LO : ILOpCode<147, "ffb_lo">;
+def IL_OP_BARRIER : ILOpCode<148, "fence_threads_memory_lds">;
+def IL_OP_BARRIER_LOCAL : ILOpCode<149, "fence_threads_lds">;
+def IL_OP_BARRIER_GLOBAL : ILOpCode<150, "fence_threads_memory">;
+def IL_OP_FENCE : ILOpCode<151, "fence_lds_memory">;
+def IL_OP_FENCE_READ_ONLY : ILOpCode<152, "fence_lds_mem_read_only">;
+def IL_OP_FENCE_WRITE_ONLY : ILOpCode<153, "fence_lds_mem_write_only">;
+def IL_PSEUDO_INST : ILOpCode<154, ";Pseudo Op">;
+def IL_OP_UNPACK_0 : ILOpCode<155, "unpack0">;
+def IL_OP_UNPACK_1 : ILOpCode<156, "unpack1">;
+def IL_OP_UNPACK_2 : ILOpCode<157, "unpack2">;
+def IL_OP_UNPACK_3 : ILOpCode<158, "unpack3">;
+def IL_OP_PI_REDUCE : ILOpCode<159, "pireduce">;
+def IL_OP_IBIT_COUNT : ILOpCode<160, "icbits">;
+def IL_OP_I_FFB_SGN : ILOpCode<161, "ffb_shi">;
+def IL_OP_F2U4 : ILOpCode<162, "f_2_u4">;
+def IL_OP_BIT_ALIGN : ILOpCode<163, "bitalign">;
+def IL_OP_BYTE_ALIGN : ILOpCode<164, "bytealign">;
+def IL_OP_U4_LERP : ILOpCode<165, "u4lerp">;
+def IL_OP_SAD : ILOpCode<166, "sad">;
+def IL_OP_SAD_HI : ILOpCode<167, "sadhi">;
+def IL_OP_SAD4 : ILOpCode<168, "sad4">;
+def IL_OP_UBIT_INSERT : ILOpCode<169, "ubit_insert">;
+def IL_OP_I_CARRY : ILOpCode<170, "icarry">;
+def IL_OP_I_BORROW : ILOpCode<171, "iborrow">;
+def IL_OP_U_MAD24 : ILOpCode<172, "umad24">;
+def IL_OP_U_MUL24 : ILOpCode<173, "umul24">;
+def IL_OP_I_MAD24 : ILOpCode<174, "imad24">;
+def IL_OP_I_MUL24 : ILOpCode<175, "imul24">;
+def IL_OP_CLAMP : ILOpCode<176, "clamp">;
+def IL_OP_LERP : ILOpCode<177, "lrp">;
+def IL_OP_FMA : ILOpCode<178, "fma">;
+def IL_OP_D_MIN : ILOpCode<179, "dmin">;
+def IL_OP_D_MAX : ILOpCode<180, "dmax">;
+def IL_OP_D_SQRT : ILOpCode<181, "dsqrt">;
+def IL_OP_DP2_ADD : ILOpCode<182, "dp2add">;
+def IL_OP_F16_TO_F32 : ILOpCode<183, "f162f">;
+def IL_OP_F32_TO_F16 : ILOpCode<184, "f2f16">;
+def IL_REG_LOCAL_ID_FLAT : ILOpCode<185, "vTidInGrpFlat">;
+def IL_REG_LOCAL_ID : ILOpCode<186, "vTidInGrp">;
+// Flattened absolute thread id register. The text previously read
+// "vAbsTidFlag"; every other *_FLAT register record in this table ends in
+// "Flat", and the IL register is named vAbsTidFlat.
+def IL_REG_GLOBAL_ID_FLAT : ILOpCode<187, "vAbsTidFlat">;
+def IL_REG_GLOBAL_ID : ILOpCode<188, "vAbsTid">;
+def IL_REG_GROUP_ID_FLAT : ILOpCode<189, "vThreadGrpIDFlat">;
+def IL_REG_GROUP_ID : ILOpCode<190, "vThreadGrpID">;
+def IL_OP_D_RCP : ILOpCode<191, "drcp_zeroop(infinity)">;
+def IL_OP_D_RSQ : ILOpCode<192, "drsq_zeroop(infinity)">;
+def IL_OP_D_MOV : ILOpCode<193, "dmov">;
+def IL_OP_D_MOVC : ILOpCode<194, "dmovc">;
+def IL_OP_NOP : ILOpCode<195, "nop">;
+def IL_OP_UAV_ADD : ILOpCode<196, "uav_add">;
+def IL_OP_UAV_AND : ILOpCode<197, "uav_and">;
+def IL_OP_UAV_MAX : ILOpCode<198, "uav_max">;
+def IL_OP_UAV_MIN : ILOpCode<199, "uav_min">;
+def IL_OP_UAV_OR : ILOpCode<200, "uav_or">;
+def IL_OP_UAV_RSUB : ILOpCode<201, "uav_rsub">;
+def IL_OP_UAV_SUB : ILOpCode<202, "uav_sub">;
+def IL_OP_UAV_UMAX : ILOpCode<203, "uav_umax">;
+def IL_OP_UAV_UMIN : ILOpCode<204, "uav_umin">;
+def IL_OP_UAV_XOR : ILOpCode<205, "uav_xor">;
+def IL_OP_UAV_INC : ILOpCode<206, "uav_uinc">;
+def IL_OP_UAV_DEC : ILOpCode<207, "uav_udec">;
+def IL_OP_UAV_CMP : ILOpCode<208, "uav_cmp">;
+def IL_OP_UAV_READ_ADD : ILOpCode<209, "uav_read_add">;
+def IL_OP_UAV_READ_AND : ILOpCode<210, "uav_read_and">;
+def IL_OP_UAV_READ_MAX : ILOpCode<211, "uav_read_max">;
+def IL_OP_UAV_READ_MIN : ILOpCode<212, "uav_read_min">;
+def IL_OP_UAV_READ_OR : ILOpCode<213, "uav_read_or">;
+def IL_OP_UAV_READ_RSUB : ILOpCode<214, "uav_read_rsub">;
+def IL_OP_UAV_READ_SUB : ILOpCode<215, "uav_read_sub">;
+def IL_OP_UAV_READ_UMAX : ILOpCode<216, "uav_read_umax">;
+def IL_OP_UAV_READ_UMIN : ILOpCode<217, "uav_read_umin">;
+def IL_OP_UAV_READ_XOR : ILOpCode<218, "uav_read_xor">;
+def IL_OP_UAV_READ_INC : ILOpCode<219, "uav_read_uinc">;
+def IL_OP_UAV_READ_DEC : ILOpCode<220, "uav_read_udec">;
+def IL_OP_UAV_READ_XCHG : ILOpCode<221, "uav_read_xchg">;
+def IL_OP_UAV_READ_CMPXCHG : ILOpCode<222, "uav_read_cmp_xchg">;
+def IL_OP_LDS_ADD : ILOpCode<223, "lds_add">;
+def IL_OP_LDS_AND : ILOpCode<224, "lds_and">;
+def IL_OP_LDS_MAX : ILOpCode<225, "lds_max">;
+def IL_OP_LDS_MIN : ILOpCode<226, "lds_min">;
+def IL_OP_LDS_OR : ILOpCode<227, "lds_or">;
+def IL_OP_LDS_RSUB : ILOpCode<228, "lds_rsub">;
+def IL_OP_LDS_SUB : ILOpCode<229, "lds_sub">;
+def IL_OP_LDS_UMAX : ILOpCode<230, "lds_umax">;
+def IL_OP_LDS_UMIN : ILOpCode<231, "lds_umin">;
+def IL_OP_LDS_XOR : ILOpCode<232, "lds_xor">;
+def IL_OP_LDS_INC : ILOpCode<233, "lds_inc">;
+def IL_OP_LDS_DEC : ILOpCode<234, "lds_dec">;
+def IL_OP_LDS_CMP : ILOpCode<235, "lds_cmp">;
+def IL_OP_LDS_READ_ADD : ILOpCode<236, "lds_read_add">;
+def IL_OP_LDS_READ_AND : ILOpCode<237, "lds_read_and">;
+def IL_OP_LDS_READ_MAX : ILOpCode<238, "lds_read_max">;
+def IL_OP_LDS_READ_MIN : ILOpCode<239, "lds_read_min">;
+def IL_OP_LDS_READ_OR : ILOpCode<240, "lds_read_or">;
+def IL_OP_LDS_READ_RSUB : ILOpCode<241, "lds_read_rsub">;
+def IL_OP_LDS_READ_SUB : ILOpCode<242, "lds_read_sub">;
+def IL_OP_LDS_READ_UMAX : ILOpCode<243, "lds_read_umax">;
+def IL_OP_LDS_READ_UMIN : ILOpCode<244, "lds_read_umin">;
+def IL_OP_LDS_READ_XOR : ILOpCode<245, "lds_read_xor">;
+def IL_OP_LDS_READ_INC : ILOpCode<246, "lds_read_inc">;
+def IL_OP_LDS_READ_DEC : ILOpCode<247, "lds_read_dec">;
+def IL_OP_LDS_READ_XCHG : ILOpCode<248, "lds_read_xchg">;
+def IL_OP_LDS_READ_CMPXCHG : ILOpCode<249, "lds_read_cmp_xchg">;
+def IL_OP_GDS_ADD : ILOpCode<250, "gds_add">;
+def IL_OP_GDS_AND : ILOpCode<251, "gds_and">;
+def IL_OP_GDS_MAX : ILOpCode<252, "gds_max">;
+def IL_OP_GDS_MIN : ILOpCode<253, "gds_min">;
+def IL_OP_GDS_OR : ILOpCode<254, "gds_or">;
+def IL_OP_GDS_RSUB : ILOpCode<255, "gds_rsub">;
+def IL_OP_GDS_SUB : ILOpCode<256, "gds_sub">;
+def IL_OP_GDS_UMAX : ILOpCode<257, "gds_umax">;
+def IL_OP_GDS_UMIN : ILOpCode<258, "gds_umin">;
+def IL_OP_GDS_MSKOR : ILOpCode<259, "gds_mskor">;
+def IL_OP_GDS_XOR : ILOpCode<260, "gds_xor">;
+def IL_OP_GDS_INC : ILOpCode<261, "gds_inc">;
+def IL_OP_GDS_DEC : ILOpCode<262, "gds_dec">;
+def IL_OP_GDS_CMP : ILOpCode<263, "gds_cmp">;
+def IL_OP_GDS_READ_ADD : ILOpCode<264, "gds_read_add">;
+def IL_OP_GDS_READ_AND : ILOpCode<265, "gds_read_and">;
+def IL_OP_GDS_READ_MAX : ILOpCode<266, "gds_read_max">;
+def IL_OP_GDS_READ_MIN : ILOpCode<267, "gds_read_min">;
+def IL_OP_GDS_READ_OR : ILOpCode<268, "gds_read_or">;
+def IL_OP_GDS_READ_RSUB : ILOpCode<269, "gds_read_rsub">;
+def IL_OP_GDS_READ_SUB : ILOpCode<270, "gds_read_sub">;
+def IL_OP_GDS_READ_UMAX : ILOpCode<271, "gds_read_umax">;
+def IL_OP_GDS_READ_UMIN : ILOpCode<272, "gds_read_umin">;
+def IL_OP_GDS_READ_MSKOR : ILOpCode<273, "gds_read_mskor">;
+def IL_OP_GDS_READ_XOR : ILOpCode<274, "gds_read_xor">;
+def IL_OP_GDS_READ_INC : ILOpCode<275, "gds_read_inc">;
+def IL_OP_GDS_READ_DEC : ILOpCode<276, "gds_read_dec">;
+def IL_OP_GDS_READ_XCHG : ILOpCode<277, "gds_read_xchg">;
+def IL_OP_GDS_READ_CMPXCHG : ILOpCode<278, "gds_read_cmp_xchg">;
+def IL_OP_APPEND_BUF_ALLOC : ILOpCode<279, "append_buf_alloc">;
+def IL_OP_APPEND_BUF_CONSUME : ILOpCode<280, "append_buf_consume">;
+def IL_OP_I64_ADD : ILOpCode<281, "i64add">;
+def IL_OP_I64_MAX : ILOpCode<282, "i64max">;
+def IL_OP_U64_MAX : ILOpCode<283, "u64max">;
+def IL_OP_I64_MIN : ILOpCode<284, "i64min">;
+def IL_OP_U64_MIN : ILOpCode<285, "u64min">;
+def IL_OP_I64_NEGATE : ILOpCode<286, "i64negate">;
+def IL_OP_I64_SHL : ILOpCode<287, "i64shl">;
+def IL_OP_I64_SHR : ILOpCode<288, "i64shr">;
+def IL_OP_U64_SHR : ILOpCode<289, "u64shr">;
+def IL_OP_I64_EQ : ILOpCode<290, "i64eq">;
+def IL_OP_I64_GE : ILOpCode<291, "i64ge">;
+def IL_OP_U64_GE : ILOpCode<292, "u64ge">;
+def IL_OP_I64_LT : ILOpCode<293, "i64lt">;
+def IL_OP_U64_LT : ILOpCode<294, "u64lt">;
+def IL_OP_I64_NE : ILOpCode<295, "i64ne">;
+def IL_OP_U_MULHI24 : ILOpCode<296, "umul24_high">;
+def IL_OP_I_MULHI24 : ILOpCode<297, "imul24_high">;
+def IL_OP_GDS_LOAD : ILOpCode<298, "gds_load">;
+def IL_OP_GDS_STORE : ILOpCode<299, "gds_store">;
+def IL_OP_LDS_LOAD : ILOpCode<300, "lds_load">;
+def IL_OP_LDS_LOAD_VEC : ILOpCode<301, "lds_load_vec">;
+def IL_OP_LDS_LOAD_BYTE : ILOpCode<302, "lds_load_byte">;
+def IL_OP_LDS_LOAD_UBYTE : ILOpCode<303, "lds_load_ubyte">;
+def IL_OP_LDS_LOAD_SHORT : ILOpCode<304, "lds_load_short">;
+def IL_OP_LDS_LOAD_USHORT : ILOpCode<305, "lds_load_ushort">;
+def IL_OP_LDS_STORE : ILOpCode<306, "lds_store">;
+def IL_OP_LDS_STORE_VEC : ILOpCode<307, "lds_store_vec">;
+def IL_OP_LDS_STORE_BYTE : ILOpCode<308, "lds_store_byte">;
+def IL_OP_LDS_STORE_SHORT : ILOpCode<309, "lds_store_short">;
+def IL_OP_RAW_UAV_LOAD : ILOpCode<310, "uav_raw_load">;
+def IL_OP_RAW_UAV_STORE : ILOpCode<311, "uav_raw_store">;
+def IL_OP_ARENA_UAV_LOAD : ILOpCode<312, "uav_arena_load">;
+def IL_OP_ARENA_UAV_STORE : ILOpCode<313, "uav_arena_store">;
+def IL_OP_LDS_MSKOR : ILOpCode<314, "lds_mskor">;
+def IL_OP_LDS_READ_MSKOR : ILOpCode<315, "lds_read_mskor">;
+def IL_OP_UAV_BYTE_LOAD : ILOpCode<316, "uav_byte_load">;
+def IL_OP_UAV_UBYTE_LOAD : ILOpCode<317, "uav_ubyte_load">;
+def IL_OP_UAV_SHORT_LOAD : ILOpCode<318, "uav_short_load">;
+def IL_OP_UAV_USHORT_LOAD : ILOpCode<319, "uav_ushort_load">;
+def IL_OP_UAV_BYTE_STORE : ILOpCode<320, "uav_byte_store">;
+def IL_OP_UAV_SHORT_STORE : ILOpCode<320, "uav_short_store">; // NOTE(review): same opcode value (320) as IL_OP_UAV_BYTE_STORE -- confirm intended
+def IL_OP_UAV_STORE : ILOpCode<321, "uav_store">;
+def IL_OP_UAV_LOAD : ILOpCode<322, "uav_load">;
+def IL_OP_MUL : ILOpCode<323, "mul">;
+def IL_OP_DIV_INF : ILOpCode<324, "div_zeroop(infinity)">;
+def IL_OP_DIV_FLTMAX : ILOpCode<325, "div_zeroop(fltmax)">;
+def IL_OP_DIV_ZERO : ILOpCode<326, "div_zeroop(zero)">;
+def IL_OP_DIV_INFELSEMAX : ILOpCode<327, "div_zeroop(inf_else_max)">;
+def IL_OP_FTOI_FLR : ILOpCode<328, "ftoi_flr">;
+def IL_OP_FTOI_RPI : ILOpCode<329, "ftoi_rpi">;
+def IL_OP_F32_TO_F16_NEAR : ILOpCode<330, "f2f16_near">;
+def IL_OP_F32_TO_F16_NEG_INF : ILOpCode<331, "f2f16_neg_inf">;
+def IL_OP_F32_TO_F16_PLUS_INF : ILOpCode<332, "f2f16_plus_inf">;
+def IL_OP_I64_MUL : ILOpCode<333, "i64mul">;
+def IL_OP_U64_MUL : ILOpCode<334, "u64mul">;
+def IL_OP_LDEXP : ILOpCode<335, "fldexp">;
+def IL_OP_FREXP_EXP : ILOpCode<336, "frexp_exp">;
+def IL_OP_FREXP_MANT : ILOpCode<337, "frexp_mant">;
+def IL_OP_D_FREXP_EXP : ILOpCode<338, "dfrexp_exp">;
+def IL_OP_D_FREXP_MANT : ILOpCode<339, "dfrexp_mant">;
+def IL_OP_DTOI : ILOpCode<340, "dtoi">;
+def IL_OP_DTOU : ILOpCode<341, "dtou">;
+def IL_OP_ITOD : ILOpCode<342, "itod">;
+def IL_OP_UTOD : ILOpCode<343, "utod">;
+def IL_OP_MIN3 : ILOpCode<344, "min3">;
+def IL_OP_MAX3 : ILOpCode<345, "max3">;
+def IL_OP_MED3 : ILOpCode<346, "med3">;
+def IL_OP_I_MIN3 : ILOpCode<347, "imin3">;
+def IL_OP_I_MAX3 : ILOpCode<348, "imax3">;
+def IL_OP_I_MED3 : ILOpCode<349, "imed3">;
+def IL_OP_U_MIN3 : ILOpCode<350, "umin3">;
+def IL_OP_U_MAX3 : ILOpCode<351, "umax3">;
+def IL_OP_U_MED3 : ILOpCode<352, "umed3">;
+def IL_OP_CLASS : ILOpCode<353, "class">;
+def IL_OP_D_CLASS : ILOpCode<354, "dclass">;
+def IL_OP_CU_ID : ILOpCode<355, "cu_id">;
+def IL_OP_WAVE_ID : ILOpCode<356, "wave_id">;
+def IL_OP_I64_SUB : ILOpCode<357, "i64sub">;
+def IL_OP_I64_DIV : ILOpCode<358, "i64div">;
+def IL_OP_U64_DIV : ILOpCode<359, "u64div">;
+def IL_OP_I64_MOD : ILOpCode<360, "i64mod">;
+def IL_OP_U64_MOD : ILOpCode<361, "u64mod">;
+def IL_DCL_GWS_THREAD_COUNT : ILOpCode<362, "dcl_gws_thread_count">;
+def IL_DCL_SEMAPHORE : ILOpCode<363, "dcl_semaphore">;
+def IL_OP_SEMAPHORE_INIT : ILOpCode<364, "init_semaphore">;
+def IL_OP_SEMAPHORE_WAIT : ILOpCode<365, "semaphore_wait">;
+def IL_OP_SEMAPHORE_SIGNAL : ILOpCode<366, "semaphore_signal">;
+def IL_OP_BARRIER_REGION : ILOpCode<367, "fence_threads_gds">;
+def IL_OP_MSAD : ILOpCode<368, "msad">;
+def IL_OP_QSAD : ILOpCode<369, "qsad">;
+def IL_OP_MQSAD : ILOpCode<370, "mqsad">;
+def IL_OP_D_TRIG_PREOP : ILOpCode<371, "dtrig_preop">;
+def IL_OP_ADD_RTE : ILOpCode<372, "add_rte">;
+def IL_OP_ADD_RTP : ILOpCode<373, "add_rtp">;
+def IL_OP_ADD_RTN : ILOpCode<374, "add_rtn">;
+def IL_OP_ADD_RTZ : ILOpCode<375, "add_rtz">;
+def IL_OP_SUB_RTE : ILOpCode<376, "sub_rte">;
+def IL_OP_SUB_RTP : ILOpCode<377, "sub_rtp">;
+def IL_OP_SUB_RTN : ILOpCode<378, "sub_rtn">;
+def IL_OP_SUB_RTZ : ILOpCode<379, "sub_rtz">;
+def IL_OP_MUL_RTE : ILOpCode<380, "mul_rte">;
+def IL_OP_MUL_RTP : ILOpCode<381, "mul_rtp">;
+def IL_OP_MUL_RTN : ILOpCode<382, "mul_rtn">;
+def IL_OP_MUL_RTZ : ILOpCode<383, "mul_rtz">;
+def IL_OP_MAD_RTE : ILOpCode<384, "mad_rte">;
+def IL_OP_MAD_RTP : ILOpCode<385, "mad_rtp">;
+def IL_OP_MAD_RTN : ILOpCode<386, "mad_rtn">;
+def IL_OP_MAD_RTZ : ILOpCode<387, "mad_rtz">;
+def IL_OP_FMA_RTE : ILOpCode<388, "fma_rte">;
+def IL_OP_FMA_RTP : ILOpCode<389, "fma_rtp">;
+def IL_OP_FMA_RTN : ILOpCode<390, "fma_rtn">;
+def IL_OP_FMA_RTZ : ILOpCode<391, "fma_rtz">;
+def IL_OP_D_ADD_RTE : ILOpCode<392, "dadd_rte">;
+def IL_OP_D_ADD_RTP : ILOpCode<393, "dadd_rtp">;
+def IL_OP_D_ADD_RTN : ILOpCode<394, "dadd_rtn">;
+def IL_OP_D_ADD_RTZ : ILOpCode<395, "dadd_rtz">;
+def IL_OP_D_SUB_RTE : ILOpCode<396, "dsub_rte">;
+def IL_OP_D_SUB_RTP : ILOpCode<397, "dsub_rtp">;
+def IL_OP_D_SUB_RTN : ILOpCode<398, "dsub_rtn">;
+def IL_OP_D_SUB_RTZ : ILOpCode<399, "dsub_rtz">;
+def IL_OP_D_MUL_RTE : ILOpCode<400, "dmul_rte">;
+def IL_OP_D_MUL_RTP : ILOpCode<401, "dmul_rtp">;
+def IL_OP_D_MUL_RTN : ILOpCode<402, "dmul_rtn">;
+def IL_OP_D_MUL_RTZ : ILOpCode<403, "dmul_rtz">;
+def IL_OP_D_MAD_RTE : ILOpCode<404, "dmad_rte">;
+def IL_OP_D_MAD_RTP : ILOpCode<405, "dmad_rtp">;
+def IL_OP_D_MAD_RTN : ILOpCode<406, "dmad_rtn">;
+def IL_OP_D_MAD_RTZ : ILOpCode<407, "dmad_rtz">;
+def IL_OP_SAD_U16 : ILOpCode<408, "sad_u16">;
+def IL_OP_SAD_U32 : ILOpCode<409, "sad_u32">;
+def IL_OP_D_ABS : ILOpCode<410, "dabs">;
+def IL_OP_DIV_PRECISE : ILOpCode<411, "div_precise">;
+def IL_OP_LOAD : ILOpCode<412, "load">;
+def IL_OP_BFI : ILOpCode<413, "bfi">;
+def IL_OP_BFM : ILOpCode<414, "bfm">;
+def IL_OP_GDS_LOAD_BYTE : ILOpCode<415, "gds_load_byte">;
+def IL_OP_GDS_LOAD_UBYTE : ILOpCode<416, "gds_load_ubyte">;
+def IL_OP_GDS_LOAD_SHORT : ILOpCode<417, "gds_load_short">;
+def IL_OP_GDS_LOAD_USHORT : ILOpCode<418, "gds_load_ushort">;
+def IL_OP_GDS_STORE_BYTE : ILOpCode<419, "gds_store_byte">;
+def IL_OP_GDS_STORE_SHORT : ILOpCode<420, "gds_store_short">;
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,236 @@
+//===-- AMDILEvergreenDevice.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Evergreen (HD5XXX) family of AMDIL device classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILEvergreenDevice.h"
+#include "AMDILEGAsmPrinter.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILPointerManager.h"
+using namespace llvm;
+
+AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
+ : AMDILDevice(ST)
+{
+ setCaps();
+ // Any name other than cedar/redwood/cypress is treated as Juniper.
+ const std::string devName = ST->getDeviceName();
+ mDeviceFlag = OCL_DEVICE_JUNIPER;
+ if (devName == "cedar") {
+ mDeviceFlag = OCL_DEVICE_CEDAR;
+ } else if (devName == "redwood") {
+ mDeviceFlag = OCL_DEVICE_REDWOOD;
+ } else if (devName == "cypress") {
+ mDeviceFlag = OCL_DEVICE_CYPRESS;
+ }
+}
+
+AMDILEvergreenDevice::~AMDILEvergreenDevice()
+{
+}
+
+size_t AMDILEvergreenDevice::getMaxLDSSize() const
+{
+ // No LDS is reported unless local memory is a hardware capability.
+ if (!usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return 0;
+ }
+ return MAX_LDS_SIZE_800;
+}
+size_t AMDILEvergreenDevice::getMaxGDSSize() const
+{
+ // No GDS is reported unless region memory is a hardware capability.
+ if (!usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return 0;
+ }
+ return MAX_GDS_SIZE_800;
+}
+uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const
+{
+ return 12;
+}
+
+uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const
+{
+ // Maps a resource kind to the IL resource ID used on Evergreen. Memory
+ // kinds that are not hardware capabilities fall back to the arena UAV.
+ switch(id) {
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ // The raw UAV ID depends on the CAL version.
+ if (mSTM->calVersion() < CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+ return DEFAULT_RAW_UAV_ID;
+ }
+ return GLOBAL_RETURN_RAW_UAV_ID;
+ case GLOBAL_ID:
+ case ARENA_UAV_ID:
+ return DEFAULT_ARENA_UAV_ID;
+ case LDS_ID:
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ }
+ return DEFAULT_ARENA_UAV_ID;
+ case GDS_ID:
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ }
+ return DEFAULT_ARENA_UAV_ID;
+ case SCRATCH_ID:
+ if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ }
+ return DEFAULT_ARENA_UAV_ID;
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ }
+ // Reached only for an unknown id when the assert above does not abort.
+ return 0;
+}
+
+size_t AMDILEvergreenDevice::getWavefrontSize() const
+{
+ return AMDILDevice::WavefrontSize;
+}
+
+uint32_t AMDILEvergreenDevice::getGeneration() const
+{
+ return AMDILDeviceInfo::HD5XXX;
+}
+
+void AMDILEvergreenDevice::setCaps()
+{ // Fills in the mHWBits / mSWBits capability masks for the base Evergreen device.
+ mHWBits.set(AMDILDeviceInfo::ByteGDSOps); // byte GDS ops: set in mHWBits ...
+ mSWBits.reset(AMDILDeviceInfo::ByteGDSOps); // ... and cleared from mSWBits
+
+ mSWBits.set(AMDILDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDILDeviceInfo::ArenaUAV);
+ mHWBits.set(AMDILDeviceInfo::Semaphore);
+ if (mSTM->calVersion() >= CAL_VERSION_SC_140) { // from SC_140 on: 64-bit div/mod moves to mHWBits
+ mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
+ }
+ mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) { // only when the subtarget reports the ByteStores override
+ mHWBits.set(AMDILDeviceInfo::ByteStores);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) { // Debug override: local/region memory go into mSWBits
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+ mSWBits.set(AMDILDeviceInfo::RegionMem);
+ } else { // otherwise both go into mHWBits
+ mHWBits.set(AMDILDeviceInfo::LocalMem);
+ mHWBits.set(AMDILDeviceInfo::RegionMem);
+ }
+ if (!mSTM->isApple()) { // non-Apple: Images set only when isOverride(Images) holds
+ if (mSTM->isOverride(AMDILDeviceInfo::Images)) {
+ mHWBits.set(AMDILDeviceInfo::Images);
+ }
+ } else { // Apple: Images set unconditionally
+ mHWBits.set(AMDILDeviceInfo::Images);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) { // NOTE(review): strict '>' here, getResourceID() uses '>=' on the same constant -- confirm
+ mHWBits.set(AMDILDeviceInfo::CachedMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) { // only when the subtarget reports the MultiUAV override
+ mHWBits.set(AMDILDeviceInfo::MultiUAV);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_SC_136) { // after SC_136: byte LDS ops and arena vectors go into mHWBits
+ mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDILDeviceInfo::ArenaVectors);
+ } else { // otherwise arena vectors are set in mSWBits
+ mSWBits.set(AMDILDeviceInfo::ArenaVectors);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_SC_137) { // after SC_137: long ops move to mHWBits
+ mHWBits.set(AMDILDeviceInfo::LongOps);
+ mSWBits.reset(AMDILDeviceInfo::LongOps);
+ }
+ mHWBits.set(AMDILDeviceInfo::TmrReg);
+}
+
+FunctionPass*
+AMDILEvergreenDevice::getIOExpansion(
+ TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+ return new AMDILEGIOExpansion(TM, OptLevel);
+}
+
+AsmPrinter*
+AMDILEvergreenDevice::getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const
+{
+ return new AMDILEGAsmPrinter(ASM_PRINTER_ARGUMENTS);
+}
+
+FunctionPass*
+AMDILEvergreenDevice::getPointerManager(
+ TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+ return new AMDILEGPointerManager(TM, OptLevel);
+}
+
+AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST)
+{
+ setCaps();
+}
+
+AMDILCypressDevice::~AMDILCypressDevice()
+{
+}
+
+void AMDILCypressDevice::setCaps()
+{
+ if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDILDeviceInfo::DoubleOps);
+ mHWBits.set(AMDILDeviceInfo::FMA);
+ }
+}
+
+
+AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST)
+{
+ setCaps();
+}
+
+AMDILCedarDevice::~AMDILCedarDevice()
+{
+}
+
+void AMDILCedarDevice::setCaps()
+{
+ mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILCedarDevice::getWavefrontSize() const
+{
+ return AMDILDevice::QuarterWavefrontSize;
+}
+
+AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST)
+{
+ setCaps();
+}
+
+AMDILRedwoodDevice::~AMDILRedwoodDevice()
+{
+}
+
+void AMDILRedwoodDevice::setCaps()
+{
+ mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILRedwoodDevice::getWavefrontSize() const
+{
+ return AMDILDevice::HalfWavefrontSize;
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,94 @@
+//===-- AMDILEvergreenDevice.h --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the Evergreen family of AMDIL device classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILEVERGREENDEVICE_H_
+#define _AMDILEVERGREENDEVICE_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+namespace llvm
+{
+class AMDILSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+// The AMDILEvergreenDevice is the base device class for all of the Evergreen
+// series of cards. This class contains information required to differentiate
+// the Evergreen device from the generic AMDILDevice. This device represents
+// the capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDILEvergreenDevice : public AMDILDevice
+{
+public:
+ AMDILEvergreenDevice(AMDILSubtarget *ST);
+ virtual ~AMDILEvergreenDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getMaxGDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getMaxNumUAVs() const;
+ virtual uint32_t getResourceID(uint32_t) const;
+ virtual FunctionPass*
+ getIOExpansion(TargetMachine&, CodeGenOpt::Level) const;
+ virtual AsmPrinter*
+ getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const;
+ virtual FunctionPass*
+ getPointerManager(TargetMachine&, CodeGenOpt::Level) const;
+protected:
+ virtual void setCaps();
+}; // AMDILEvergreenDevice
+
+// The AMDILCypressDevice is similar to the AMDILEvergreenDevice, except it has
+// support for double precision operations. This device is used to represent
+// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+// and HD59XX cards.
+class AMDILCypressDevice : public AMDILEvergreenDevice
+{
+public:
+ AMDILCypressDevice(AMDILSubtarget *ST);
+ virtual ~AMDILCypressDevice();
+private:
+ virtual void setCaps();
+}; // AMDILCypressDevice
+
+
+// The AMDILCedarDevice is the class that represents all of the 'Cedar' based
+// devices. This class differs from the base AMDILEvergreenDevice in that the
+// device is a ~quarter of the 'Juniper'. These are commercially known as the
+// HD54XX and HD53XX series of cards.
+class AMDILCedarDevice : public AMDILEvergreenDevice
+{
+public:
+ AMDILCedarDevice(AMDILSubtarget *ST);
+ virtual ~AMDILCedarDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDILCedarDevice
+
+// The AMDILRedwoodDevice is the class that represents all the 'Redwood' based
+// devices. This class differs from the base class, in that these devices are
+// considered about half of a 'Juniper' device. These are commercially known as
+// the HD55XX and HD56XX series of cards.
+class AMDILRedwoodDevice : public AMDILEvergreenDevice
+{
+public:
+ AMDILRedwoodDevice(AMDILSubtarget *ST);
+ virtual ~AMDILRedwoodDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDILRedwoodDevice
+
+} // namespace llvm
+#endif // _AMDILEVERGREENDEVICE_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,32 @@
+//===-- AMDILFixupKinds.h -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AMDIL target-specific fixup kinds
+// (llvm::AMDIL::Fixups), numbered from FirstTargetFixupKind.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AMDIL_AMDILFIXUPKINDS_H
+#define LLVM_AMDIL_AMDILFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm
+{
+namespace AMDIL
+{
+enum Fixups {
+ reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch.
+ reloc_riprel_4byte, // 32-bit rip-relative
+};
+}
+}
+
+#endif
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,543 @@
+//===-- AMDILFormats.td ---------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Base instruction format classes for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+
+include "AMDILTokenDesc.td"
+
+//===--------------------------------------------------------------------===//
+// The parent IL instruction class that inherits the Instruction class. This
+// class sets the corresponding namespace, the out and input dag lists the
+// pattern to match to and the string to print out for the assembly printer.
+//===--------------------------------------------------------------------===//
+class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+ let Namespace = "AMDIL";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ ILOpCode operation = op; // IL opcode record associated with this format
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n"); // a newline is appended to every asm string
+ bit hasIEEEFlag = 0; // off by default
+ bit hasZeroOpFlag = 0; // off by default
+}
+
+//===--------------------------------------------------------------------===//
+// The base class for vector insert instructions. It is a single dest, quad
+// source instruction where the last two source operands must be 32bit
+// immediate values that are encoding the swizzle of the source register
+// The src2 and src3 operands must also be inverses of each other, such
+// that if src2 is 0x1000300(x0z0), src3 must be 0x20004(0y0w). The values
+// are encoded as a 32bit integer with each byte representing a swizzle value.
+// The encoding is as follows for 32bit register types:
+// 0x00 -> '_'
+// 0x01 -> 'x'
+// 0x02 -> 'y'
+// 0x03 -> 'z'
+// 0x04 -> 'w'
+// 0x05 -> 'x'
+// 0x06 -> 'y'
+// 0x07 -> 'z'
+// 0x08 -> 'w'
+// 0x09 -> '0'
+// The encoding is as follows for 64bit register types:
+// 0x00 -> "__"
+// 0x01 -> "xy"
+// 0x02 -> "zw"
+// 0x03 -> "xy"
+// 0x04 -> "zw"
+// 0x05 -> "00"
+//===--------------------------------------------------------------------===//
+class InsertVectorClass<ILOpCode op, RegisterClass DReg, RegisterClass SReg,
+ SDNode OpNode, string asmstr> :
+ ILFormat<op, (outs DReg:$dst),
+ (ins DReg:$src0, SReg:$src1, i32imm:$src2, i32imm:$src3),
+ !strconcat(asmstr, " $dst, $src0, $src1"),
+ [(set DReg:$dst, (OpNode DReg:$src0, SReg:$src1,
+ timm:$src2, timm:$src3))]>;
+
+//===--------------------------------------------------------------------===//
+// Class that has one input parameter and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0" and
+// handles the unary math operators.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input and output register 0.
+//===--------------------------------------------------------------------===//
+class OneInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ILFormat<op, outs, ins, asmstr, pattern>
+{
+ ILDst dst_reg;
+ ILDstMod dst_mod;
+ ILRelAddr dst_rel;
+ ILSrc dst_reg_rel;
+ ILSrcMod dst_reg_rel_mod;
+ ILSrc src0_reg;
+ ILSrcMod src0_mod;
+ ILRelAddr src0_rel;
+ ILSrc src0_reg_rel;
+ ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// A simplified version of OneInOneOut class where the pattern is standard
+// and does not need special cases. This requires that the pattern has
+// a SDNode and takes a source and destination register that is of type
+// RegisterClass. This is the standard unary op class.
+//===--------------------------------------------------------------------===//
+class UnaryOp<ILOpCode op, SDNode OpNode,
+ RegisterClass dRegs, RegisterClass sRegs>
+ : OneInOneOut<op, (outs dRegs:$dst), (ins sRegs:$src),
+ !strconcat(op.Text, " $dst, $src"),
+ [(set dRegs:$dst, (OpNode sRegs:$src))]>;
+
+//===--------------------------------------------------------------------===//
+// This class is similar to the UnaryOp class, however, there is no
+// result value to assign.
+//===--------------------------------------------------------------------===//
+class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ILFormat<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src0_reg;
+ ILSrcMod src0_mod;
+ ILRelAddr src0_rel;
+ ILSrc src0_reg_rel;
+ ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have two input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1" and
+// handles the binary math operators and comparison operations.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input register 1.
+//===--------------------------------------------------------------------===//
+class TwoInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : OneInOneOut<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src1_reg;
+ ILSrcMod src1_mod;
+ ILRelAddr src1_rel;
+ ILSrc src1_reg_rel;
+ ILSrcMod src1_reg_rel_mod;
+}
+//===--------------------------------------------------------------------===//
+// A simplification of the TwoInOneOut pattern for Binary Operations.
+// This class is a helper class that assumes the simple pattern of
+// $dst = op $src0 $src1.
+// Other type of matching patterns need to use the TwoInOneOut class.
+//===--------------------------------------------------------------------===//
+class BinaryOp<ILOpCode op, SDNode OpNode, RegisterClass dReg,
+ RegisterClass sReg0, RegisterClass sReg1>
+ : TwoInOneOut<op, (outs dReg:$dst), (ins sReg0:$src0, sReg1:$src1),
+ !strconcat(op.Text, " $dst, $src0, $src1"),
+ [(set dReg:$dst, (OpNode sReg0:$src0, sReg1:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// The base class for vector extract instructions. The vector extract
+// instructions take as an input value a source register and a 32bit integer
+// with the same encoding as specified in InsertVectorClass and produces
+// a result with only the swizzled component in the destination register.
+//===--------------------------------------------------------------------===//
+class ExtractVectorClass<RegisterClass DReg, RegisterClass SReg, SDNode OpNode>
+: TwoInOneOut<IL_OP_MOV, (outs DReg:$dst), (ins SReg:$src0, i32imm:$src1),
+ "mov $dst, $src0",
+ [(set DReg:$dst, (OpNode SReg:$src0, timm:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// The base class for vector concatenation. This class creates either a vec2
+// or a vec4 of 32bit data types or a vec2 of 64bit data types. This is done
+// by swizzling either the 'x' or 'xy' components of the source operands
+// into the destination register.
+//===--------------------------------------------------------------------===//
+class VectorConcatClass<RegisterClass Dst, RegisterClass Src, SDNode OpNode>
+ : TwoInOneOut<IL_OP_I_ADD, (outs Dst:$dst), (ins Src:$src0, Src:$src1),
+ "iadd $dst, $src0, $src1",
+ [(set Dst:$dst, (OpNode Src:$src0, Src:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// Similar to the UnaryOpNoRet class, but takes as arguments two input
+// operands. Used mainly for barrier instructions on PC platform.
+//===--------------------------------------------------------------------===//
+class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : UnaryOpNoRet<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src1_reg;
+ ILSrcMod src1_mod;
+ ILRelAddr src1_rel;
+ ILSrc src1_reg_rel;
+ ILSrcMod src1_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have three input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
+// handles the mad and conditional mov instruction.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative.
+// This class is the parent class of TernaryOp
+//===--------------------------------------------------------------------===//
+class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : TwoInOneOut<op, outs, ins, asmstr, pattern> {
+ ILSrc src2_reg;
+ ILSrcMod src2_mod;
+ ILRelAddr src2_rel;
+ ILSrc src2_reg_rel;
+ ILSrcMod src2_reg_rel_mod;
+ }
+
+//===--------------------------------------------------------------------===//
+// The g version of the Three Input pattern uses a standard pattern
+// but allows specification of the register to further generalize the class
+// This class is mainly used in the generic multiclasses in AMDILMultiClass.td
+//===--------------------------------------------------------------------===//
+class TernaryOp<ILOpCode op, SDNode OpNode,
+ RegisterClass dReg,
+ RegisterClass sReg0,
+ RegisterClass sReg1,
+ RegisterClass sReg2>
+ : ThreeInOneOut<op, (outs dReg:$dst),
+ (ins sReg0:$src0, sReg1:$src1, sReg2:$src2),
+ !strconcat(op.Text, " $dst, $src0, $src1, $src2"),
+ [(set dReg:$dst,
+ (OpNode sReg0:$src0, sReg1:$src1, sReg2:$src2))]>;
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have four input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2, Src3".
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input register 3.
+// This class extends ThreeInOneOut with the tokens for the fourth source
+// operand (src3).
+//===--------------------------------------------------------------------===//
+class FourInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ThreeInOneOut<op, outs, ins, asmstr, pattern> {
+ ILSrc src3_reg;
+ ILSrcMod src3_mod;
+ ILRelAddr src3_rel;
+ ILSrc src3_reg_rel;
+ ILSrcMod src3_reg_rel_mod;
+ }
+
+
+//===--------------------------------------------------------------------===//
+// The macro class that is an extension of OneInOneOut but is tailored for
+// macros only where all the register types are the same
+//===--------------------------------------------------------------------===//
+class UnaryMacro<RegisterClass Dst, RegisterClass Src0, SDNode OpNode>
+: OneInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+ (ins Src0:$src0),
+ "($dst),($src0)",
+ [(set Dst:$dst, (OpNode Src0:$src0))]>;
+
+//===--------------------------------------------------------------------===//
+// The macro class is an extension of TwoInOneOut but is tailored for
+// macros only where all the register types are the same
+//===--------------------------------------------------------------------===//
+class BinaryMacro<RegisterClass Dst,
+ RegisterClass Src0,
+ RegisterClass Src1,
+ SDNode OpNode>
+ : TwoInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+ (ins Src0: $src0, Src1:$src1),
+ "($dst),($src0, $src1)",
+ [(set Dst:$dst, (OpNode Src0:$src0, Src1:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// Classes for dealing with atomic instructions w/ 32bit pointers
+//===--------------------------------------------------------------------===//
+class Append<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM3232:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR:$id))]>;
+
+
+class UniAtom<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM3232:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, timm:$id))]>;
+
+
+class UniAtomNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs), (ins MEM3232:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr")),
+ [(intr ADDR:$ptr, timm:$id)]>;
+
+class BinAtom<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM3232:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, timm:$id))]>;
+
+
+class BinAtomNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs), (ins MEM3232:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+ [(intr ADDR:$ptr, GPRI32:$src, timm:$id)]>;
+
+class TriAtom<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM3232:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class CmpXChg<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM3232:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")), // prints $src1 before $src (TriAtom prints $src first)
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class TriAtomNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs),
+ (ins MEM3232:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+ [(intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id)]>;
+
+class CmpXChgNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs),
+ (ins MEM3232:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")), // prints $src1 before $src (TriAtomNoRet prints $src first)
+ [(intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id)]>;
+
+
+class UniAtomI64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI64:$dst),
+ (ins MEM3232:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, ${ptr}0")),
+ [(set GPRI64:$dst, (intr ADDR:$ptr, timm:$id))]>;
+
+
+class UniAtomNoRetI64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs), (ins MEM3232:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," ${ptr}0")),
+ [(intr ADDR:$ptr, timm:$id)]>;
+
+class BinAtomI64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI64:$dst),
+ (ins MEM3232:$ptr, GPRI64:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, ${ptr}0, $src")),
+ [(set GPRI64:$dst, (i64 (intr ADDR:$ptr, GPRI64:$src, timm:$id)))]>;
+
+
+// Result-less atomic format with a MEM3232 pointer, one GPRI64 source and an
+// immediate id. Asm string: op.Text # idType # " ${ptr}0, $src".
+class BinAtomNoRetI64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs), (ins MEM3232:$ptr, GPRI64:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," ${ptr}0, $src")),
+ [(intr ADDR:$ptr, GPRI64:$src, timm:$id)]>;
+
+// Atomic format with a MEM3232 pointer, two GPRI64 sources and an immediate
+// id, producing a GPRI64 result in $dst.
+// Asm string: op.Text # idType # " $dst, ${ptr}0, $src, $src1".
+class TriAtomI64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI64:$dst),
+ (ins MEM3232:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, ${ptr}0, $src, $src1")),
+ [(set GPRI64:$dst, (intr ADDR:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id))]>;
+
+// Compare-exchange format with a MEM3232 pointer, two GPRI64 sources and an
+// immediate id, producing a GPRI64 result. The asm string prints the sources
+// swapped relative to the pattern order: " $dst, ${ptr}0, $src1, $src".
+class CmpXChgI64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI64:$dst),
+ (ins MEM3232:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, ${ptr}0, $src1, $src")),
+ [(set GPRI64:$dst, (intr ADDR:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id))]>;
+
+// Result-less atomic format with a MEM3232 pointer, two GPRI64 sources and an
+// immediate id. Asm string: op.Text # idType # " ${ptr}0, $src, $src1".
+class TriAtomNoRetI64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs),
+ (ins MEM3232:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," ${ptr}0, $src, $src1")),
+ [(intr ADDR:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id)]>;
+
+// Result-less compare-exchange format with a MEM3232 pointer, two GPRI64
+// sources and an immediate id. The asm string prints the sources swapped
+// relative to the pattern order: " ${ptr}0, $src1, $src".
+class CmpXChgNoRetI64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs),
+ (ins MEM3232:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," ${ptr}0, $src1, $src")),
+ [(intr ADDR:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id)]>;
+
+
+//===--------------------------------------------------------------------===//
+// Classes for dealing with atomic instructions w/ 64bit pointers
+//===--------------------------------------------------------------------===//
+// Format whose only input is a MEM6464 operand named $id and whose result is
+// a GPRI32 in $dst. Only $dst appears explicitly in the asm string; the
+// pattern matches intr applied to ADDR64:$id.
+class Append64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM6464:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR64:$id))]>;
+
+
+// Atomic format taking a MEM6464 pointer and an immediate id, producing a
+// GPRI32 result in $dst. Asm string: op.Text # idType # " $dst, $ptr".
+class UniAtom64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM6464:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, timm:$id))]>;
+
+
+// Result-less atomic format taking only a MEM6464 pointer and an immediate
+// id. Asm string: op.Text # idType # " $ptr".
+class UniAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs), (ins MEM6464:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr")),
+ [(intr ADDR64:$ptr, timm:$id)]>;
+
+// Atomic format with a MEM6464 pointer, one GPRI32 source and an immediate
+// id, producing a GPRI32 result in $dst.
+// Asm string: op.Text # idType # " $dst, $ptr, $src".
+class BinAtom64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM6464:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, timm:$id))]>;
+
+
+// Result-less atomic format with a MEM6464 pointer, one GPRI32 source and an
+// immediate id. Asm string: op.Text # idType # " $ptr, $src".
+class BinAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs), (ins MEM6464:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+ [(intr ADDR64:$ptr, GPRI32:$src, timm:$id)]>;
+
+// Atomic format with a MEM6464 pointer, two GPRI32 sources and an immediate
+// id, producing a GPRI32 result in $dst.
+// Asm string: op.Text # idType # " $dst, $ptr, $src, $src1".
+class TriAtom64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM6464:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// Compare-exchange format with a MEM6464 pointer, two GPRI32 sources and an
+// immediate id, producing a GPRI32 result. The asm string prints the sources
+// swapped relative to the pattern order: " $dst, $ptr, $src1, $src".
+class CmpXChg64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEM6464:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// Result-less atomic format with a MEM6464 pointer, two GPRI32 sources and an
+// immediate id. Asm string: op.Text # idType # " $ptr, $src, $src1".
+class TriAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs),
+ (ins MEM6464:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+ [(intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id)]>;
+
+// Result-less compare-exchange format with a MEM6464 pointer, two GPRI32
+// sources and an immediate id. The asm string prints the sources swapped
+// relative to the pattern order: " $ptr, $src1, $src".
+class CmpXChgNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs),
+ (ins MEM6464:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
+ [(intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id)]>;
+
+
+// Atomic format taking a MEM6464 pointer and an immediate id, producing a
+// GPRI64 result in $dst. Asm string: op.Text # idType # " $dst, $ptr".
+class UniAtom64I64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI64:$dst),
+ (ins MEM6464:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+ [(set GPRI64:$dst, (intr ADDR64:$ptr, timm:$id))]>;
+
+
+// Result-less atomic format taking only a MEM6464 pointer and an immediate
+// id. Its operands, asm string and pattern are the same as UniAtomNoRet64.
+class UniAtomNoRet64I64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs), (ins MEM6464:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr")),
+ [(intr ADDR64:$ptr, timm:$id)]>;
+
+// Atomic format with a MEM6464 pointer, one GPRI64 source and an immediate
+// id, producing a GPRI64 result in $dst.
+// Asm string: op.Text # idType # " $dst, $ptr, $src".
+class BinAtom64I64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI64:$dst),
+ (ins MEM6464:$ptr, GPRI64:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+ [(set GPRI64:$dst, (intr ADDR64:$ptr, GPRI64:$src, timm:$id))]>;
+
+
+// Result-less atomic format with a MEM6464 pointer, one GPRI64 source and an
+// immediate id. Asm string: op.Text # idType # " $ptr, $src".
+class BinAtomNoRet64I64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs), (ins MEM6464:$ptr, GPRI64:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+ [(intr ADDR64:$ptr, GPRI64:$src, timm:$id)]>;
+
+// Atomic format with a MEM6464 pointer, two GPRI64 sources and an immediate
+// id, producing a GPRI64 result in $dst.
+// Asm string: op.Text # idType # " $dst, $ptr, $src, $src1".
+class TriAtom64I64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI64:$dst),
+ (ins MEM6464:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+ [(set GPRI64:$dst, (intr ADDR64:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id))]>;
+
+// Compare-exchange format with a MEM6464 pointer, two GPRI64 sources and an
+// immediate id, producing a GPRI64 result. The asm string prints the sources
+// swapped relative to the pattern order: " $dst, $ptr, $src1, $src".
+class CmpXChg64I64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI64:$dst),
+ (ins MEM6464:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")),
+ [(set GPRI64:$dst, (intr ADDR64:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id))]>;
+
+// Result-less atomic format with a MEM6464 pointer, two GPRI64 sources and an
+// immediate id. Asm string: op.Text # idType # " $ptr, $src, $src1".
+class TriAtomNoRet64I64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs),
+ (ins MEM6464:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+ [(intr ADDR64:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id)]>;
+
+// Result-less compare-exchange format with a MEM6464 pointer, two GPRI64
+// sources and an immediate id. The asm string prints the sources swapped
+// relative to the pattern order: " $ptr, $src1, $src".
+class CmpXChgNoRet64I64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs),
+ (ins MEM6464:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
+ [(intr ADDR64:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id)]>;
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic versions of the above classes but for Target specific intrinsics
+// instead of SDNode patterns.
+//===--------------------------------------------------------------------===//
+// Every intrinsic class in this scope gets TargetPrefix "AMDIL" and
+// isTarget = 1. Each class fixes a result-type list, a parameter-type list
+// and a property list for the intrinsics that derive from it.
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ // No parameters; the result is i64 (VoidIntLong) or i32 (VoidIntInt,
+ // VoidIntBool). VoidIntBool is declared identically to VoidIntInt.
+ class VoidIntLong :
+ Intrinsic<[llvm_i64_ty], [], []>;
+ class VoidIntInt :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class VoidIntBool :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ // One parameter whose type must match the overloaded result type.
+ class UnaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>;
+ class UnaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], []>;
+ // Conversions: result and parameter are independently overloaded.
+ class ConvertIntFTOI :
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], []>;
+ class ConvertIntITOF :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], []>;
+ // One overloaded parameter, no result.
+ class UnaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
+ // Two parameters, both matching the overloaded result type.
+ class BinaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class BinaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ // Two parameters, no result; the second parameter matches the first.
+ class BinaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+ // Three parameters, all matching the overloaded result type.
+ class TernaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class TernaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ // Four parameters, all matching the overloaded result type.
+ class QuaternaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ // 32-bit atomics: a pointer followed by one, two or three i32 parameters,
+ // returning i32.
+ class UnaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ // NOTE(review): unlike every other atomic class in this scope, this one
+ // does not pass [IntrReadWriteArgMem]; confirm whether that is intentional.
+ class TernaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ // 32-bit atomics without a result: a pointer followed by zero to three i32
+ // parameters.
+ class VoidAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+ class UnaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+
+// 64-bit atomics: a pointer followed by one, two or three i64 parameters,
+// returning i64.
+class UnaryAtomicLong :
+ Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+class BinaryAtomicLong :
+ Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+// NOTE(review): no [IntrReadWriteArgMem] here either, unlike the sibling
+// atomic classes; confirm whether that is intentional.
+class TernaryAtomicLong :
+ Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty]>;
+// 64-bit atomics without a result: a pointer followed by zero to three i64
+// parameters.
+class VoidAtomicLongNoRet :
+ Intrinsic<[], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+class UnaryAtomicLongNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+class BinaryAtomicLongNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+class TernaryAtomicLongNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrReadWriteArgMem]>;
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,55 @@
+//===-- AMDILFrameLowering.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe the layout of a stack frame on an AMDIL target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+// Forwards the stack direction, stack alignment, local-area offset and
+// transient alignment unchanged to the TargetFrameLowering base class.
+AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
+                                       int LAO, unsigned TransAl)
+  : TargetFrameLowering(D, StackAl, LAO, TransAl) {}
+
+// Nothing to release: the destructor body is intentionally empty.
+AMDILFrameLowering::~AMDILFrameLowering() {}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. The value is the object offset
+/// recorded for FI in the function's MachineFrameInfo, returned unmodified.
+int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+                                            int FI) const
+{
+  return MF.getFrameInfo()->getObjectOffset(FI);
+}
+
+/// Reports that there are no fixed callee-saved spill slots: NumEntries is
+/// set to zero and a null table pointer is returned.
+const TargetFrameLowering::SpillSlot *
+AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
+{
+  NumEntries = 0;   // empty table
+  return 0;         // null: there is no slot array
+}
+/// Emits no prologue code: the body is intentionally empty.
+void AMDILFrameLowering::emitPrologue(MachineFunction &MF) const {}
+/// Emits no epilogue code: the body is intentionally empty.
+void AMDILFrameLowering::emitEpilogue(MachineFunction &MF,
+                                      MachineBasicBlock &MBB) const {}
+/// Always answers false: no function is reported as having a frame pointer.
+bool AMDILFrameLowering::hasFP(const MachineFunction &MF) const
+{
+  return false;
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,42 @@
+//===-- AMDILFrameLowering.h ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe the layout of a stack frame on an AMDIL target
+// machine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILFRAME_LOWERING_H_
+#define _AMDILFRAME_LOWERING_H_
+#include "AMDIL.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+/// Information about the stack frame layout on the AMDIL targets. It holds
+/// the direction of the stack growth, the known stack alignment on entry to
+/// each function, and the offset to the locals area.
+/// See TargetFrameLowering for more comments.
+
+namespace llvm
+{
+// Frame lowering for the AMDIL target, implemented as a subclass of
+// TargetFrameLowering that overrides the virtual hooks declared below.
+class AMDILFrameLowering : public TargetFrameLowering
+{
+public:
+ // D, StackAl, LAO and TransAl are passed straight to the
+ // TargetFrameLowering constructor; TransAl defaults to 1.
+ AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
+ TransAl = 1);
+ virtual ~AMDILFrameLowering();
+ // Returns the offset associated with frame index FI in function MF.
+ virtual int getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const;
+ // Returns the callee-saved spill slot table and stores its length in
+ // NumEntries.
+ virtual const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+ // Prologue/epilogue emission hooks.
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ // Whether MF has a frame pointer.
+ virtual bool hasFP(const MachineFunction &MF) const;
+}; // class AMDILFrameLowering
+} // namespace llvm
+#endif // _AMDILFRAME_LOWERING_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1337 @@
+//===-- AMDILIOExpansion.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDIL IO Expansion class expands pseudo IO instructions into a sequence
+// of instructions that produces the correct results. These instructions are
+// not expanded earlier in the pipeline because any pass that runs before this
+// one may assume it can still generate a load/store instruction. Consequently,
+// a pass may run after this one only if it never generates load/store
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDIL.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+char AMDILIOExpansion::ID = 0;
+namespace llvm
+{
+/// Factory for the IO expansion pass: the pass object is obtained from the
+/// device of TM's AMDIL subtarget via getIOExpansion().
+FunctionPass*
+createAMDILIOExpansion(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+{
+  const AMDILSubtarget &subtarget = TM.getSubtarget<AMDILSubtarget>();
+  return subtarget.device()->getIOExpansion(TM, OptLevel);
+}
+}
+
+// Caches the AMDIL subtarget and the instruction info of the target machine
+// and records the DEBUGME setting. The kernel manager pointer starts out null.
+// OptLevel is accepted but not used by this constructor.
+AMDILIOExpansion::AMDILIOExpansion(TargetMachine &tm,
+                                   CodeGenOpt::Level OptLevel)
+  : MachineFunctionPass(ID), TM(tm)
+{
+  mKM = NULL;
+  mDebug = DEBUGME;
+  mSTM = &tm.getSubtarget<AMDILSubtarget>();
+  mTII = tm.getInstrInfo();
+}
+
+// Nothing to release: the destructor body is intentionally empty.
+AMDILIOExpansion::~AMDILIOExpansion() {}
+
+/// Visits every instruction of every basic block in MF and hands each
+/// load/store instruction to expandIOInstruction(). Unless the expansion set
+/// saveInst, the original instruction is erased afterwards.
+///
+/// Returns true when at least one IO instruction was processed, i.e. when the
+/// function may have been modified, and false otherwise.
+bool
+AMDILIOExpansion::runOnMachineFunction(MachineFunction &MF)
+{
+  mKM = const_cast<AMDILKernelManager*>(mSTM->getKernelManager());
+  mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+  bool changed = false;
+  for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+       MFI != MFE; ++MFI) {
+    MachineBasicBlock *MBB = MFI;
+    // The iterator is advanced inside the loop body so that erasing the
+    // current instruction never requires stepping backwards, which would be
+    // invalid if the erased instruction were the first one in the block.
+    for (MachineBasicBlock::iterator MBI = MBB->begin(), MBE = MBB->end();
+         MBI != MBE; ) {
+      MachineInstr *MI = MBI;
+      if (!isIOInstruction(MI)) {
+        ++MBI;
+        continue;
+      }
+      mBB = MBB;
+      saveInst = false;
+      expandIOInstruction(MI);
+      changed = true;
+      if (saveInst) {
+        // The expansion asked to keep the original instruction.
+        ++MBI;
+      } else {
+        // erase returns the instruction after the erased one, which is
+        // exactly where the scan has to continue.
+        MBI = MBB->erase(MI);
+      }
+    }
+  }
+  return changed;
+}
+/// Human-readable name of this pass.
+const char *
+AMDILIOExpansion::getPassName() const
+{
+  return "AMDIL Generic IO Expansion Pass";
+}
+/// An instruction counts as an IO instruction when it is non-null and is
+/// classified as either a load or a store.
+bool
+AMDILIOExpansion::isIOInstruction(MachineInstr *MI)
+{
+  return MI && (isLoadInst(TM, MI) || isStoreInst(TM, MI));
+}
+/// Dispatches an IO instruction to the expansion routine that matches its
+/// kind (load or store) and its address space. Loads are routed by global,
+/// region, private, local or constant space, with constant-pool loads split
+/// off from other constant loads; stores are routed by global, region,
+/// private or local space. Any other combination trips an assertion.
+void
+AMDILIOExpansion::expandIOInstruction(MachineInstr *MI)
+{
+  assert(isIOInstruction(MI) && "Must be an IO instruction to "
+         "be passed to this function!");
+  if (isLoadInst(TM, MI)) {
+    if (isGlobalInst(TM, MI)) {
+      expandGlobalLoad(MI);
+    } else if (isRegionInst(TM, MI)) {
+      expandRegionLoad(MI);
+    } else if (isPrivateInst(TM, MI)) {
+      expandPrivateLoad(MI);
+    } else if (isLocalInst(TM, MI)) {
+      expandLocalLoad(MI);
+    } else if (isConstantInst(TM, MI)) {
+      if (isConstantPoolInst(TM, MI)) {
+        expandConstantPoolLoad(MI);
+      } else {
+        expandConstantLoad(MI);
+      }
+    } else {
+      assert(!"Found an unsupported load instruction!");
+    }
+  } else if (isStoreInst(TM, MI)) {
+    if (isGlobalInst(TM, MI)) {
+      expandGlobalStore(MI);
+    } else if (isRegionInst(TM, MI)) {
+      expandRegionStore(MI);
+    } else if (isPrivateInst(TM, MI)) {
+      expandPrivateStore(MI);
+    } else if (isLocalInst(TM, MI)) {
+      expandLocalStore(MI);
+    } else {
+      // This is the store path, so the diagnostic names a store.
+      assert(!"Found an unsupported store instruction!");
+    }
+  } else {
+    assert(!"Found an unsupported IO instruction!");
+  }
+}
+
+/// Decides whether MI needs a software address calculation.
+///  - private load: true only when private memory is emulated in software and
+///    the load's base value is not a global variable (see the note below);
+///    false when the base value is unknown or there is no memory operand.
+///  - constant-pool load: true when operand 1 is a register.
+///  - private store: true when private memory is emulated in software.
+///  - local load/store: true when local memory is emulated in software.
+///  - anything else: false.
+bool
+AMDILIOExpansion::isAddrCalcInstr(MachineInstr *MI)
+{
+  if (isPrivateInst(TM, MI) && isLoadInst(TM, MI)) {
+    // This section of code is a workaround for the problem of
+    // globally scoped constant address variables. The problem is
+    // that although they are declared in the constant address
+    // space, all variables must be allocated in the private
+    // address space. So when there is a load from the global
+    // address, it automatically goes into the private address
+    // space. However, the data section is placed in the constant
+    // address space, so we need to check whether our load base
+    // address is a global variable or not. Only if it is not a
+    // global variable can we do the address calculation into the
+    // private memory ring.
+
+    // Without a memory operand there is no base value to inspect; treat it
+    // like an unknown base instead of dereferencing an empty operand list.
+    if (MI->memoperands_empty()) {
+      return false;
+    }
+    const Value *V = (*MI->memoperands_begin())->getValue();
+    if (!V) {
+      return false;
+    }
+    return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)
+           && !isa<GlobalVariable>(V);
+  } else if (isConstantPoolInst(TM, MI) && isLoadInst(TM, MI)) {
+    return MI->getOperand(1).isReg();
+  } else if (isPrivateInst(TM, MI) && isStoreInst(TM, MI)) {
+    return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem);
+  } else if (isLocalInst(TM, MI) && (isStoreInst(TM, MI) || isLoadInst(TM, MI))) {
+    return mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem);
+  }
+  return false;
+}
+
+/// True when MI is a sign-, zero- or any-extending load.
+bool
+AMDILIOExpansion::isExtendLoad(MachineInstr *MI)
+{
+  if (isSExtLoadInst(TM, MI)) {
+    return true;
+  }
+  if (isZExtLoadInst(TM, MI)) {
+    return true;
+  }
+  return isAExtLoadInst(TM, MI);
+}
+
+/// True when MI is a region-space load or store and the device reports
+/// hardware support for region memory.
+bool
+AMDILIOExpansion::isHardwareRegion(MachineInstr *MI)
+{
+  if (!isRegionInst(TM, MI)) {
+    return false;
+  }
+  if (!isLoadInst(TM, MI) && !isStoreInst(TM, MI)) {
+    return false;
+  }
+  return mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+}
+/// True when MI is a local-space load or store and the device reports
+/// hardware support for local memory.
+bool
+AMDILIOExpansion::isHardwareLocal(MachineInstr *MI)
+{
+  if (!isLocalInst(TM, MI)) {
+    return false;
+  }
+  if (!isLoadInst(TM, MI) && !isStoreInst(TM, MI)) {
+    return false;
+  }
+  return mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+}
+/// Reports whether MI is treated as a packed-data access.
+/// Returns true for every opcode covered by the ExpandCaseTo* lists below.
+/// For any other opcode it returns true only for a truncating store whose
+/// operand 0 register class and memory size match one of the combinations in
+/// the default branch; otherwise it returns false.
+bool
+AMDILIOExpansion::isPackedData(MachineInstr *MI)
+{
+ switch(MI->getOpcode()) {
+ // The default label is written first; it handles opcodes that are not
+ // listed explicitly further down.
+ default:
+ if (isTruncStoreInst(TM, MI)) {
+ // Classify by the register class of operand 0 and the memory size.
+ switch (MI->getDesc().OpInfo[0].RegClass) {
+ default:
+ break;
+ // v2i64 / v2i32 sources count as packed when the memory size is 2 or 4.
+ case AMDIL::GPRV2I64RegClassID:
+ case AMDIL::GPRV2I32RegClassID:
+ switch (getMemorySize(MI)) {
+ case 2:
+ case 4:
+ return true;
+ default:
+ break;
+ }
+ break;
+ // v4i32 sources count as packed when the memory size is 4 or 8.
+ case AMDIL::GPRV4I32RegClassID:
+ switch (getMemorySize(MI)) {
+ case 4:
+ case 8:
+ return true;
+ default:
+ break;
+ }
+ break;
+ }
+ }
+ break;
+ // NOTE(review): the ExpandCaseTo* macros are defined outside this file
+ // section; presumably each expands to a run of case labels, so all of the
+ // opcodes below share the single "return true" at the end of the list.
+ ExpandCaseToPackedTypes(AMDIL::CPOOLLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATELOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATESEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATEZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATEAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTZEXTLOAD);
+ ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALSTORE);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATESTORE);
+ ExpandCaseToPackedTypes(AMDIL::LOCALSTORE);
+ ExpandCaseToPackedTypes(AMDIL::REGIONSTORE);
+ // Same opcode families with the 64 suffix.
+ ExpandCaseToPackedTypes(AMDIL::CPOOLLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLSEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLZEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLAEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALSEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALZEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALAEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::LOCALLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::LOCALSEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::LOCALZEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::LOCALAEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::REGIONLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::REGIONSEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::REGIONZEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::REGIONAEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATELOAD64);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATESEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATEZEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATEAEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTSEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTAEXTLOAD64);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTZEXTLOAD64);
+ ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE64)
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE64);
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE64);
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE64);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALSTORE64);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATESTORE64);
+ ExpandCaseToPackedTypes(AMDIL::LOCALSTORE64);
+ ExpandCaseToPackedTypes(AMDIL::REGIONSTORE64);
+ return true;
+ }
+ // Reached only through the breaks in the default branch above.
+ return false;
+}
+
+/// True when MI is a constant-pool load and at least one of its operands is
+/// a constant-pool index.
+bool
+AMDILIOExpansion::isStaticCPLoad(MachineInstr *MI)
+{
+  if (!isConstantPoolInst(TM, MI) || !isLoadInst(TM, MI)) {
+    return false;
+  }
+  const uint32_t numOps = MI->getNumOperands();
+  for (uint32_t idx = 0; idx < numOps; ++idx) {
+    if (MI->getOperand(idx).isCPI()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/// Checks whether mType is nBits wide.
+///  - null type: false.
+///  - pointer: the question is asked of the pointee type.
+///  - struct: the target-data size of the whole struct is compared.
+///  - vector / array: with isScalar set, element count times scalar size is
+///    compared; otherwise only the scalar size is compared.
+///  - any other sized type: its scalar size is compared.
+/// Unsized types of any other kind trip an assertion and yield false.
+bool
+AMDILIOExpansion::isNbitType(Type *mType, uint32_t nBits, bool isScalar)
+{
+  if (!mType) {
+    return false;
+  }
+  if (PointerType *ptrTy = dyn_cast<PointerType>(mType)) {
+    // NOTE(review): isScalar is not forwarded, so the recursive call uses
+    // whatever default the declaration provides - confirm this is intended.
+    return isNbitType(ptrTy->getElementType(), nBits);
+  }
+  if (dyn_cast<StructType>(mType)) {
+    return TM.getTargetData()->getTypeSizeInBits(mType) == nBits;
+  }
+  if (VectorType *vecTy = dyn_cast<VectorType>(mType)) {
+    size_t elemBits = vecTy->getScalarSizeInBits();
+    if (isScalar) {
+      return vecTy->getNumElements() * elemBits == nBits;
+    }
+    return elemBits == nBits;
+  }
+  if (ArrayType *arrTy = dyn_cast<ArrayType>(mType)) {
+    size_t elemBits = arrTy->getScalarSizeInBits();
+    if (isScalar) {
+      return arrTy->getNumElements() * elemBits == nBits;
+    }
+    return elemBits == nBits;
+  }
+  if (mType->isSized()) {
+    return mType->getScalarSizeInBits() == nBits;
+  }
+  assert(0 && "Found a type that we don't know how to handle!");
+  return false;
+}
+
+/// Reads MI's asm-printer flags and returns their HardwareInst bit.
+bool
+AMDILIOExpansion::isHardwareInst(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc flags;
+  getAsmPrinterFlags(MI, flags);
+  return flags.bits.HardwareInst;
+}
+
+/// Chooses the R1011 register variant (Rx1011, Rxy1011 or the full R1011)
+/// used for MI's data, based on its memory size and packed-type id:
+///   size 8: R1011 for (un)packed v4i16, otherwise Rxy1011
+///   size 4: R1011 for (un)packed v4i8, Rxy1011 for (un)packed v2i16 or
+///           v2i8, otherwise Rx1011
+///   size 2: Rxy1011 for (un)packed v2i8, otherwise Rx1011
+///   size 1: Rx1011
+///   any other size: R1011
+uint32_t
+AMDILIOExpansion::getDataReg(MachineInstr *MI)
+{
+  const REG_PACKED_TYPE packedId = getPackedID(MI);
+  const bool isV2I8 = (packedId == UNPACK_V2I8 || packedId == PACK_V2I8);
+  const bool isV4I8 = (packedId == UNPACK_V4I8 || packedId == PACK_V4I8);
+  const bool isV2I16 = (packedId == UNPACK_V2I16 || packedId == PACK_V2I16);
+  const bool isV4I16 = (packedId == UNPACK_V4I16 || packedId == PACK_V4I16);
+  switch (getMemorySize(MI)) {
+  case 8:
+    return isV4I16 ? AMDIL::R1011 : AMDIL::Rxy1011;
+  case 4:
+    if (isV4I8) {
+      return AMDIL::R1011;
+    }
+    if (isV2I16 || isV2I8) {
+      return AMDIL::Rxy1011;
+    }
+    return AMDIL::Rx1011;
+  case 2:
+    return isV2I8 ? AMDIL::Rxy1011 : AMDIL::Rx1011;
+  case 1:
+    return AMDIL::Rx1011;
+  default:
+    return AMDIL::R1011;
+  }
+}
+
+REG_PACKED_TYPE
+AMDILIOExpansion::getPackedID(MachineInstr *MI)
+{
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64_v2i64i8:
+ case AMDIL::REGIONTRUNCSTORE64_v2i64i8:
+ case AMDIL::LOCALTRUNCSTORE64_v2i64i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i64i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
+ case AMDIL::GLOBALTRUNCSTORE64_v2i32i8:
+ case AMDIL::REGIONTRUNCSTORE64_v2i32i8:
+ case AMDIL::LOCALTRUNCSTORE64_v2i32i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
+ case AMDIL::GLOBALTRUNCSTORE64_v2i16i8:
+ case AMDIL::REGIONTRUNCSTORE64_v2i16i8:
+ case AMDIL::LOCALTRUNCSTORE64_v2i16i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i16i8:
+ case AMDIL::GLOBALSTORE64_v2i8:
+ case AMDIL::LOCALSTORE64_v2i8:
+ case AMDIL::REGIONSTORE64_v2i8:
+ case AMDIL::PRIVATESTORE64_v2i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
+ case AMDIL::GLOBALSTORE_v2i8:
+ case AMDIL::LOCALSTORE_v2i8:
+ case AMDIL::REGIONSTORE_v2i8:
+ case AMDIL::PRIVATESTORE_v2i8:
+ return PACK_V2I8;
+ case AMDIL::GLOBALTRUNCSTORE64_v4i32i8:
+ case AMDIL::REGIONTRUNCSTORE64_v4i32i8:
+ case AMDIL::LOCALTRUNCSTORE64_v4i32i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v4i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
+ case AMDIL::GLOBALTRUNCSTORE64_v4i16i8:
+ case AMDIL::REGIONTRUNCSTORE64_v4i16i8:
+ case AMDIL::LOCALTRUNCSTORE64_v4i16i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v4i16i8:
+ case AMDIL::GLOBALSTORE64_v4i8:
+ case AMDIL::LOCALSTORE64_v4i8:
+ case AMDIL::REGIONSTORE64_v4i8:
+ case AMDIL::PRIVATESTORE64_v4i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
+ case AMDIL::GLOBALSTORE_v4i8:
+ case AMDIL::LOCALSTORE_v4i8:
+ case AMDIL::REGIONSTORE_v4i8:
+ case AMDIL::PRIVATESTORE_v4i8:
+ return PACK_V4I8;
+ case AMDIL::GLOBALTRUNCSTORE64_v2i64i16:
+ case AMDIL::REGIONTRUNCSTORE64_v2i64i16:
+ case AMDIL::LOCALTRUNCSTORE64_v2i64i16:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i64i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
+ case AMDIL::GLOBALTRUNCSTORE64_v2i32i16:
+ case AMDIL::REGIONTRUNCSTORE64_v2i32i16:
+ case AMDIL::LOCALTRUNCSTORE64_v2i32i16:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i32i16:
+ case AMDIL::GLOBALSTORE64_v2i16:
+ case AMDIL::LOCALSTORE64_v2i16:
+ case AMDIL::REGIONSTORE64_v2i16:
+ case AMDIL::PRIVATESTORE64_v2i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
+ case AMDIL::GLOBALSTORE_v2i16:
+ case AMDIL::LOCALSTORE_v2i16:
+ case AMDIL::REGIONSTORE_v2i16:
+ case AMDIL::PRIVATESTORE_v2i16:
+ return PACK_V2I16;
+ case AMDIL::GLOBALTRUNCSTORE64_v4i32i16:
+ case AMDIL::REGIONTRUNCSTORE64_v4i32i16:
+ case AMDIL::LOCALTRUNCSTORE64_v4i32i16:
+ case AMDIL::PRIVATETRUNCSTORE64_v4i32i16:
+ case AMDIL::GLOBALSTORE64_v4i16:
+ case AMDIL::LOCALSTORE64_v4i16:
+ case AMDIL::REGIONSTORE64_v4i16:
+ case AMDIL::PRIVATESTORE64_v4i16:
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
+ case AMDIL::GLOBALSTORE_v4i16:
+ case AMDIL::LOCALSTORE_v4i16:
+ case AMDIL::REGIONSTORE_v4i16:
+ case AMDIL::PRIVATESTORE_v4i16:
+ return PACK_V4I16;
+
+ case AMDIL::GLOBALLOAD64_v2i8:
+ case AMDIL::GLOBALSEXTLOAD64_v2i8:
+ case AMDIL::GLOBALAEXTLOAD64_v2i8:
+ case AMDIL::GLOBALZEXTLOAD64_v2i8:
+ case AMDIL::LOCALLOAD64_v2i8:
+ case AMDIL::LOCALSEXTLOAD64_v2i8:
+ case AMDIL::LOCALAEXTLOAD64_v2i8:
+ case AMDIL::LOCALZEXTLOAD64_v2i8:
+ case AMDIL::REGIONLOAD64_v2i8:
+ case AMDIL::REGIONSEXTLOAD64_v2i8:
+ case AMDIL::REGIONAEXTLOAD64_v2i8:
+ case AMDIL::REGIONZEXTLOAD64_v2i8:
+ case AMDIL::PRIVATELOAD64_v2i8:
+ case AMDIL::PRIVATESEXTLOAD64_v2i8:
+ case AMDIL::PRIVATEAEXTLOAD64_v2i8:
+ case AMDIL::PRIVATEZEXTLOAD64_v2i8:
+ case AMDIL::CONSTANTLOAD64_v2i8:
+ case AMDIL::CONSTANTSEXTLOAD64_v2i8:
+ case AMDIL::CONSTANTAEXTLOAD64_v2i8:
+ case AMDIL::CONSTANTZEXTLOAD64_v2i8:
+ case AMDIL::GLOBALLOAD_v2i8:
+ case AMDIL::GLOBALSEXTLOAD_v2i8:
+ case AMDIL::GLOBALAEXTLOAD_v2i8:
+ case AMDIL::GLOBALZEXTLOAD_v2i8:
+ case AMDIL::LOCALLOAD_v2i8:
+ case AMDIL::LOCALSEXTLOAD_v2i8:
+ case AMDIL::LOCALAEXTLOAD_v2i8:
+ case AMDIL::LOCALZEXTLOAD_v2i8:
+ case AMDIL::REGIONLOAD_v2i8:
+ case AMDIL::REGIONSEXTLOAD_v2i8:
+ case AMDIL::REGIONAEXTLOAD_v2i8:
+ case AMDIL::REGIONZEXTLOAD_v2i8:
+ case AMDIL::PRIVATELOAD_v2i8:
+ case AMDIL::PRIVATESEXTLOAD_v2i8:
+ case AMDIL::PRIVATEAEXTLOAD_v2i8:
+ case AMDIL::PRIVATEZEXTLOAD_v2i8:
+ case AMDIL::CONSTANTLOAD_v2i8:
+ case AMDIL::CONSTANTSEXTLOAD_v2i8:
+ case AMDIL::CONSTANTAEXTLOAD_v2i8:
+ case AMDIL::CONSTANTZEXTLOAD_v2i8:
+ return UNPACK_V2I8;
+
+ case AMDIL::GLOBALLOAD64_v4i8:
+ case AMDIL::GLOBALSEXTLOAD64_v4i8:
+ case AMDIL::GLOBALAEXTLOAD64_v4i8:
+ case AMDIL::GLOBALZEXTLOAD64_v4i8:
+ case AMDIL::LOCALLOAD64_v4i8:
+ case AMDIL::LOCALSEXTLOAD64_v4i8:
+ case AMDIL::LOCALAEXTLOAD64_v4i8:
+ case AMDIL::LOCALZEXTLOAD64_v4i8:
+ case AMDIL::REGIONLOAD64_v4i8:
+ case AMDIL::REGIONSEXTLOAD64_v4i8:
+ case AMDIL::REGIONAEXTLOAD64_v4i8:
+ case AMDIL::REGIONZEXTLOAD64_v4i8:
+ case AMDIL::PRIVATELOAD64_v4i8:
+ case AMDIL::PRIVATESEXTLOAD64_v4i8:
+ case AMDIL::PRIVATEAEXTLOAD64_v4i8:
+ case AMDIL::PRIVATEZEXTLOAD64_v4i8:
+ case AMDIL::CONSTANTLOAD64_v4i8:
+ case AMDIL::CONSTANTSEXTLOAD64_v4i8:
+ case AMDIL::CONSTANTAEXTLOAD64_v4i8:
+ case AMDIL::CONSTANTZEXTLOAD64_v4i8:
+ case AMDIL::GLOBALLOAD_v4i8:
+ case AMDIL::GLOBALSEXTLOAD_v4i8:
+ case AMDIL::GLOBALAEXTLOAD_v4i8:
+ case AMDIL::GLOBALZEXTLOAD_v4i8:
+ case AMDIL::LOCALLOAD_v4i8:
+ case AMDIL::LOCALSEXTLOAD_v4i8:
+ case AMDIL::LOCALAEXTLOAD_v4i8:
+ case AMDIL::LOCALZEXTLOAD_v4i8:
+ case AMDIL::REGIONLOAD_v4i8:
+ case AMDIL::REGIONSEXTLOAD_v4i8:
+ case AMDIL::REGIONAEXTLOAD_v4i8:
+ case AMDIL::REGIONZEXTLOAD_v4i8:
+ case AMDIL::PRIVATELOAD_v4i8:
+ case AMDIL::PRIVATESEXTLOAD_v4i8:
+ case AMDIL::PRIVATEAEXTLOAD_v4i8:
+ case AMDIL::PRIVATEZEXTLOAD_v4i8:
+ case AMDIL::CONSTANTLOAD_v4i8:
+ case AMDIL::CONSTANTSEXTLOAD_v4i8:
+ case AMDIL::CONSTANTAEXTLOAD_v4i8:
+ case AMDIL::CONSTANTZEXTLOAD_v4i8:
+ return UNPACK_V4I8;
+
+ case AMDIL::GLOBALLOAD64_v2i16:
+ case AMDIL::GLOBALSEXTLOAD64_v2i16:
+ case AMDIL::GLOBALAEXTLOAD64_v2i16:
+ case AMDIL::GLOBALZEXTLOAD64_v2i16:
+ case AMDIL::LOCALLOAD64_v2i16:
+ case AMDIL::LOCALSEXTLOAD64_v2i16:
+ case AMDIL::LOCALAEXTLOAD64_v2i16:
+ case AMDIL::LOCALZEXTLOAD64_v2i16:
+ case AMDIL::REGIONLOAD64_v2i16:
+ case AMDIL::REGIONSEXTLOAD64_v2i16:
+ case AMDIL::REGIONAEXTLOAD64_v2i16:
+ case AMDIL::REGIONZEXTLOAD64_v2i16:
+ case AMDIL::PRIVATELOAD64_v2i16:
+ case AMDIL::PRIVATESEXTLOAD64_v2i16:
+ case AMDIL::PRIVATEAEXTLOAD64_v2i16:
+ case AMDIL::PRIVATEZEXTLOAD64_v2i16:
+ case AMDIL::CONSTANTLOAD64_v2i16:
+ case AMDIL::CONSTANTSEXTLOAD64_v2i16:
+ case AMDIL::CONSTANTAEXTLOAD64_v2i16:
+ case AMDIL::CONSTANTZEXTLOAD64_v2i16:
+ case AMDIL::GLOBALLOAD_v2i16:
+ case AMDIL::GLOBALSEXTLOAD_v2i16:
+ case AMDIL::GLOBALAEXTLOAD_v2i16:
+ case AMDIL::GLOBALZEXTLOAD_v2i16:
+ case AMDIL::LOCALLOAD_v2i16:
+ case AMDIL::LOCALSEXTLOAD_v2i16:
+ case AMDIL::LOCALAEXTLOAD_v2i16:
+ case AMDIL::LOCALZEXTLOAD_v2i16:
+ case AMDIL::REGIONLOAD_v2i16:
+ case AMDIL::REGIONSEXTLOAD_v2i16:
+ case AMDIL::REGIONAEXTLOAD_v2i16:
+ case AMDIL::REGIONZEXTLOAD_v2i16:
+ case AMDIL::PRIVATELOAD_v2i16:
+ case AMDIL::PRIVATESEXTLOAD_v2i16:
+ case AMDIL::PRIVATEAEXTLOAD_v2i16:
+ case AMDIL::PRIVATEZEXTLOAD_v2i16:
+ case AMDIL::CONSTANTLOAD_v2i16:
+ case AMDIL::CONSTANTSEXTLOAD_v2i16:
+ case AMDIL::CONSTANTAEXTLOAD_v2i16:
+ case AMDIL::CONSTANTZEXTLOAD_v2i16:
+ return UNPACK_V2I16;
+
+ case AMDIL::GLOBALLOAD64_v4i16:
+ case AMDIL::GLOBALSEXTLOAD64_v4i16:
+ case AMDIL::GLOBALAEXTLOAD64_v4i16:
+ case AMDIL::GLOBALZEXTLOAD64_v4i16:
+ case AMDIL::LOCALLOAD64_v4i16:
+ case AMDIL::LOCALSEXTLOAD64_v4i16:
+ case AMDIL::LOCALAEXTLOAD64_v4i16:
+ case AMDIL::LOCALZEXTLOAD64_v4i16:
+ case AMDIL::REGIONLOAD64_v4i16:
+ case AMDIL::REGIONSEXTLOAD64_v4i16:
+ case AMDIL::REGIONAEXTLOAD64_v4i16:
+ case AMDIL::REGIONZEXTLOAD64_v4i16:
+ case AMDIL::PRIVATELOAD64_v4i16:
+ case AMDIL::PRIVATESEXTLOAD64_v4i16:
+ case AMDIL::PRIVATEAEXTLOAD64_v4i16:
+ case AMDIL::PRIVATEZEXTLOAD64_v4i16:
+ case AMDIL::CONSTANTLOAD64_v4i16:
+ case AMDIL::CONSTANTSEXTLOAD64_v4i16:
+ case AMDIL::CONSTANTAEXTLOAD64_v4i16:
+ case AMDIL::CONSTANTZEXTLOAD64_v4i16:
+ case AMDIL::GLOBALLOAD_v4i16:
+ case AMDIL::GLOBALSEXTLOAD_v4i16:
+ case AMDIL::GLOBALAEXTLOAD_v4i16:
+ case AMDIL::GLOBALZEXTLOAD_v4i16:
+ case AMDIL::LOCALLOAD_v4i16:
+ case AMDIL::LOCALSEXTLOAD_v4i16:
+ case AMDIL::LOCALAEXTLOAD_v4i16:
+ case AMDIL::LOCALZEXTLOAD_v4i16:
+ case AMDIL::REGIONLOAD_v4i16:
+ case AMDIL::REGIONSEXTLOAD_v4i16:
+ case AMDIL::REGIONAEXTLOAD_v4i16:
+ case AMDIL::REGIONZEXTLOAD_v4i16:
+ case AMDIL::PRIVATELOAD_v4i16:
+ case AMDIL::PRIVATESEXTLOAD_v4i16:
+ case AMDIL::PRIVATEAEXTLOAD_v4i16:
+ case AMDIL::PRIVATEZEXTLOAD_v4i16:
+ case AMDIL::CONSTANTLOAD_v4i16:
+ case AMDIL::CONSTANTSEXTLOAD_v4i16:
+ case AMDIL::CONSTANTAEXTLOAD_v4i16:
+ case AMDIL::CONSTANTZEXTLOAD_v4i16:
+ return UNPACK_V4I16;
+ };
+ return NO_PACKING;
+}
+
+// Fetches the InstrResEnc record for MI through getAsmPrinterFlags() and
+// returns the ResourceID bit-field stored in it.
+uint32_t
+AMDILIOExpansion::getPointerID(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc resEnc;
+  getAsmPrinterFlags(MI, resEnc);
+  const uint32_t resourceID = resEnc.bits.ResourceID;
+  return resourceID;
+}
+
+// Maps the packed-register kind reported by getPackedID(MI) to a shift size:
+// 1 for the v2i8/v4i8 pack and unpack kinds, 2 for the v2i16/v4i16 kinds,
+// and 0 for everything else.
+uint32_t
+AMDILIOExpansion::getShiftSize(MachineInstr *MI)
+{
+  uint32_t shiftSize = 0;
+  switch (getPackedID(MI)) {
+  case PACK_V2I8:
+  case PACK_V4I8:
+  case UNPACK_V2I8:
+  case UNPACK_V4I8:
+    shiftSize = 1;
+    break;
+  case PACK_V2I16:
+  case PACK_V4I16:
+  case UNPACK_V2I16:
+  case UNPACK_V4I16:
+    shiftSize = 2;
+    break;
+  default:
+    break;
+  }
+  return shiftSize;
+}
+// Returns the size recorded on MI's first memory operand, or 4 when MI has no
+// memory operands at all.
+uint32_t
+AMDILIOExpansion::getMemorySize(MachineInstr *MI)
+{
+  return MI->memoperands_empty()
+    ? 4
+    : (uint32_t)((*MI->memoperands_begin())->getSize());
+}
+
+// Emits, before MI, the instructions that extend a loaded value held in the
+// 1011 temporaries to 64-bit components.
+//   numComps    - number of components; only 1 and 2 are handled.
+//   size        - source width per component; only 8, 16 and 32 are handled.
+//   signedShift - selects the SHRVEC-based sequence instead of the
+//                 USHRVEC/LCREATE-with-zero sequence (callers pass
+//                 isSExtLoadInst()).
+// Returns the destination register of the last instruction emitted, or 0
+// (after asserting) for an unhandled numComps/size combination.
+unsigned
+AMDILIOExpansion::expandLongExtend(MachineInstr *MI,
+    uint32_t numComps, uint32_t size, bool signedShift)
+{
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (size == 8) {
+    if (numComps == 1) {
+      return expandLongExtendSub32(MI, AMDIL::SHL_i8, AMDIL::SHRVEC_v2i32,
+                                   AMDIL::USHRVEC_i8,
+                                   24, (24ULL | (31ULL << 32)), 24,
+                                   AMDIL::LCREATE, signedShift, false);
+    }
+    if (numComps == 2) {
+      return expandLongExtendSub32(MI, AMDIL::SHL_v2i8, AMDIL::SHRVEC_v4i32,
+                                   AMDIL::USHRVEC_v2i8,
+                                   24, (24ULL | (31ULL << 32)), 24,
+                                   AMDIL::LCREATE_v2i64, signedShift, true);
+    }
+    assert(0 && "Found a case we don't handle!");
+    return 0;
+  }
+
+  if (size == 16) {
+    if (numComps == 1) {
+      return expandLongExtendSub32(MI, AMDIL::SHL_i16, AMDIL::SHRVEC_v2i32,
+                                   AMDIL::USHRVEC_i16,
+                                   16, (16ULL | (31ULL << 32)), 16,
+                                   AMDIL::LCREATE, signedShift, false);
+    }
+    if (numComps == 2) {
+      return expandLongExtendSub32(MI, AMDIL::SHL_v2i16, AMDIL::SHRVEC_v4i32,
+                                   AMDIL::USHRVEC_v2i16,
+                                   16, (16ULL | (31ULL << 32)), 16,
+                                   AMDIL::LCREATE_v2i64, signedShift, true);
+    }
+    assert(0 && "Found a case we don't handle!");
+    return 0;
+  }
+
+  if (size == 32) {
+    MachineInstr *lastMI = NULL;
+    if (numComps == 1) {
+      if (signedShift) {
+        lastMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_i32),
+                         AMDIL::Rxy1011)
+          .addReg(AMDIL::Rx1011)
+          .addImm(mMFI->addi64Literal((0ULL | (31ULL << 32))));
+      } else {
+        lastMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE),
+                         AMDIL::Rxy1011)
+          .addReg(AMDIL::Rx1011)
+          .addImm(mMFI->addi32Literal(0));
+      }
+      return lastMI->getOperand(0).getReg();
+    }
+    if (numComps == 2) {
+      if (signedShift) {
+        // Build the upper halves in Rxy1012 first, then combine.
+        BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v2i32), AMDIL::Rxy1012)
+          .addReg(AMDIL::Rxy1011)
+          .addImm(mMFI->addi64Literal(31));
+        lastMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64),
+                         AMDIL::R1011)
+          .addReg(AMDIL::Rxy1011)
+          .addReg(AMDIL::Rxy1012);
+      } else {
+        lastMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64),
+                         AMDIL::R1011)
+          .addReg(AMDIL::Rxy1011)
+          .addImm(mMFI->addi32Literal(0));
+      }
+      return lastMI->getOperand(0).getReg();
+    }
+    assert(0 && "Found a case we don't handle!");
+    return 0;
+  }
+
+  assert(0 && "Found a case we don't handle!");
+  return 0;
+}
+// Shared helper for expandLongExtend() when the source components are
+// narrower than 32 bits. Emits, before MI:
+//   1. SHLop on the narrow register by the SHLimm literal;
+//   2. if signedShift: LCRop into the wide register followed by SHRop on the
+//      wide register with the 64-bit SHRimm literal;
+//      otherwise: USHRop on the narrow register by the USHRimm literal
+//      followed by LCRop into the wide register with a zero literal.
+// vec2 selects Rxy1011/R1011 as narrow/wide registers instead of
+// Rx1011/Rxy1011. Returns the destination register of the last instruction.
+unsigned
+AMDILIOExpansion::expandLongExtendSub32(MachineInstr *MI,
+    unsigned SHLop, unsigned SHRop, unsigned USHRop,
+    unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm,
+    unsigned LCRop, bool signedShift, bool vec2)
+{
+  const unsigned narrowReg = vec2 ? AMDIL::Rxy1011 : AMDIL::Rx1011;
+  const unsigned wideReg = vec2 ? AMDIL::R1011 : AMDIL::Rxy1011;
+  DebugLoc DL = MI->getDebugLoc();
+
+  BuildMI(*mBB, MI, DL, mTII->get(SHLop), narrowReg)
+    .addReg(narrowReg)
+    .addImm(mMFI->addi32Literal(SHLimm));
+
+  MachineInstr *lastMI = NULL;
+  if (!signedShift) {
+    BuildMI(*mBB, MI, DL, mTII->get(USHRop), narrowReg)
+      .addReg(narrowReg)
+      .addImm(mMFI->addi32Literal(USHRimm));
+    lastMI = BuildMI(*mBB, MI, DL, mTII->get(LCRop), wideReg)
+      .addReg(narrowReg)
+      .addImm(mMFI->addi32Literal(0));
+  } else {
+    // The second LCRop source is Rxy1011 for both the scalar and vec2 forms.
+    BuildMI(*mBB, MI, DL, mTII->get(LCRop), wideReg)
+      .addReg(narrowReg)
+      .addReg(AMDIL::Rxy1011);
+    lastMI = BuildMI(*mBB, MI, DL, mTII->get(SHRop), wideReg)
+      .addReg(wideReg)
+      .addImm(mMFI->addi64Literal(SHRimm));
+  }
+  return lastMI->getOperand(0).getReg();
+}
+
+// Emits, before MI, SHLop followed by SHRop on `reg`, both using the literal
+// registered for `offset` as the shift operand and both writing back to
+// `reg`. Returns `reg`.
+unsigned
+AMDILIOExpansion::expandIntegerExtend(MachineInstr *MI, unsigned SHLop,
+    unsigned SHRop, unsigned offset, unsigned reg)
+{
+  const unsigned shiftLit = mMFI->addi32Literal(offset);
+  DebugLoc DL = MI->getDebugLoc();
+  const unsigned shiftOps[2] = { SHLop, SHRop };
+  for (unsigned idx = 0; idx < 2; ++idx) {
+    BuildMI(*mBB, MI, DL, mTII->get(shiftOps[idx]), reg)
+      .addReg(reg).addImm(shiftLit);
+  }
+  return reg;
+}
+// Emits, before MI, the instructions that extend the value an extending load
+// left in the 1011 temporaries to the width of MI's result register class.
+// Returns the temporary register (Rx1011, Rxy1011 or R1011) holding the
+// extended value, or 0 when MI is not an extending load or no expansion was
+// emitted for it.
+unsigned
+AMDILIOExpansion::expandExtendLoad(MachineInstr *MI)
+{
+ if (!isExtendLoad(MI)) {
+ return 0;
+ }
+ // Type of the IR value behind the first memory operand, when one exists.
+ // It stays NULL otherwise and the checks below then rely on getMemorySize().
+ Type *mType = NULL;
+ if (!MI->memoperands_empty()) {
+ MachineMemOperand *memOp = (*MI->memoperands_begin());
+ const Value *moVal = (memOp) ? memOp->getValue() : NULL;
+ mType = (moVal) ? moVal->getType() : NULL;
+ }
+ unsigned opcode = 0;
+ DebugLoc DL = MI->getDebugLoc();
+ if (isZExtLoadInst(TM, MI) || isAExtLoadInst(TM, MI) || isSExtLoadInst(TM, MI)) {
+ // Dispatch on the register class of operand 0 (the load's result).
+ switch(MI->getDesc().OpInfo[0].RegClass) {
+ default:
+ assert(0 && "Found an extending load that we don't handle!");
+ break;
+ case AMDIL::GPRI16RegClassID:
+ // Only expanded when the load is not a hardware local load, or when the
+ // device reports ByteLDSOps as software; otherwise falls out and returns 0.
+ if (!isHardwareLocal(MI)
+ || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) {
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_i16 : AMDIL::USHRVEC_i16;
+ return expandIntegerExtend(MI, AMDIL::SHL_i16, opcode, 24, AMDIL::Rx1011);
+ }
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v2i16 : AMDIL::USHRVEC_v2i16;
+ return expandIntegerExtend(MI, AMDIL::SHL_v2i16, opcode, 24, AMDIL::Rxy1011);
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i8 : AMDIL::USHRVEC_v4i8;
+ return expandIntegerExtend(MI, AMDIL::SHL_v4i8, opcode, 24, AMDIL::R1011);
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i16 : AMDIL::USHRVEC_v4i16;
+ return expandIntegerExtend(MI, AMDIL::SHL_v4i16, opcode, 24, AMDIL::R1011);
+ break;
+ case AMDIL::GPRI32RegClassID:
+ // We can be a i8 or i16 bit sign extended value
+ // The shift amount is 24 for an 8-bit source and 16 for a 16-bit source.
+ if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
+ expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 24, AMDIL::Rx1011);
+ } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
+ expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 16, AMDIL::Rx1011);
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ return AMDIL::Rx1011;
+ break;
+ case AMDIL::GPRV2I32RegClassID:
+ // We can be a v2i8 or v2i16 bit sign extended value
+ if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 24, AMDIL::Rxy1011);
+ } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 16, AMDIL::Rxy1011);
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ return AMDIL::Rxy1011;
+ break;
+ case AMDIL::GPRV4I32RegClassID:
+ // We can be a v4i8 or v4i16 bit sign extended value
+ if (isNbitType(mType, 8, false) || getMemorySize(MI) == 4) {
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 24, AMDIL::R1011);
+ } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 8) {
+ opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 16, AMDIL::R1011);
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ return AMDIL::R1011;
+ break;
+ case AMDIL::GPRI64RegClassID:
+ // We can be a i8, i16 or i32 bit sign extended value
+ // 64-bit results are delegated to expandLongExtend(); an unhandled source
+ // width falls out of the switch and returns 0.
+ if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
+ return expandLongExtend(MI, 1, 8, isSExtLoadInst(TM, MI));
+ } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
+ return expandLongExtend(MI, 1, 16, isSExtLoadInst(TM, MI));
+ } else if (isNbitType(mType, 32) || getMemorySize(MI) == 4) {
+ return expandLongExtend(MI, 1, 32, isSExtLoadInst(TM, MI));
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRV2I64RegClassID:
+ // We can be a v2i8, v2i16 or v2i32 bit sign extended value
+ if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
+ return expandLongExtend(MI, 2, 8, isSExtLoadInst(TM, MI));
+ } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
+ return expandLongExtend(MI, 2, 16, isSExtLoadInst(TM, MI));
+ } else if (isNbitType(mType, 32, false) || getMemorySize(MI) == 8) {
+ return expandLongExtend(MI, 2, 32, isSExtLoadInst(TM, MI));
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ // Floating-point results: a single conversion instruction per value
+ // (HTOF_* for the f32 classes, FTOD for the f64 classes).
+ case AMDIL::GPRF32RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::HTOF_f32), AMDIL::Rx1011)
+ .addReg(AMDIL::Rx1011);
+ return AMDIL::Rx1011;
+ case AMDIL::GPRV2F32RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::HTOF_v2f32), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rxy1011);
+ return AMDIL::Rxy1011;
+ case AMDIL::GPRV4F32RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::HTOF_v4f32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ return AMDIL::R1011;
+ case AMDIL::GPRF64RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTOD), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rx1011);
+ return AMDIL::Rxy1011;
+ case AMDIL::GPRV2F64RegClassID:
+ // Ry1011 is converted into Rzw1011 first; the Rx1011 conversion that
+ // follows overwrites Rxy1011, so the order of these two matters.
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTOD), AMDIL::Rzw1011)
+ .addReg(AMDIL::Ry1011);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTOD), AMDIL::Rxy1011)
+ .addReg(AMDIL::Rx1011);
+ return AMDIL::R1011;
+ }
+ }
+ return 0;
+}
+
+// Emits, before MI, the instruction(s) that prepare the value in the 1011
+// temporaries for a truncating store. Does nothing unless
+// isTruncStoreInst(TM, MI) holds. The opcode groups below emit:
+//   *i64i8  : LLO_v2i64 R1011 -> Rxy1011, then AND of R1011 with 0xFF
+//   *i16i8, *i32i8 : AND of R1011 with 0xFF
+//   *i64i16 : LLO_v2i64 R1011 -> Rxy1011, then AND of R1011 with 0xFFFF
+//   *i32i16 : AND of R1011 with 0xFFFF
+//   i64i32 / v2i64i32 : LLO / LLO_v2i64 only
+//   f64f32 / v2f64f32 : one DTOF per component
+void
+AMDILIOExpansion::expandTruncData(MachineInstr *MI)
+{
+ if (!isTruncStoreInst(TM, MI)) {
+ return;
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ switch (MI->getOpcode()) {
+ default:
+ // The dump is not guarded by any debug flag; it runs whenever an
+ // unrecognised truncating store reaches this point.
+ MI->dump();
+ assert(!"Found a trunc store instructions we don't handle!");
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64_i64i8:
+ case AMDIL::GLOBALTRUNCSTORE64_v2i64i8:
+ case AMDIL::LOCALTRUNCSTORE64_i64i8:
+ case AMDIL::LOCALTRUNCSTORE64_v2i64i8:
+ case AMDIL::REGIONTRUNCSTORE64_i64i8:
+ case AMDIL::REGIONTRUNCSTORE64_v2i64i8:
+ case AMDIL::PRIVATETRUNCSTORE64_i64i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i64i8:
+ case AMDIL::GLOBALTRUNCSTORE_i64i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+ case AMDIL::LOCALTRUNCSTORE_i64i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i8:
+ case AMDIL::REGIONTRUNCSTORE_i64i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i8:
+ case AMDIL::PRIVATETRUNCSTORE_i64i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+ .addReg(AMDIL::R1011);
+ // No break here: control falls through into the 0xFF mask below.
+ // NOTE(review): this looks deliberate (narrow from 64 bits, then mask) - confirm.
+ case AMDIL::GLOBALTRUNCSTORE64_i16i8:
+ case AMDIL::GLOBALTRUNCSTORE64_v2i16i8:
+ case AMDIL::GLOBALTRUNCSTORE64_v4i16i8:
+ case AMDIL::LOCALTRUNCSTORE64_i16i8:
+ case AMDIL::LOCALTRUNCSTORE64_v2i16i8:
+ case AMDIL::LOCALTRUNCSTORE64_v4i16i8:
+ case AMDIL::REGIONTRUNCSTORE64_i16i8:
+ case AMDIL::REGIONTRUNCSTORE64_v2i16i8:
+ case AMDIL::REGIONTRUNCSTORE64_v4i16i8:
+ case AMDIL::PRIVATETRUNCSTORE64_i16i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i16i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v4i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
+ case AMDIL::LOCALTRUNCSTORE_i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i16i8:
+ case AMDIL::REGIONTRUNCSTORE_i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
+ case AMDIL::GLOBALTRUNCSTORE64_i32i8:
+ case AMDIL::GLOBALTRUNCSTORE64_v2i32i8:
+ case AMDIL::GLOBALTRUNCSTORE64_v4i32i8:
+ case AMDIL::LOCALTRUNCSTORE64_i32i8:
+ case AMDIL::LOCALTRUNCSTORE64_v2i32i8:
+ case AMDIL::LOCALTRUNCSTORE64_v4i32i8:
+ case AMDIL::REGIONTRUNCSTORE64_i32i8:
+ case AMDIL::REGIONTRUNCSTORE64_v2i32i8:
+ case AMDIL::REGIONTRUNCSTORE64_v4i32i8:
+ case AMDIL::PRIVATETRUNCSTORE64_i32i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i32i8:
+ case AMDIL::PRIVATETRUNCSTORE64_v4i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
+ case AMDIL::LOCALTRUNCSTORE_i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i8:
+ case AMDIL::REGIONTRUNCSTORE_i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
+ // Mask with the 0xFF literal; the v4i32 AND is applied to all of R1011
+ // regardless of the component count in the opcode.
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64_i64i16:
+ case AMDIL::GLOBALTRUNCSTORE64_v2i64i16:
+ case AMDIL::LOCALTRUNCSTORE64_i64i16:
+ case AMDIL::LOCALTRUNCSTORE64_v2i64i16:
+ case AMDIL::REGIONTRUNCSTORE64_i64i16:
+ case AMDIL::REGIONTRUNCSTORE64_v2i64i16:
+ case AMDIL::PRIVATETRUNCSTORE64_i64i16:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i64i16:
+ case AMDIL::GLOBALTRUNCSTORE_i64i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
+ case AMDIL::LOCALTRUNCSTORE_i64i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i16:
+ case AMDIL::REGIONTRUNCSTORE_i64i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i16:
+ case AMDIL::PRIVATETRUNCSTORE_i64i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+ .addReg(AMDIL::R1011);
+ // No break here: control falls through into the 0xFFFF mask below.
+ // NOTE(review): this looks deliberate (narrow from 64 bits, then mask) - confirm.
+ case AMDIL::GLOBALTRUNCSTORE64_i32i16:
+ case AMDIL::GLOBALTRUNCSTORE64_v2i32i16:
+ case AMDIL::GLOBALTRUNCSTORE64_v4i32i16:
+ case AMDIL::LOCALTRUNCSTORE64_i32i16:
+ case AMDIL::LOCALTRUNCSTORE64_v2i32i16:
+ case AMDIL::LOCALTRUNCSTORE64_v4i32i16:
+ case AMDIL::REGIONTRUNCSTORE64_i32i16:
+ case AMDIL::REGIONTRUNCSTORE64_v2i32i16:
+ case AMDIL::REGIONTRUNCSTORE64_v4i32i16:
+ case AMDIL::PRIVATETRUNCSTORE64_i32i16:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i32i16:
+ case AMDIL::PRIVATETRUNCSTORE64_v4i32i16:
+ case AMDIL::GLOBALTRUNCSTORE_i32i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
+ case AMDIL::LOCALTRUNCSTORE_i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i16:
+ case AMDIL::REGIONTRUNCSTORE_i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
+ // Mask with the 0xFFFF literal across all of R1011.
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64_i64i32:
+ case AMDIL::LOCALTRUNCSTORE64_i64i32:
+ case AMDIL::REGIONTRUNCSTORE64_i64i32:
+ case AMDIL::PRIVATETRUNCSTORE64_i64i32:
+ case AMDIL::GLOBALTRUNCSTORE_i64i32:
+ case AMDIL::LOCALTRUNCSTORE_i64i32:
+ case AMDIL::REGIONTRUNCSTORE_i64i32:
+ case AMDIL::PRIVATETRUNCSTORE_i64i32:
+ // Scalar i64 -> i32: LLO from Rxy1011 into Rx1011, no mask needed.
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO), AMDIL::Rx1011)
+ .addReg(AMDIL::Rxy1011);
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64_v2i64i32:
+ case AMDIL::LOCALTRUNCSTORE64_v2i64i32:
+ case AMDIL::REGIONTRUNCSTORE64_v2i64i32:
+ case AMDIL::PRIVATETRUNCSTORE64_v2i64i32:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i32:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i32:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i32:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i32:
+ // v2i64 -> v2i32: LLO_v2i64 from R1011 into Rxy1011.
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64_f64f32:
+ case AMDIL::LOCALTRUNCSTORE64_f64f32:
+ case AMDIL::REGIONTRUNCSTORE64_f64f32:
+ case AMDIL::PRIVATETRUNCSTORE64_f64f32:
+ case AMDIL::GLOBALTRUNCSTORE_f64f32:
+ case AMDIL::LOCALTRUNCSTORE_f64f32:
+ case AMDIL::REGIONTRUNCSTORE_f64f32:
+ case AMDIL::PRIVATETRUNCSTORE_f64f32:
+ // Scalar f64 -> f32: one DTOF from Rxy1011 into Rx1011.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+ AMDIL::Rx1011).addReg(AMDIL::Rxy1011);
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64_v2f64f32:
+ case AMDIL::LOCALTRUNCSTORE64_v2f64f32:
+ case AMDIL::REGIONTRUNCSTORE64_v2f64f32:
+ case AMDIL::PRIVATETRUNCSTORE64_v2f64f32:
+ case AMDIL::GLOBALTRUNCSTORE_v2f64f32:
+ case AMDIL::LOCALTRUNCSTORE_v2f64f32:
+ case AMDIL::REGIONTRUNCSTORE_v2f64f32:
+ case AMDIL::PRIVATETRUNCSTORE_v2f64f32:
+ // v2f64 -> v2f32: Rxy1011 is converted into Rx1011 first, then Rzw1011
+ // into Ry1011.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+ AMDIL::Rx1011).addReg(AMDIL::Rxy1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+ AMDIL::Ry1011).addReg(AMDIL::Rzw1011);
+ break;
+ }
+}
+// For instructions accepted by isAddrCalcInstr(), emits before MI an add of
+// an opcode-dependent base register onto the address temporary (Rx1010, or
+// Rxy1010 for 64-bit load/store ops):
+//   PRIVATE* opcodes -> T1, LOCAL* opcodes -> T2, CPOOL* opcodes -> SDP.
+// Any other opcode emits nothing.
+void
+AMDILIOExpansion::expandAddressCalc(MachineInstr *MI)
+{
+  if (!isAddrCalcInstr(MI)) {
+    return;
+  }
+  const bool wide = is64bitLSOp(TM, MI);
+  const uint32_t addrReg = wide ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+  const uint32_t addOp = wide ? AMDIL::LADD_i64 : AMDIL::ADD_i32;
+
+  // Select the base register first; the add itself is emitted once below.
+  unsigned baseReg = 0;
+  switch (MI->getOpcode()) {
+    ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE)
+    ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESTORE)
+    ExpandCaseToAllTypes(AMDIL::PRIVATELOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESTORE64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATELOAD64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD64)
+    baseReg = AMDIL::T1;
+    break;
+    ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE)
+    ExpandCaseToAllTypes(AMDIL::LOCALLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALSTORE)
+    ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE64)
+    ExpandCaseToAllTypes(AMDIL::LOCALLOAD64)
+    ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::LOCALSTORE64)
+    baseReg = AMDIL::T2;
+    break;
+    ExpandCaseToAllTypes(AMDIL::CPOOLLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLLOAD64)
+    ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD64)
+    ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD64)
+    baseReg = AMDIL::SDP;
+    break;
+  default:
+    return;
+  }
+
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*mBB, MI, DL, mTII->get(addOp), addrReg)
+    .addReg(addrReg).addReg(baseReg);
+}
+// Materialises the address of load MI in the address temporary (Rx1010, or
+// Rxy1010 for 64-bit load/store ops): operand 1 plus operand 2 when operand 2
+// is a register, otherwise a plain move of operand 1. Operand 1 of MI is then
+// redirected to the temporary and expandAddressCalc() is applied.
+void
+AMDILIOExpansion::expandLoadStartCode(MachineInstr *MI)
+{
+  const bool wide = is64bitLSOp(TM, MI);
+  const uint32_t addrReg = wide ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+  const uint32_t addOp = wide ? AMDIL::LADD_i64 : AMDIL::ADD_i32;
+  const uint32_t moveOp = wide ? AMDIL::MOVE_i64 : AMDIL::MOVE_i32;
+  DebugLoc DL = MI->getDebugLoc();
+  const unsigned ptrReg = MI->getOperand(1).getReg();
+
+  if (!MI->getOperand(2).isReg()) {
+    BuildMI(*mBB, MI, DL, mTII->get(moveOp), addrReg)
+      .addReg(ptrReg);
+  } else {
+    BuildMI(*mBB, MI, DL, mTII->get(addOp), addrReg)
+      .addReg(ptrReg)
+      .addReg(MI->getOperand(2).getReg());
+  }
+  MI->getOperand(1).setReg(addrReg);
+  expandAddressCalc(MI);
+}
+// Emits, before MI, the load of literal `id` into MI's result register
+// (operand 0). The opcode is DTOF when ExtFPLoad is set, MOVE_i32 otherwise.
+//   swizzle 1..3 : the literal goes to Rx1001 and is then inserted into the
+//                  result register with VINSERT_v4i32 using swizzle + 1.
+//   other values : the literal is written straight to the result register.
+void
+AMDILIOExpansion::emitStaticCPLoad(MachineInstr* MI, int swizzle,
+    int id, bool ExtFPLoad)
+{
+  DebugLoc DL = MI->getDebugLoc();
+  const unsigned loadOp = ExtFPLoad ? AMDIL::DTOF : AMDIL::MOVE_i32;
+  const unsigned dstReg = MI->getOperand(0).getReg();
+
+  if (swizzle < 1 || swizzle > 3) {
+    BuildMI(*mBB, MI, DL, mTII->get(loadOp), dstReg)
+      .addImm(id);
+    return;
+  }
+
+  BuildMI(*mBB, MI, DL, mTII->get(loadOp), AMDIL::Rx1001)
+    .addImm(id);
+  BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VINSERT_v4i32), dstReg)
+    .addReg(dstReg)
+    .addReg(AMDIL::Rx1001)
+    .addImm(swizzle + 1);
+}
+// Returns the bit pattern of APF viewed as a double. A value with IEEEsingle
+// semantics is first converted to float and widened; anything else goes
+// through convertToDouble().
+static uint64_t
+getAPFloatAsDoubleBits(const APFloat &APF)
+{
+  union dtol_union {
+    double d;
+    uint64_t ul;
+  } conv;
+  if (&APF.getSemantics()
+      == (const llvm::fltSemantics*)&APFloat::IEEEsingle) {
+    float fval = APF.convertToFloat();
+    conv.d = (double)fval;
+  } else {
+    conv.d = APF.convertToDouble();
+  }
+  return conv.ul;
+}
+
+// Emits the constant-pool load(s) for constant C ahead of MI.
+//   C         - the constant to materialise. Scalars (ConstantFP, ConstantInt)
+//               and ConstantAggregateZero emit one load at lane `swizzle`;
+//               arrays, structs and vectors emit one load per element, for at
+//               most four elements, with element x placed in lane x.
+//   KM        - only forwarded to the recursive calls.
+//   swizzle   - lane index handed to emitStaticCPLoad() for scalar constants.
+//   ExtFPLoad - forwarded to emitStaticCPLoad().
+// Any other constant kind asserts.
+void
+AMDILIOExpansion::emitCPInst(MachineInstr* MI,
+    const Constant* C, AMDILKernelManager* KM, int swizzle, bool ExtFPLoad)
+{
+  if (const ConstantFP* CFP = dyn_cast<ConstantFP>(C)) {
+    const APFloat &APF = CFP->getValueAPF();
+    uint32_t id = 0;
+    if (CFP->getType()->isFloatTy()) {
+      uint32_t val = (uint32_t)(APF.bitcastToAPInt().getZExtValue());
+      id = mMFI->addi32Literal(val);
+      if (!id) {
+        // No id came back for the 32-bit pattern; register the value as a
+        // 64-bit literal holding its double representation instead.
+        id = mMFI->addi64Literal(getAPFloatAsDoubleBits(APF));
+      }
+    } else {
+      const uint64_t bits = getAPFloatAsDoubleBits(APF);
+      id = mMFI->getLongLits(bits);
+      if (!id) {
+        id = mMFI->getIntLits((uint32_t)bits);
+      }
+    }
+    emitStaticCPLoad(MI, swizzle, id, ExtFPLoad);
+  } else if (const ConstantInt* CI = dyn_cast<ConstantInt>(C)) {
+    // CI is known non-null here, so no further null check is needed.
+    int64_t val = CI->getSExtValue();
+    if (CI->getBitWidth() == 64) {
+      emitStaticCPLoad(MI, swizzle, mMFI->addi64Literal(val), ExtFPLoad);
+    } else {
+      emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(val), ExtFPLoad);
+    }
+  } else if (const ConstantArray* CA = dyn_cast<ConstantArray>(C)) {
+    uint32_t size = CA->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant array where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    // Lane x receives element x (previously every lane received element 0).
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CA->getOperand(x), KM, x, ExtFPLoad);
+    }
+  } else if (const ConstantAggregateZero* CAZ
+             = dyn_cast<ConstantAggregateZero>(C)) {
+    if (CAZ->isNullValue()) {
+      emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(0), ExtFPLoad);
+    }
+  } else if (const ConstantStruct* CS = dyn_cast<ConstantStruct>(C)) {
+    uint32_t size = CS->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant array where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CS->getOperand(x), KM, x, ExtFPLoad);
+    }
+  } else if (const ConstantVector* CV = dyn_cast<ConstantVector>(C)) {
+    // TODO: Make this handle vectors natively up to the correct
+    // size
+    uint32_t size = CV->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant array where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CV->getOperand(x), KM, x, ExtFPLoad);
+    }
+  } else if (const ConstantDataVector* CV = dyn_cast<ConstantDataVector>(C)) {
+    // TODO: Make this handle vectors natively up to the correct
+    // size
+    uint32_t size = CV->getNumElements();
+    assert(size < 5 && "Cannot handle a constant array where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CV->getElementAsConstant(x), KM, x, ExtFPLoad);
+    }
+  } else {
+    // TODO: Do we really need to handle ConstantPointerNull?
+    // What about BlockAddress, ConstantExpr and Undef?
+    // How would these even be generated by a valid CL program?
+    assert(0 && "Found a constant type that I don't know how to handle");
+  }
+}
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,330 @@
+//===-- AMDILIOExpansion.h ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDIL IO Expansion class expands pseudo IO instructions into a sequence
+// of instructions that produces the correct results. These instructions are not
+// expanded earlier in the backend because any pass that runs before this one is
+// allowed to generate load/store instructions. Consequently, a pass may run
+// after this one only if it cannot generate any load/store instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILIOEXPANSION_H_
+#define _AMDILIOEXPANSION_H_
+#undef DEBUG_TYPE
+#undef DEBUGME
+#define DEBUG_TYPE "IOExpansion"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+#include "llvm/Type.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm
+{
+class MachineFunction;
+class AMDILKernelManager;
+class AMDILMachineFunctionInfo;
+class AMDILSubtarget;
+class MachineInstr;
+class Constant;
+class TargetInstrInfo;
+// Identifier returned by AMDILIOExpansion::getPackedID() describing which
+// pack/unpack form applies to an instruction.
+// NOTE(review): the individual meanings (e.g. V2I8 = 2 x i8 vector) are
+// inferred from the enumerator names only - confirm against the users.
+typedef enum {
+  NO_PACKING   = 0,
+  PACK_V2I8    = 1,
+  PACK_V4I8    = 2,
+  PACK_V2I16   = 3,
+  PACK_V4I16   = 4,
+  UNPACK_V2I8  = 5,
+  UNPACK_V4I8  = 6,
+  UNPACK_V2I16 = 7,
+  UNPACK_V4I16 = 8,
+  UNPACK_LAST  = 9   // one past the last unpack kind
+} REG_PACKED_TYPE;
+// Abstract machine-function pass that expands pseudo I/O instructions.
+// The address-space specific expansions are pure virtual and are supplied by
+// the device-family subclasses; the constructor is protected so only those
+// subclasses can be instantiated.
+class AMDILIOExpansion : public MachineFunctionPass
+{
+public:
+  virtual ~AMDILIOExpansion();
+  virtual const char* getPassName() const;
+  bool runOnMachineFunction(MachineFunction &MF);
+  static char ID;
+
+protected:
+  AMDILIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+  //
+  // @param MI Machine instruction to check.
+  // @brief checks to see if the machine instruction
+  // is an I/O instruction or not.
+  //
+  // @return true if I/O, false otherwise.
+  //
+  virtual bool isIOInstruction(MachineInstr *MI);
+
+  // Wrapper function that calls the appropriate I/O
+  // expansion function based on the instruction type.
+  virtual void expandIOInstruction(MachineInstr *MI);
+
+  // Address-space specific expansions; every one of these must be provided
+  // by a subclass.
+  virtual void expandGlobalStore(MachineInstr *MI) = 0;
+  virtual void expandLocalStore(MachineInstr *MI) = 0;
+  virtual void expandRegionStore(MachineInstr *MI) = 0;
+  virtual void expandPrivateStore(MachineInstr *MI) = 0;
+  virtual void expandGlobalLoad(MachineInstr *MI) = 0;
+  virtual void expandRegionLoad(MachineInstr *MI) = 0;
+  virtual void expandLocalLoad(MachineInstr *MI) = 0;
+  virtual void expandPrivateLoad(MachineInstr *MI) = 0;
+  virtual void expandConstantLoad(MachineInstr *MI) = 0;
+  virtual void expandConstantPoolLoad(MachineInstr *MI) = 0;
+
+  // Instruction classification queries shared by all device families.
+  bool isAddrCalcInstr(MachineInstr *MI);
+  bool isExtendLoad(MachineInstr *MI);
+  bool isHardwareRegion(MachineInstr *MI);
+  bool isHardwareLocal(MachineInstr *MI);
+  bool isPackedData(MachineInstr *MI);
+  bool isStaticCPLoad(MachineInstr *MI);
+  bool isNbitType(Type *MI, uint32_t nBits, bool isScalar = true);
+  bool isHardwareInst(MachineInstr *MI);
+
+  // Property accessors for a single I/O instruction.
+  uint32_t getMemorySize(MachineInstr *MI);
+  REG_PACKED_TYPE getPackedID(MachineInstr *MI);
+  uint32_t getShiftSize(MachineInstr *MI);
+  uint32_t getPointerID(MachineInstr *MI);
+  uint32_t getDataReg(MachineInstr *MI);
+
+  // Shared expansion helpers. The unsigned-returning helpers presumably
+  // return a register number - TODO confirm against the implementation.
+  void expandTruncData(MachineInstr *MI);
+  void expandLoadStartCode(MachineInstr *MI);
+  virtual void expandStoreSetupCode(MachineInstr *MI) = 0;
+  void expandAddressCalc(MachineInstr *MI);
+  unsigned expandLongExtend(MachineInstr *MI,
+                            uint32_t numComponents, uint32_t size,
+                            bool signedShift);
+  unsigned expandLongExtendSub32(MachineInstr *MI,
+                                 unsigned SHLop, unsigned SHRop,
+                                 unsigned USHRop,
+                                 unsigned SHLimm, uint64_t SHRimm,
+                                 unsigned USHRimm,
+                                 unsigned LCRop, bool signedShift, bool vec2);
+  unsigned expandIntegerExtend(MachineInstr *MI, unsigned,
+                               unsigned, unsigned, unsigned);
+  unsigned expandExtendLoad(MachineInstr *MI);
+  virtual void expandPackedData(MachineInstr *MI) = 0;
+
+  // Emits the constant-pool load(s) for constant C, recursing over the
+  // elements of aggregate constants.
+  void emitCPInst(MachineInstr* MI, const Constant* C,
+                  AMDILKernelManager* KM, int swizzle, bool ExtFPLoad);
+
+  // Per-run state shared with the subclasses.
+  bool mDebug;
+  const AMDILSubtarget *mSTM;
+  AMDILKernelManager *mKM;
+  MachineBasicBlock *mBB;
+  AMDILMachineFunctionInfo *mMFI;
+  const TargetInstrInfo *mTII;
+  bool saveInst;
+
+  // Emits a constant-pool load of the literal with the given id.
+  void emitStaticCPLoad(MachineInstr* MI, int swizzle, int id,
+                        bool ExtFPLoad);
+  TargetMachine &TM;
+}; // class AMDILIOExpansion
+
+// Intermediate class that holds I/O code expansion that is common to the
+// 7XX, Evergreen and Northern Island family of chips. It implements the
+// private, constant and constant-pool expansions and leaves the global,
+// local and region ones to the concrete device classes.
+class AMDIL789IOExpansion : public AMDILIOExpansion
+{
+public:
+  virtual ~AMDIL789IOExpansion();
+  virtual const char* getPassName() const;
+
+protected:
+  AMDIL789IOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+  // Still abstract at this level.
+  virtual void expandGlobalStore(MachineInstr *MI) = 0;
+  virtual void expandLocalStore(MachineInstr *MI) = 0;
+  virtual void expandRegionStore(MachineInstr *MI) = 0;
+  virtual void expandGlobalLoad(MachineInstr *MI) = 0;
+  virtual void expandRegionLoad(MachineInstr *MI) = 0;
+  virtual void expandLocalLoad(MachineInstr *MI) = 0;
+
+  // Implemented here for all three chip families.
+  virtual void expandPrivateStore(MachineInstr *MI);
+  virtual void expandConstantLoad(MachineInstr *MI);
+  virtual void expandPrivateLoad(MachineInstr *MI);
+  virtual void expandConstantPoolLoad(MachineInstr *MI);
+  void expandStoreSetupCode(MachineInstr *MI);
+  virtual void expandPackedData(MachineInstr *MI);
+
+private:
+  // Internal emission helpers used by the expansions above.
+  void emitVectorAddressCalc(MachineInstr *MI, bool is32bit,
+                             bool needsSelect);
+  void emitVectorSwitchWrite(MachineInstr *MI, bool is32bit);
+  void emitComponentExtract(MachineInstr *MI, unsigned src,
+                            unsigned dst, bool beforeInst);
+  void emitDataLoadSelect(MachineInstr *MI);
+}; // class AMDIL789IOExpansion
+// Class that handles I/O emission for the 7XX family of devices. It is a
+// concrete pass: it supplies the global, local and region expansions that
+// AMDIL789IOExpansion leaves abstract.
+class AMDIL7XXIOExpansion : public AMDIL789IOExpansion
+{
+public:
+  AMDIL7XXIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+  ~AMDIL7XXIOExpansion();
+  const char* getPassName() const;
+
+protected:
+  // Store expansions.
+  void expandGlobalStore(MachineInstr *MI);
+  void expandLocalStore(MachineInstr *MI);
+  void expandRegionStore(MachineInstr *MI);
+  // Load expansions.
+  void expandGlobalLoad(MachineInstr *MI);
+  void expandRegionLoad(MachineInstr *MI);
+  void expandLocalLoad(MachineInstr *MI);
+}; // class AMDIL7XXIOExpansion
+
+// Class that handles image functions to expand them into the
+// correct set of I/O instructions.
+class AMDILImageExpansion : public AMDIL789IOExpansion
+{
+public:
+  AMDILImageExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+  virtual ~AMDILImageExpansion();
+
+protected:
+  //
+  // @param MI Instruction iterator that has the sample instruction
+  // that needs to be taken care of.
+  // @brief transforms the __amdil_sample_data function call into a
+  // sample instruction in IL.
+  //
+  // @warning This function only works correctly if all functions get
+  // inlined
+  //
+  virtual void expandImageLoad(MachineBasicBlock *BB, MachineInstr *MI);
+
+  //
+  // @param MI Instruction iterator that has the write instruction that
+  // needs to be taken care of.
+  // @brief transforms the __amdil_write_data function call into a
+  // simple UAV write instruction in IL.
+  //
+  // @warning This function only works correctly if all functions get
+  // inlined
+  //
+  virtual void expandImageStore(MachineBasicBlock *BB, MachineInstr *MI);
+
+  //
+  // @param MI Instruction iterator that has the image parameter
+  // instruction
+  // @brief transforms the __amdil_get_image_params function call into
+  // a copy of data from a specific constant buffer to the register
+  //
+  // @warning This function only works correctly if all functions get
+  // inlined
+  //
+  virtual void expandImageParam(MachineBasicBlock *BB, MachineInstr *MI);
+
+  //
+  // @param MI Instruction that points to the image
+  // @brief transforms __amdil_sample_data into a sequence of
+  // if/else that selects the correct sample instruction.
+  //
+  // @warning This function is inefficient and works with no
+  // inlining.
+  //
+  virtual void expandInefficientImageLoad(MachineBasicBlock *BB,
+                                          MachineInstr *MI);
+
+private:
+  AMDILImageExpansion(); // Do not implement.
+
+}; // class AMDILImageExpansion
+
+// Class that expands IO instructions for Evergreen and Northern
+// Island family of devices. It builds on AMDILImageExpansion and overrides
+// the I/O classification/dispatch hooks in addition to supplying the
+// global, local and region expansions.
+class AMDILEGIOExpansion : public AMDILImageExpansion
+{
+public:
+  AMDILEGIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+  virtual ~AMDILEGIOExpansion();
+  const char* getPassName() const;
+
+protected:
+  // Classification and dispatch overrides.
+  virtual bool isIOInstruction(MachineInstr *MI);
+  virtual void expandIOInstruction(MachineInstr *MI);
+  bool isImageIO(MachineInstr *MI);
+
+  // Address-space specific expansions.
+  virtual void expandGlobalStore(MachineInstr *MI);
+  void expandLocalStore(MachineInstr *MI);
+  void expandRegionStore(MachineInstr *MI);
+  virtual void expandGlobalLoad(MachineInstr *MI);
+  void expandRegionLoad(MachineInstr *MI);
+  void expandLocalLoad(MachineInstr *MI);
+
+  virtual bool isCacheableOp(MachineInstr *MI);
+  void expandStoreSetupCode(MachineInstr *MI);
+  void expandPackedData(MachineInstr *MI);
+
+private:
+  // Arena handling helpers, used only inside this class.
+  bool isArenaOp(MachineInstr *MI);
+  void expandArenaSetup(MachineInstr *MI);
+}; // class AMDILEGIOExpansion
+} // namespace llvm
+#endif // _AMDILIOEXPANSION_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,505 @@
+//===-- AMDILISelDAGToDAG.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILDevices.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
+// for SelectionDAG operations.
+//
+namespace
+{
+// Instruction selector for the AMDIL target. Besides the generated matcher
+// (pulled in from AMDILGenDAGISel.inc) it provides the address-space
+// predicates and complex-pattern selectors declared below.
+class AMDILDAGToDAGISel : public SelectionDAGISel
+{
+  // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
+  // make the right decision when generating code for different targets.
+  const AMDILSubtarget *Subtarget;
+
+public:
+  explicit AMDILDAGToDAGISel(AMDILTargetMachine &TM,
+                             CodeGenOpt::Level OptLevel)
+    : SelectionDAGISel(TM, OptLevel),
+      Subtarget(&TM.getSubtarget<AMDILSubtarget>())
+  {
+  }
+  virtual ~AMDILDAGToDAGISel() {}
+
+  // Wraps Imm in an i32 target constant.
+  inline SDValue getSmallIPtrImm(unsigned Imm);
+
+  // Main selection hook.
+  SDNode *Select(SDNode *N);
+
+  // Complex pattern selectors: split an address into two operands.
+  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+  // Store address-space predicates.
+  bool isGlobalStore(const StoreSDNode *N) const;
+  bool isPrivateStore(const StoreSDNode *N) const;
+  bool isLocalStore(const StoreSDNode *N) const;
+  bool isRegionStore(const StoreSDNode *N) const;
+  bool isFlatStore(const StoreSDNode *N) const;
+
+  // Load address-space predicates.
+  bool isCPLoad(const LoadSDNode *N) const;
+  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
+  bool isGlobalLoad(const LoadSDNode *N) const;
+  bool isPrivateLoad(const LoadSDNode *N) const;
+  bool isLocalLoad(const LoadSDNode *N) const;
+  bool isRegionLoad(const LoadSDNode *N) const;
+  bool isFlatLoad(const LoadSDNode *N) const;
+  bool isFlatASOverrideEnabled() const;
+
+  virtual const char *getPassName() const;
+
+private:
+  // Rebuilds an atomic node with its extra operands appended.
+  SDNode *xformAtomicInst(SDNode *N);
+
+  // Include the pieces autogenerated from the target description.
+#include "AMDILGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+// createAMDILISelDag - This pass converts a legalized DAG into a AMDIL-specific
+// DAG, ready for instruction scheduling.
+//
+// Factory for the AMDIL instruction selector; the returned pass is heap
+// allocated with new and handed to the caller.
+FunctionPass *llvm::createAMDILISelDag(AMDILTargetMachine &TM,
+                                       llvm::CodeGenOpt::Level OptLevel)
+{
+  FunctionPass *ISel = new AMDILDAGToDAGISel(TM, OptLevel);
+  return ISel;
+}
+
+// Returns Imm as an i32 target constant node of the current DAG.
+SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm)
+{
+  SDValue ImmNode = CurDAG->getTargetConstant(Imm, MVT::i32);
+  return ImmNode;
+}
+
+// Complex-pattern selector for 32-bit addresses.
+//
+// Splits Addr into a pair (R1, R2):
+//   - target external symbols / global addresses are rejected (false);
+//   - an ADD yields its two operands;
+//   - a frame index yields (target frame index, 0);
+//   - anything else yields (Addr, 0).
+// R1 and R2 are only written when true is returned.
+bool AMDILDAGToDAGISel::SelectADDR(
+    SDValue Addr, SDValue& R1, SDValue& R2)
+{
+  switch (Addr.getOpcode()) {
+  case ISD::TargetExternalSymbol:
+  case ISD::TargetGlobalAddress:
+    return false;
+
+  case ISD::ADD:
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+    return true;
+
+  case ISD::FrameIndex: {
+    FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr);
+    R1 = FIN ? CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32)
+             : Addr;
+    break;
+  }
+
+  default:
+    R1 = Addr;
+    break;
+  }
+  // Every remaining form uses a zero second operand.
+  R2 = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+
+// Complex-pattern selector for 64-bit addresses.
+//
+// Same decomposition as SelectADDR, but the frame index and the zero
+// second operand are created with type i64:
+//   - target external symbols / global addresses are rejected (false);
+//   - an ADD yields its two operands;
+//   - a frame index yields (target frame index, 0);
+//   - anything else yields (Addr, 0).
+bool AMDILDAGToDAGISel::SelectADDR64(
+    SDValue Addr, SDValue& R1, SDValue& R2)
+{
+  switch (Addr.getOpcode()) {
+  case ISD::TargetExternalSymbol:
+  case ISD::TargetGlobalAddress:
+    return false;
+
+  case ISD::ADD:
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+    return true;
+
+  case ISD::FrameIndex: {
+    FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr);
+    R1 = FIN ? CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64)
+             : Addr;
+    break;
+  }
+
+  default:
+    R1 = Addr;
+    break;
+  }
+  // Every remaining form uses a zero second operand.
+  R2 = CurDAG->getTargetConstant(0, MVT::i64);
+  return true;
+}
+
+// Selection entry point for a single node.
+//
+// Returns NULL for nodes that already carry a machine opcode. Frame indices
+// are turned into a MOVE_i32 of the target frame index. Nodes whose opcode
+// lies strictly between AMDILISD::ADDADDR and AMDILISD::APPEND_ALLOC are
+// first rewritten by xformAtomicInst(). Everything else goes straight to the
+// generated matcher.
+SDNode *AMDILDAGToDAGISel::Select(SDNode *N)
+{
+  if (N->isMachineOpcode()) {
+    return NULL; // Already selected.
+  }
+
+  const unsigned int Opc = N->getOpcode();
+  if (Opc == ISD::FrameIndex) {
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
+      SDValue TFI = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+      return CurDAG->SelectNodeTo(N, AMDIL::MOVE_i32,
+                                  N->getValueType(0), TFI);
+    }
+  }
+
+  // For all atomic instructions, we need to add a constant
+  // operand that stores the resource ID in the instruction
+  const bool inAtomicRange =
+    Opc > AMDILISD::ADDADDR && Opc < AMDILISD::APPEND_ALLOC;
+  if (inAtomicRange) {
+    N = xformAtomicInst(N);
+  }
+  return SelectCode(N);
+}
+
+// Forwards the subtarget's overridesFlatAS() setting.
+bool AMDILDAGToDAGISel::isFlatASOverrideEnabled() const
+{
+  const bool overridden = Subtarget->overridesFlatAS();
+  return overridden;
+}
+
+// True when the store's source value is in the global address space and the
+// flat address-space override is off.
+bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) const
+{
+  if (!check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)) {
+    return false;
+  }
+  return !isFlatASOverrideEnabled();
+}
+
+// True when the store targets the flat address space, or when the flat
+// override is on and the store targets the local, constant, private or
+// global address space.
+bool AMDILDAGToDAGISel::isFlatStore(const StoreSDNode *N) const
+{
+  if (check_type(N->getSrcValue(), AMDILAS::FLAT_ADDRESS)) {
+    return true;
+  }
+  if (!isFlatASOverrideEnabled()) {
+    return false;
+  }
+  return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+         || check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+         || check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+         || check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+}
+
+// A store is treated as private when it is not local, global or region and
+// the flat address-space override is off.
+bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) const
+{
+  const bool inOtherSpace =
+    check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+    || check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+    || check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+  if (inOtherSpace) {
+    return false;
+  }
+  return !isFlatASOverrideEnabled();
+}
+
+// True when the store's source value is in the local address space and the
+// flat address-space override is off.
+bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) const
+{
+  if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)) {
+    return false;
+  }
+  return !isFlatASOverrideEnabled();
+}
+
+// True when the store's source value is in the region address space.
+// NOTE(review): unlike isRegionLoad(), this does not consult the flat
+// address-space override - confirm that the asymmetry is intended.
+bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) const
+{
+  const bool isRegion =
+    check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+  return isRegion;
+}
+
+// Decides whether a load should be treated as a constant load.
+//
+// A load qualifies when either
+//   - its source value is in the constant address space, or
+//   - its memory operand value (or the base pointer of that value) is a
+//     GlobalValue and the source value is in the private address space,
+// and, in both cases, the flat address-space override is off.
+//
+// @param N    load node to classify.
+// @param cbID not used by this implementation.
+// @return true if the load is a constant load, false otherwise.
+bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) const
+{
+  if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+      && !isFlatASOverrideEnabled()) {
+    return true;
+  }
+
+  // Validate the memory operand and its value *before* touching them; the
+  // previous code dereferenced MMO ahead of its own null check.
+  MachineMemOperand *MMO = N->getMemOperand();
+  if (!MMO) {
+    return false;
+  }
+  const Value *V = MMO->getValue();
+  if (!V) {
+    return false;
+  }
+
+  // Either the value itself or its base pointer has to be a global.
+  bool basedOnGlobal = isa<GlobalValue>(V);
+  if (!basedOnGlobal) {
+    const Value *BV = getBasePointerValue(V);
+    basedOnGlobal = BV && isa<GlobalValue>(BV);
+  }
+  if (!basedOnGlobal) {
+    return false;
+  }
+
+  return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+         && !isFlatASOverrideEnabled();
+}
+
+// True when the load's source value is in the global address space and the
+// flat address-space override is off.
+bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const
+{
+  if (!check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)) {
+    return false;
+  }
+  return !isFlatASOverrideEnabled();
+}
+
+// True when the load reads from the flat address space, or when the flat
+// override is on and the load reads from the local, constant, private or
+// global address space.
+bool AMDILDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const
+{
+  if (check_type(N->getSrcValue(), AMDILAS::FLAT_ADDRESS)) {
+    return true;
+  }
+  if (!isFlatASOverrideEnabled()) {
+    return false;
+  }
+  return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+         || check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+         || check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+         || check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+}
+
+// True when the load's source value is in the local address space and the
+// flat address-space override is off.
+bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const
+{
+  if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)) {
+    return false;
+  }
+  return !isFlatASOverrideEnabled();
+}
+
+// True when the load's source value is in the region address space and the
+// flat address-space override is off.
+// NOTE(review): isRegionStore() does not check the override and
+// isFlatLoad() does not cover the region space - confirm this is intended.
+bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const
+{
+  if (!check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)) {
+    return false;
+  }
+  return !isFlatASOverrideEnabled();
+}
+
+// Returns true when the load reads from the constant pool, i.e. when
+//   - its source value is in the private address space,
+//   - the flat address-space override is off, and
+//   - its memory operand value is the constant-pool pseudo source value.
+bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) const
+{
+  if (!check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+      || isFlatASOverrideEnabled()) {
+    return false;
+  }
+  MachineMemOperand *MMO = N->getMemOperand();
+  if (!MMO) {
+    return false;
+  }
+  // The memory operand may carry no IR value; dyn_cast<> must not be given
+  // a null pointer, so use the null-tolerant variant.
+  const PseudoSourceValue *PSV =
+    dyn_cast_or_null<PseudoSourceValue>(MMO->getValue());
+  return PSV && PSV == PseudoSourceValue::getConstantPool();
+}
+
+// Returns true when the load should be selected as a private load: the flat
+// override is off, the source value is not in the local, global, region or
+// constant address space, and the load is not a constant-pool or constant
+// load that merely carries the private address space.
+bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const
+{
+  // Check to make sure we are not a constant pool load or a constant load
+  // that is marked as a private load
+  if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
+      && !isFlatASOverrideEnabled()
+      && (isCPLoad(N) || isConstantLoad(N, -1))) {
+    return false;
+  }
+
+  const bool inOtherSpace =
+    check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+    || check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+    || check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
+    || check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS);
+  if (inOtherSpace) {
+    return false;
+  }
+  return !isFlatASOverrideEnabled();
+}
+
+// Human-readable name of this pass.
+const char *AMDILDAGToDAGISel::getPassName() const
+{
+  const char *Name = "AMDIL DAG->DAG Pattern Instruction Selection";
+  return Name;
+}
+
+// Rebuilds an atomic node with the extra operands appended.
+//
+// Every recognised atomic gets a trailing i32 target constant 0 (per the
+// caller's comment, this operand stores the resource ID). INC/DEC atomics
+// additionally get a selected i32 constant operand in front of it:
+//   - when the CAL version is >= CAL_VERSION_SC_136 the INC/DEC opcode is
+//     kept and the constant is 0xFFFFFFFF;
+//   - otherwise the node is rewritten to the matching ADD/SUB opcode (with
+//     the same return / no-return flavour) and the constant is 1.
+// NOTE(review): why newer CAL versions take 0xFFFFFFFF is not visible here.
+//
+// @param N node to transform; must be a MemSDNode when it is an atomic.
+// @return the rebuilt node, or N unchanged if its opcode is not one of the
+//         atomics handled here.
+SDNode*
+AMDILDAGToDAGISel::xformAtomicInst(SDNode *N)
+{
+  unsigned opc = N->getOpcode();
+  // Opcode used for INC/DEC on CAL versions older than SC_136.
+  unsigned legacyOpc = opc;
+  bool addOne = true;
+  switch (opc) {
+  default:
+    return N;
+  case AMDILISD::ATOM_G_ADD:
+  case AMDILISD::ATOM_G_AND:
+  case AMDILISD::ATOM_G_MAX:
+  case AMDILISD::ATOM_G_UMAX:
+  case AMDILISD::ATOM_G_MIN:
+  case AMDILISD::ATOM_G_UMIN:
+  case AMDILISD::ATOM_G_OR:
+  case AMDILISD::ATOM_G_SUB:
+  case AMDILISD::ATOM_G_RSUB:
+  case AMDILISD::ATOM_G_XCHG:
+  case AMDILISD::ATOM_G_XOR:
+  case AMDILISD::ATOM_G_ADD_NORET:
+  case AMDILISD::ATOM_G_AND_NORET:
+  case AMDILISD::ATOM_G_MAX_NORET:
+  case AMDILISD::ATOM_G_UMAX_NORET:
+  case AMDILISD::ATOM_G_MIN_NORET:
+  case AMDILISD::ATOM_G_UMIN_NORET:
+  case AMDILISD::ATOM_G_OR_NORET:
+  case AMDILISD::ATOM_G_SUB_NORET:
+  case AMDILISD::ATOM_G_RSUB_NORET:
+  case AMDILISD::ATOM_G_XCHG_NORET:
+  case AMDILISD::ATOM_G_XOR_NORET:
+  case AMDILISD::ATOM_L_ADD:
+  case AMDILISD::ATOM_L_AND:
+  case AMDILISD::ATOM_L_MAX:
+  case AMDILISD::ATOM_L_UMAX:
+  case AMDILISD::ATOM_L_MIN:
+  case AMDILISD::ATOM_L_UMIN:
+  case AMDILISD::ATOM_L_OR:
+  case AMDILISD::ATOM_L_SUB:
+  case AMDILISD::ATOM_L_RSUB:
+  case AMDILISD::ATOM_L_XCHG:
+  case AMDILISD::ATOM_L_XOR:
+  case AMDILISD::ATOM_L_ADD_NORET:
+  case AMDILISD::ATOM_L_AND_NORET:
+  case AMDILISD::ATOM_L_MAX_NORET:
+  case AMDILISD::ATOM_L_UMAX_NORET:
+  case AMDILISD::ATOM_L_MIN_NORET:
+  case AMDILISD::ATOM_L_UMIN_NORET:
+  case AMDILISD::ATOM_L_OR_NORET:
+  case AMDILISD::ATOM_L_SUB_NORET:
+  case AMDILISD::ATOM_L_RSUB_NORET:
+  case AMDILISD::ATOM_L_XCHG_NORET:
+  case AMDILISD::ATOM_L_XOR_NORET:
+  case AMDILISD::ATOM_R_ADD:
+  case AMDILISD::ATOM_R_AND:
+  case AMDILISD::ATOM_R_MAX:
+  case AMDILISD::ATOM_R_UMAX:
+  case AMDILISD::ATOM_R_MIN:
+  case AMDILISD::ATOM_R_UMIN:
+  case AMDILISD::ATOM_R_OR:
+  case AMDILISD::ATOM_R_SUB:
+  case AMDILISD::ATOM_R_RSUB:
+  case AMDILISD::ATOM_R_XCHG:
+  case AMDILISD::ATOM_R_XOR:
+  case AMDILISD::ATOM_R_ADD_NORET:
+  case AMDILISD::ATOM_R_AND_NORET:
+  case AMDILISD::ATOM_R_MAX_NORET:
+  case AMDILISD::ATOM_R_UMAX_NORET:
+  case AMDILISD::ATOM_R_MIN_NORET:
+  case AMDILISD::ATOM_R_UMIN_NORET:
+  case AMDILISD::ATOM_R_OR_NORET:
+  case AMDILISD::ATOM_R_SUB_NORET:
+  case AMDILISD::ATOM_R_RSUB_NORET:
+  case AMDILISD::ATOM_R_XCHG_NORET:
+  case AMDILISD::ATOM_R_XOR_NORET:
+  case AMDILISD::ATOM_G_CMPXCHG:
+  case AMDILISD::ATOM_G_CMPXCHG_NORET:
+  case AMDILISD::ATOM_L_CMPXCHG:
+  case AMDILISD::ATOM_L_CMPXCHG_NORET:
+  case AMDILISD::ATOM_R_CMPXCHG:
+  case AMDILISD::ATOM_R_CMPXCHG_NORET:
+    // Only the trailing target constant is appended for these.
+    addOne = false;
+    break;
+  // INC/DEC: record the ADD/SUB opcode that replaces them on older CAL
+  // versions. The replacement keeps the return / no-return flavour.
+  case AMDILISD::ATOM_G_DEC:
+    legacyOpc = AMDILISD::ATOM_G_SUB;
+    break;
+  case AMDILISD::ATOM_G_INC:
+    legacyOpc = AMDILISD::ATOM_G_ADD;
+    break;
+  case AMDILISD::ATOM_G_DEC_NORET:
+    legacyOpc = AMDILISD::ATOM_G_SUB_NORET;
+    break;
+  case AMDILISD::ATOM_G_INC_NORET:
+    legacyOpc = AMDILISD::ATOM_G_ADD_NORET;
+    break;
+  case AMDILISD::ATOM_L_DEC:
+    legacyOpc = AMDILISD::ATOM_L_SUB;
+    break;
+  case AMDILISD::ATOM_L_INC:
+    legacyOpc = AMDILISD::ATOM_L_ADD;
+    break;
+  case AMDILISD::ATOM_L_DEC_NORET:
+    legacyOpc = AMDILISD::ATOM_L_SUB_NORET;
+    break;
+  case AMDILISD::ATOM_L_INC_NORET:
+    legacyOpc = AMDILISD::ATOM_L_ADD_NORET;
+    break;
+  case AMDILISD::ATOM_R_DEC:
+    legacyOpc = AMDILISD::ATOM_R_SUB;
+    break;
+  case AMDILISD::ATOM_R_INC:
+    legacyOpc = AMDILISD::ATOM_R_ADD;
+    break;
+  case AMDILISD::ATOM_R_DEC_NORET:
+    // Was ATOM_R_SUB, which silently switched to the returning form.
+    legacyOpc = AMDILISD::ATOM_R_SUB_NORET;
+    break;
+  case AMDILISD::ATOM_R_INC_NORET:
+    legacyOpc = AMDILISD::ATOM_R_ADD_NORET;
+    break;
+  }
+
+  uint32_t addVal = 1;
+  if (addOne) {
+    if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = legacyOpc;
+    }
+  }
+
+  // The largest we can have is a cmpxchg w/ a return value and an output chain.
+  // The cmpxchg function has 3 inputs and a single output along with an
+  // output chain and a target constant, giving a total of 6.
+  const unsigned MaxOps = 12;
+  SDValue Ops[MaxOps];
+  const unsigned numIn = N->getNumOperands();
+  // Up to two operands are appended below; make sure they fit.
+  assert(numIn + 2 <= MaxOps && "Too many operands on atomic node!");
+  unsigned x = 0;
+  for (; x < numIn; ++x) {
+    Ops[x] = N->getOperand(x);
+  }
+  if (addOne) {
+    Ops[x++] = SDValue(
+      SelectCode(CurDAG->getConstant(addVal, MVT::i32).getNode()), 0);
+  }
+  Ops[x++] = CurDAG->getTargetConstant(0, MVT::i32);
+
+  SDVTList Tys = N->getVTList();
+  // cast<> asserts (in +Asserts builds) that the node really is a MemSDNode.
+  MemSDNode *MemNode = cast<MemSDNode>(N);
+  return CurDAG->getMemIntrinsicNode(opc, N->getDebugLoc(), Tys, Ops, x,
+                                     MemNode->getMemoryVT(),
+                                     MemNode->getMemOperand()).getNode();
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,6010 @@
+//===-- AMDILISelLowering.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that AMDIL uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILISelLowering.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILLLVMPC.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetOptions.h"
+#include "../../CodeGen/SelectionDAG/SDNodeDbgValue.h"
+using namespace llvm;
+#define ISDBITCAST ISD::BITCAST
+#define MVTGLUE MVT::Glue
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDILGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions Begin
+//===----------------------------------------------------------------------===//
+// Builds the DAG node(s) that convert Src to the scalar type of Dst.
+//
+// @param DAG    DAG in which the new nodes are created.
+// @param Src    value to convert; it is overwritten with the converted
+//               value, which is also returned.
+// @param Dst    only its value type is consulted.
+// @param asType when the conversion crosses between integer and floating
+//               point, true selects a bitcast and false a numeric
+//               SINT_TO_FP / FP_TO_SINT conversion.
+// @return the converted value (same as Src on exit). fp <-> fp conversions
+//         of equal width leave Src untouched.
+static SDValue
+getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
+{
+  DebugLoc DL = Src.getDebugLoc();
+  EVT svt = Src.getValueType().getScalarType();
+  EVT dvt = Dst.getValueType().getScalarType();
+  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
+    if (dvt.bitsGT(svt)) {
+      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
+    } else if (dvt.bitsLT(svt)) {
+      // Narrowing. This used to test svt.bitsLT(svt), which is never true,
+      // so no FP_ROUND was ever emitted. The flag operand is 0 because
+      // nothing here proves that the rounding preserves the value.
+      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
+                        DAG.getConstant(0, MVT::i32));
+    }
+  } else if (svt.isInteger() && dvt.isInteger()) {
+    if (!svt.bitsEq(dvt)) {
+      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+    } else {
+      Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
+    }
+  } else if (svt.isInteger()) {
+    // integer -> floating point: first bring the integer to the width of
+    // the destination, then bitcast or convert.
+    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
+    if (!svt.bitsEq(dvt)) {
+      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
+        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
+        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
+      } else {
+        assert(0 && "We only support 32 and 64bit fp types");
+      }
+    }
+    Src = DAG.getNode(opcode, DL, dvt, Src);
+  } else if (dvt.isInteger()) {
+    // floating point -> integer: bitcast or convert to the integer of the
+    // same width, then sign-extend or truncate to the destination.
+    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
+    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
+      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
+    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
+      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
+    } else {
+      assert(0 && "We only support 32 and 64bit fp types");
+    }
+    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+  }
+  return Src;
+}
+// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
+// condition.
+static AMDILCC::CondCodes
+CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
+{
+ switch (CC) {
+ default: {
+ errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
+ assert(0 && "Unknown condition code!");
+ }
+ case ISD::SETO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_O;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_O;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UO;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UO;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_NE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_NE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_EQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_EQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UNE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UNE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UEQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETONE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ONE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ONE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOEQ:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OEQ;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ };
+}
+
+/// Pick the 32-bit integer compare opcode that matches a register class.
+///
+/// The i8 and i16 register classes reuse the opcode of the i32 class that has
+/// the same number of vector elements.
+///
+/// \param regClass  register class ID to dispatch on.
+/// \param scalarOp  opcode returned for the scalar classes (i8, i16, i32).
+/// \param v2Op      opcode returned for the two-element vector classes.
+/// \param v4Op      opcode returned for the four-element vector classes.
+/// \returns the selected opcode. Any other register class triggers an
+///          assertion and yields 0 when assertions are compiled out.
+static unsigned int
+selectIntCompareOpcode(unsigned int regClass, unsigned int scalarOp,
+                       unsigned int v2Op, unsigned int v4Op)
+{
+  switch (regClass) {
+  case AMDIL::GPRI32RegClassID:
+  case AMDIL::GPRI8RegClassID:
+  case AMDIL::GPRI16RegClassID:
+    return scalarOp;
+  case AMDIL::GPRV2I32RegClassID:
+  case AMDIL::GPRV2I8RegClassID:
+  case AMDIL::GPRV2I16RegClassID:
+    return v2Op;
+  case AMDIL::GPRV4I32RegClassID:
+  case AMDIL::GPRV4I8RegClassID:
+  case AMDIL::GPRV4I16RegClassID:
+    return v4Op;
+  default:
+    assert(!"Unknown reg class!");
+    // Return explicitly so that a build without assertions cannot fall
+    // through into an unrelated case of the caller's switch.
+    return 0;
+  }
+}
+
+/// Translate an AMDILCC condition code into the AMDIL compare opcode used to
+/// implement it.
+///
+/// Several condition codes share one opcode (for example IL_CC_I_GT and
+/// IL_CC_I_LT both yield ILT); this function only selects the opcode and does
+/// not decide the operand order.
+///
+/// \param CCCode    AMDILCC condition code.
+/// \param regClass  register class ID used to choose between the scalar and
+///                  vector variants of an opcode.
+/// \returns the opcode, or 0 for condition codes that have no single opcode
+///          (unordered-equal, ordered-not-equal, ordered, unordered) and for
+///          unknown condition codes or register classes.
+static unsigned int
+translateToOpcode(uint64_t CCCode, unsigned int regClass)
+{
+  switch (CCCode) {
+  // ---- double precision ----
+  case AMDILCC::IL_CC_D_EQ:
+  case AMDILCC::IL_CC_D_OEQ:
+    if (regClass == AMDIL::GPRV2F64RegClassID) {
+      return (unsigned int)AMDIL::DEQ_v2f64;
+    }
+    return (unsigned int)AMDIL::DEQ;
+  case AMDILCC::IL_CC_D_LE:
+  case AMDILCC::IL_CC_D_OLE:
+  case AMDILCC::IL_CC_D_ULE:
+  case AMDILCC::IL_CC_D_GE:
+  case AMDILCC::IL_CC_D_OGE:
+  case AMDILCC::IL_CC_D_UGE:
+    return (unsigned int)AMDIL::DGE;
+  case AMDILCC::IL_CC_D_LT:
+  case AMDILCC::IL_CC_D_OLT:
+  case AMDILCC::IL_CC_D_ULT:
+  case AMDILCC::IL_CC_D_GT:
+  case AMDILCC::IL_CC_D_OGT:
+  case AMDILCC::IL_CC_D_UGT:
+    return (unsigned int)AMDIL::DLT;
+  case AMDILCC::IL_CC_D_NE:
+  case AMDILCC::IL_CC_D_UNE:
+    return (unsigned int)AMDIL::DNE;
+
+  // ---- single precision ----
+  case AMDILCC::IL_CC_F_EQ:
+  case AMDILCC::IL_CC_F_OEQ:
+    return (unsigned int)AMDIL::FEQ;
+  case AMDILCC::IL_CC_F_LE:
+  case AMDILCC::IL_CC_F_ULE:
+  case AMDILCC::IL_CC_F_OLE:
+  case AMDILCC::IL_CC_F_GE:
+  case AMDILCC::IL_CC_F_UGE:
+  case AMDILCC::IL_CC_F_OGE:
+    return (unsigned int)AMDIL::FGE;
+  case AMDILCC::IL_CC_F_LT:
+  case AMDILCC::IL_CC_F_OLT:
+  case AMDILCC::IL_CC_F_ULT:
+  case AMDILCC::IL_CC_F_GT:
+  case AMDILCC::IL_CC_F_OGT:
+  case AMDILCC::IL_CC_F_UGT:
+    if (regClass == AMDIL::GPRV2F32RegClassID) {
+      return (unsigned int)AMDIL::FLT_v2f32;
+    }
+    if (regClass == AMDIL::GPRV4F32RegClassID) {
+      return (unsigned int)AMDIL::FLT_v4f32;
+    }
+    return (unsigned int)AMDIL::FLT;
+  case AMDILCC::IL_CC_F_NE:
+  case AMDILCC::IL_CC_F_UNE:
+    return (unsigned int)AMDIL::FNE;
+
+  // ---- integers of 32 bits or narrower ----
+  case AMDILCC::IL_CC_I_EQ:
+  case AMDILCC::IL_CC_U_EQ:
+    return selectIntCompareOpcode(regClass, AMDIL::IEQ,
+                                  AMDIL::IEQ_v2i32, AMDIL::IEQ_v4i32);
+  case AMDILCC::IL_CC_I_GE:
+  case AMDILCC::IL_CC_I_LE:
+    return selectIntCompareOpcode(regClass, AMDIL::IGE,
+                                  AMDIL::IGE_v2i32, AMDIL::IGE_v4i32);
+  case AMDILCC::IL_CC_I_LT:
+  case AMDILCC::IL_CC_I_GT:
+    return selectIntCompareOpcode(regClass, AMDIL::ILT,
+                                  AMDIL::ILT_v2i32, AMDIL::ILT_v4i32);
+  case AMDILCC::IL_CC_I_NE:
+  case AMDILCC::IL_CC_U_NE:
+    return selectIntCompareOpcode(regClass, AMDIL::INE,
+                                  AMDIL::INE_v2i32, AMDIL::INE_v4i32);
+  case AMDILCC::IL_CC_U_GE:
+  case AMDILCC::IL_CC_U_LE:
+    return selectIntCompareOpcode(regClass, AMDIL::UGE,
+                                  AMDIL::UGE_v2i32, AMDIL::UGE_v4i32);
+  case AMDILCC::IL_CC_U_LT:
+    return selectIntCompareOpcode(regClass, AMDIL::ULT,
+                                  AMDIL::ULT_v2i32, AMDIL::ULT_v4i32);
+  case AMDILCC::IL_CC_U_GT:
+    return selectIntCompareOpcode(regClass, AMDIL::UGT,
+                                  AMDIL::UGT_v2i32, AMDIL::UGT_v4i32);
+
+  // ---- 64-bit integers ----
+  case AMDILCC::IL_CC_L_EQ:
+  case AMDILCC::IL_CC_UL_EQ:
+    return (unsigned int)AMDIL::LEQ;
+  case AMDILCC::IL_CC_L_NE:
+  case AMDILCC::IL_CC_UL_NE:
+    return (unsigned int)AMDIL::LNE;
+  case AMDILCC::IL_CC_L_GE:
+    return (unsigned int)AMDIL::LGE;
+  case AMDILCC::IL_CC_L_LE:
+    return (unsigned int)AMDIL::LLE;
+  case AMDILCC::IL_CC_L_LT:
+    return (unsigned int)AMDIL::LLT;
+  case AMDILCC::IL_CC_L_GT:
+    return (unsigned int)AMDIL::LGT;
+  case AMDILCC::IL_CC_UL_GE:
+    return (unsigned int)AMDIL::ULGE;
+  case AMDILCC::IL_CC_UL_LE:
+    return (unsigned int)AMDIL::ULLE;
+  case AMDILCC::IL_CC_UL_LT:
+    return (unsigned int)AMDIL::ULLT;
+  case AMDILCC::IL_CC_UL_GT:
+    return (unsigned int)AMDIL::ULGT;
+
+  // ---- condition codes without a single opcode ----
+  case AMDILCC::IL_CC_F_UEQ:
+  case AMDILCC::IL_CC_D_UEQ:
+  case AMDILCC::IL_CC_F_ONE:
+  case AMDILCC::IL_CC_D_ONE:
+  case AMDILCC::IL_CC_F_O:
+  case AMDILCC::IL_CC_F_UO:
+  case AMDILCC::IL_CC_D_O:
+  case AMDILCC::IL_CC_D_UO:
+    // we don't care
+    return 0;
+  }
+  errs()<<"Opcode: "<<CCCode<<"\n";
+  assert(0 && "Unknown opcode retrieved");
+  return 0;
+}
+/// Lower an incoming argument that was assigned to a stack slot.
+///
+/// Creates a fixed frame object at the location described by \p VA. A byval
+/// argument is returned as the frame index itself; any other argument is
+/// returned as a load from that frame index.
+///
+/// \param Chain     chain the load is attached to.
+/// \param CallConv  calling convention of the function being lowered.
+/// \param Ins       incoming argument descriptions.
+/// \param dl        debug location for the created nodes.
+/// \param DAG       selection DAG the nodes are created in.
+/// \param VA        location assignment of the argument.
+/// \param MFI       frame info that receives the fixed object.
+/// \param i         index into \p Ins of the argument being lowered.
+SDValue
+AMDILTargetLowering::LowerMemArgument(
+  SDValue Chain,
+  CallingConv::ID CallConv,
+  const SmallVectorImpl<ISD::InputArg> &Ins,
+  DebugLoc dl, SelectionDAG &DAG,
+  const CCValAssign &VA,
+  MachineFrameInfo *MFI,
+  unsigned i) const
+{
+  ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+  const bool PassedByVal = ArgFlags.isByVal();
+
+  // With guaranteed tail-call optimization every argument slot may be
+  // overwritten while lowering a tail call, so none may be immutable.
+  const bool ForceMutable = (CallConv == CallingConv::Fast) &&
+                            getTargetMachine().Options.GuaranteedTailCallOpt;
+
+  // FIXME: For now, all byval parameter objects are marked mutable. This can
+  // be changed with more analysis.
+  const bool Immutable = !(ForceMutable || PassedByVal);
+
+  const int FrameIdx =
+    MFI->CreateFixedObject(VA.getValVT().getSizeInBits() / 8,
+                           VA.getLocMemOffset(), Immutable);
+  SDValue FrameAddr = DAG.getFrameIndex(FrameIdx, getPointerTy());
+
+  // A byval argument is handed over as the address of its slot.
+  if (PassedByVal) {
+    return FrameAddr;
+  }
+
+  // Everything else is loaded from the slot.
+  return DAG.getLoad(VA.getValVT(), dl, Chain, FrameAddr,
+                     MachinePointerInfo::getFixedStack(FrameIdx),
+                     false, false, false, 0);
+}
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction generation functions
+//===----------------------------------------------------------------------===//
+/// Emit the instruction sequence that sign- or zero-extends an i8/i16 value
+/// (scalar, v2 or v4) inside its 32-bit container.
+///
+/// The value is reinterpreted as the matching i32 class, shifted left and
+/// then right by 24 (i8) or 16 (i16) bits, and converted back to the narrow
+/// class.
+///
+/// \param reg          virtual register holding the narrow value.
+/// \param signedShift  true selects the SHR opcode for the right shift, false
+///                     selects USHR.
+/// \param simpleVT     register class ID of \p reg.
+/// \returns a new virtual register of class \p simpleVT holding the result,
+///          or \p reg unchanged when \p simpleVT is not an i8/i16 class.
+uint32_t
+AMDILTargetLowering::addExtensionInstructions(
+  uint32_t reg, bool signedShift,
+  unsigned int simpleVT) const
+{
+  int bitsToShift = 0;
+  uint32_t shlOp, shrOp, widenOp, narrowOp;
+  uint32_t wideClassID;
+  switch (simpleVT) {
+  case AMDIL::GPRI8RegClassID:
+    bitsToShift = 24;
+    shlOp = AMDIL::SHL_i32;
+    shrOp = signedShift ? AMDIL::SHR_i32 : AMDIL::USHR_i32;
+    widenOp = AMDIL::IL_ASINT_i8;
+    narrowOp = AMDIL::IL_ASCHAR_i32;
+    wideClassID = AMDIL::GPRI32RegClassID;
+    break;
+  case AMDIL::GPRV2I8RegClassID:
+    bitsToShift = 24;
+    shlOp = AMDIL::SHL_v2i32;
+    shrOp = signedShift ? AMDIL::SHR_v2i32 : AMDIL::USHR_v2i32;
+    widenOp = AMDIL::IL_ASV2INT_v2i8;
+    narrowOp = AMDIL::IL_ASV2CHAR_v2i32;
+    wideClassID = AMDIL::GPRV2I32RegClassID;
+    break;
+  case AMDIL::GPRV4I8RegClassID:
+    bitsToShift = 24;
+    shlOp = AMDIL::SHL_v4i32;
+    shrOp = signedShift ? AMDIL::SHR_v4i32 : AMDIL::USHR_v4i32;
+    widenOp = AMDIL::IL_ASV4INT_v4i8;
+    narrowOp = AMDIL::IL_ASV4CHAR_v4i32;
+    wideClassID = AMDIL::GPRV4I32RegClassID;
+    break;
+  case AMDIL::GPRI16RegClassID:
+    bitsToShift = 16;
+    shlOp = AMDIL::SHL_i32;
+    shrOp = signedShift ? AMDIL::SHR_i32 : AMDIL::USHR_i32;
+    widenOp = AMDIL::IL_ASINT_i16;
+    narrowOp = AMDIL::IL_ASSHORT_i32;
+    wideClassID = AMDIL::GPRI32RegClassID;
+    break;
+  case AMDIL::GPRV2I16RegClassID:
+    bitsToShift = 16;
+    shlOp = AMDIL::SHL_v2i32;
+    shrOp = signedShift ? AMDIL::SHR_v2i32 : AMDIL::USHR_v2i32;
+    widenOp = AMDIL::IL_ASV2INT_v2i16;
+    narrowOp = AMDIL::IL_ASV2SHORT_v2i32;
+    wideClassID = AMDIL::GPRV2I32RegClassID;
+    break;
+  case AMDIL::GPRV4I16RegClassID:
+    bitsToShift = 16;
+    shlOp = AMDIL::SHL_v4i32;
+    shrOp = signedShift ? AMDIL::SHR_v4i32 : AMDIL::USHR_v4i32;
+    widenOp = AMDIL::IL_ASV4INT_v4i16;
+    narrowOp = AMDIL::IL_ASV4SHORT_v4i32;
+    wideClassID = AMDIL::GPRV4I32RegClassID;
+    break;
+  default:
+    // Nothing to extend for any other register class.
+    return reg;
+  }
+
+  // NOTE(review): the shift-amount register is created in the narrow class
+  // (simpleVT) although it is written by LOADCONST_i32 and consumed by i32
+  // shifts -- confirm that this is intended.
+  const uint32_t shiftAmountReg = genVReg(simpleVT);
+  const uint32_t widened = genVReg(wideClassID);
+  const uint32_t shiftedLeft = genVReg(wideClassID);
+  const uint32_t shiftedBack = genVReg(wideClassID);
+  const uint32_t result = genVReg(simpleVT);
+
+  generateMachineInst(widenOp, widened, reg);
+  generateMachineInst(AMDIL::LOADCONST_i32, shiftAmountReg)
+    .addImm(bitsToShift);
+  generateMachineInst(shlOp, shiftedLeft, widened, shiftAmountReg);
+  generateMachineInst(shrOp, shiftedBack, shiftedLeft, shiftAmountReg);
+  generateMachineInst(narrowOp, result, shiftedBack);
+  return result;
+}
+
+/// Return a register operand that stands in for \p op.
+///
+/// - A register operand is returned unchanged.
+/// - An integer or floating-point immediate is materialized with a
+///   LOADCONST instruction into a new virtual register whose class is taken
+///   from operand 0 of the parent instruction.
+/// - Basic-block, frame-index, constant-pool, jump-table, global and symbol
+///   operands are rewritten to register 0.
+/// - Any other operand kind (metadata, for instance) is returned unchanged.
+MachineOperand
+AMDILTargetLowering::convertToReg(MachineOperand op) const
+{
+  if (op.isReg()) {
+    return op;
+  }
+
+  if (op.isImm()) {
+    uint32_t constReg
+      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+    generateMachineInst(AMDIL::LOADCONST_i32, constReg)
+      .addImm(op.getImm());
+    op.ChangeToRegister(constReg, false);
+  } else if (op.isFPImm()) {
+    uint32_t constReg
+      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+    generateMachineInst(AMDIL::LOADCONST_f32, constReg)
+      .addFPImm(op.getFPImm());
+    op.ChangeToRegister(constReg, false);
+  } else if (op.isMBB() || op.isFI() || op.isCPI()
+             || op.isJTI() || op.isGlobal() || op.isSymbol()) {
+    // No value can be loaded for these operand kinds; use register 0.
+    op.ChangeToRegister(0, false);
+  }
+  return op;
+}
+
+// Expand a compare instruction into the AMDIL instruction(s) implementing it.
+//
+// MI carries the destination in operand 0, the AMDILCC condition code as an
+// immediate in operand 1 and the two values to compare in operands 2 and 3.
+// The new instructions are emitted through generateMachineInst() after the
+// insertion state has been set up with setPrivateData(BB, MI, ...).
+//
+// MI  - the compare instruction to expand.
+// BB  - basic block passed to setPrivateData().
+// TII - instruction info passed to setPrivateData().
+void
+AMDILTargetLowering::generateCMPInstr(
+ MachineInstr *MI,
+ MachineBasicBlock *BB,
+ const TargetInstrInfo& TII)
+const
+{
+ MachineOperand DST = MI->getOperand(0);
+ MachineOperand CC = MI->getOperand(1);
+ MachineOperand LHS = MI->getOperand(2);
+ MachineOperand RHS = MI->getOperand(3);
+ int64_t ccCode = CC.getImm();
+ // Register class of operand 0; used to pick vector opcode variants and to
+ // create temporaries.
+ unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+ unsigned int opCode = translateToOpcode(ccCode, simpleVT);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock::iterator BBI = MI;
+ setPrivateData(BB, BBI, &DL, &TII);
+ // Both inputs must be registers before they can be compared.
+ if (!LHS.isReg()) {
+ LHS = convertToReg(LHS);
+ }
+ if (!RHS.isReg()) {
+ RHS = convertToReg(RHS);
+ }
+ uint32_t lhsreg = LHS.getReg();
+ uint32_t rhsreg = RHS.getReg();
+ // Integer compares of 32 bits or narrower first pass both inputs through
+ // addExtensionInstructions(): signed for the I_* codes, unsigned for the
+ // U_* codes. That helper returns its input unchanged unless simpleVT is an
+ // i8/i16 register class.
+ switch (ccCode) {
+ case AMDILCC::IL_CC_I_EQ:
+ case AMDILCC::IL_CC_I_NE:
+ case AMDILCC::IL_CC_I_GE:
+ case AMDILCC::IL_CC_I_LT:
+ case AMDILCC::IL_CC_I_GT:
+ case AMDILCC::IL_CC_I_LE: {
+ lhsreg = addExtensionInstructions(lhsreg, true, simpleVT);
+ rhsreg = addExtensionInstructions(rhsreg, true, simpleVT);
+ }
+ break;
+ case AMDILCC::IL_CC_U_EQ:
+ case AMDILCC::IL_CC_U_NE:
+ case AMDILCC::IL_CC_U_GE:
+ case AMDILCC::IL_CC_U_LT:
+ case AMDILCC::IL_CC_U_GT:
+ case AMDILCC::IL_CC_U_LE: {
+ lhsreg = addExtensionInstructions(lhsreg, false, simpleVT);
+ rhsreg = addExtensionInstructions(rhsreg, false, simpleVT);
+ }
+ break;
+ }
+
+ switch (ccCode) {
+ // Condition codes implemented by one instruction with the operands in
+ // their original order.
+ case AMDILCC::IL_CC_I_EQ:
+ case AMDILCC::IL_CC_I_NE:
+ case AMDILCC::IL_CC_I_GE:
+ case AMDILCC::IL_CC_I_LT:
+ case AMDILCC::IL_CC_U_EQ:
+ case AMDILCC::IL_CC_U_NE:
+ case AMDILCC::IL_CC_U_GE:
+ case AMDILCC::IL_CC_U_LT:
+ case AMDILCC::IL_CC_D_EQ:
+ case AMDILCC::IL_CC_F_EQ:
+ case AMDILCC::IL_CC_F_OEQ:
+ case AMDILCC::IL_CC_D_OEQ:
+ case AMDILCC::IL_CC_D_NE:
+ case AMDILCC::IL_CC_F_NE:
+ case AMDILCC::IL_CC_F_UNE:
+ case AMDILCC::IL_CC_D_UNE:
+ case AMDILCC::IL_CC_D_GE:
+ case AMDILCC::IL_CC_F_GE:
+ case AMDILCC::IL_CC_D_OGE:
+ case AMDILCC::IL_CC_F_OGE:
+ case AMDILCC::IL_CC_D_LT:
+ case AMDILCC::IL_CC_F_LT:
+ case AMDILCC::IL_CC_F_OLT:
+ case AMDILCC::IL_CC_D_OLT:
+ generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
+ break;
+ // Condition codes implemented by the opposite-direction opcode with the
+ // operands swapped (translateToOpcode maps I_GT to ILT and *_LE to *GE).
+ // NOTE(review): translateToOpcode returns UGT, not ULT, for IL_CC_U_GT, yet
+ // the operands are swapped here as well -- confirm this is intended.
+ case AMDILCC::IL_CC_I_GT:
+ case AMDILCC::IL_CC_I_LE:
+ case AMDILCC::IL_CC_U_GT:
+ case AMDILCC::IL_CC_U_LE:
+ case AMDILCC::IL_CC_F_GT:
+ case AMDILCC::IL_CC_D_GT:
+ case AMDILCC::IL_CC_F_OGT:
+ case AMDILCC::IL_CC_D_OGT:
+ case AMDILCC::IL_CC_F_LE:
+ case AMDILCC::IL_CC_D_LE:
+ case AMDILCC::IL_CC_D_OLE:
+ case AMDILCC::IL_CC_F_OLE:
+ generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
+ break;
+ // Unordered float compares: the swapped compare OR'ed with "x != x" for
+ // each input.
+ case AMDILCC::IL_CC_F_UGT:
+ case AMDILCC::IL_CC_F_ULE: {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(opCode, VReg[0], rhsreg, lhsreg);
+ generateMachineInst(AMDIL::FNE, VReg[1], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::FNE, VReg[2], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ // Same as above, but the compare keeps the original operand order.
+ case AMDILCC::IL_CC_F_ULT:
+ case AMDILCC::IL_CC_F_UGE: {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(opCode, VReg[0], lhsreg, rhsreg);
+ generateMachineInst(AMDIL::FNE, VReg[1], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::FNE, VReg[2], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ // Unordered double compares, swapped operand order. Temporaries are created
+ // in the GPRF64 register class and combined with the f32 OR opcode.
+ case AMDILCC::IL_CC_D_UGT:
+ case AMDILCC::IL_CC_D_ULE: {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(opCode, VReg[0], rhsreg, lhsreg);
+ generateMachineInst(AMDIL::DNE, VReg[1], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::DNE, VReg[2], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ // Unordered double compares, original operand order.
+ case AMDILCC::IL_CC_D_UGE:
+ case AMDILCC::IL_CC_D_ULT: {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(opCode, VReg[0], lhsreg, rhsreg);
+ generateMachineInst(AMDIL::DNE, VReg[1], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::DNE, VReg[2], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ // Unordered-or-equal (float): (lhs == rhs) | (lhs != lhs) | (rhs != rhs).
+ case AMDILCC::IL_CC_F_UEQ: {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(AMDIL::FEQ, VReg[0], lhsreg, rhsreg);
+ generateMachineInst(AMDIL::FNE, VReg[1], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::FNE, VReg[2], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ // Ordered-and-not-equal (float): (lhs != rhs) & (lhs == lhs) & (rhs == rhs).
+ case AMDILCC::IL_CC_F_ONE: {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(AMDIL::FNE, VReg[0], lhsreg, rhsreg);
+ generateMachineInst(AMDIL::FEQ, VReg[1], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::FEQ, VReg[2], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ // Unordered-or-equal (double).
+ case AMDILCC::IL_CC_D_UEQ: {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DEQ, VReg[0], lhsreg, rhsreg);
+ generateMachineInst(AMDIL::DNE, VReg[1], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::DNE, VReg[2], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+
+ }
+ break;
+ // Ordered-and-not-equal (double).
+ case AMDILCC::IL_CC_D_ONE: {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DNE, VReg[0], lhsreg, rhsreg);
+ generateMachineInst(AMDIL::DEQ, VReg[1], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::DEQ, VReg[2], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+
+ }
+ break;
+ // Ordered (float): each input compares equal to itself.
+ case AMDILCC::IL_CC_F_O: {
+ uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+ generateMachineInst(AMDIL::FEQ, VReg[0], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::FEQ, VReg[1], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ // Ordered (double).
+ case AMDILCC::IL_CC_D_O: {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DEQ, VReg[0], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::DEQ, VReg[1], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ // Unordered (float): at least one input differs from itself.
+ case AMDILCC::IL_CC_F_UO: {
+ uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+ generateMachineInst(AMDIL::FNE, VReg[0], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::FNE, VReg[1], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ // Unordered (double).
+ case AMDILCC::IL_CC_D_UO: {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DNE, VReg[0], rhsreg, rhsreg);
+ generateMachineInst(AMDIL::DNE, VReg[1], lhsreg, lhsreg);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ // 64-bit integer compares: one instruction when the device reports hardware
+ // LongOps, otherwise handed to generateLongRelational().
+ case AMDILCC::IL_CC_L_LE:
+ case AMDILCC::IL_CC_L_GE:
+ case AMDILCC::IL_CC_L_EQ:
+ case AMDILCC::IL_CC_L_NE:
+ case AMDILCC::IL_CC_L_LT:
+ case AMDILCC::IL_CC_L_GT:
+ case AMDILCC::IL_CC_UL_LE:
+ case AMDILCC::IL_CC_UL_GE:
+ case AMDILCC::IL_CC_UL_EQ:
+ case AMDILCC::IL_CC_UL_NE:
+ case AMDILCC::IL_CC_UL_LT:
+ case AMDILCC::IL_CC_UL_GT: {
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
+ generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
+ } else {
+ generateLongRelational(MI, opCode);
+ }
+ }
+ break;
+ case AMDILCC::COND_ERROR:
+ assert(0 && "Invalid CC code");
+ break;
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF())
+{
+ setBooleanVectorContents( ZeroOrNegativeOneBooleanContent );
+ int types[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] = {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] = {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t numTypes = sizeof(types) / sizeof(*types);
+ size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ // These are the current register classes that are
+ // supported
+
+ addRegisterClass(MVT::i32, &AMDIL::GPRI32RegClass);
+ addRegisterClass(MVT::f32, &AMDIL::GPRF32RegClass);
+
+ if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+ addRegisterClass(MVT::f64, &AMDIL::GPRF64RegClass);
+ addRegisterClass(MVT::v2f64, &AMDIL::GPRV2F64RegClass);
+ }
+ if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
+ addRegisterClass(MVT::i8, &AMDIL::GPRI8RegClass);
+ addRegisterClass(MVT::v2i8, &AMDIL::GPRV2I8RegClass);
+ addRegisterClass(MVT::v4i8, &AMDIL::GPRV4I8RegClass);
+ setOperationAction(ISD::Constant , MVT::i8 , Legal);
+ }
+ if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
+ addRegisterClass(MVT::i16, &AMDIL::GPRI16RegClass);
+ addRegisterClass(MVT::v2i16, &AMDIL::GPRV2I16RegClass);
+ addRegisterClass(MVT::v4i16, &AMDIL::GPRV4I16RegClass);
+ setOperationAction(ISD::Constant , MVT::i16 , Legal);
+ }
+ addRegisterClass(MVT::v2f32, &AMDIL::GPRV2F32RegClass);
+ addRegisterClass(MVT::v4f32, &AMDIL::GPRV4F32RegClass);
+ addRegisterClass(MVT::v2i32, &AMDIL::GPRV2I32RegClass);
+ addRegisterClass(MVT::v4i32, &AMDIL::GPRV4I32RegClass);
+ if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+ addRegisterClass(MVT::i64, &AMDIL::GPRI64RegClass);
+ addRegisterClass(MVT::v2i64, &AMDIL::GPRV2I64RegClass);
+ }
+
+ // Make some ops legal since the "generic" target lowering made them expand
+ // (See lib/CodeGen/SelectionDag/TargetLowering.cpp)
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FLOG , MVT::f32, Legal);
+ // Set explicitly to expand in case default changes
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+
+ for (unsigned int x = 0; x < numTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+ setOperationAction(ISD::OR, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_CC, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISDBITCAST, VT, Custom);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ }
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ }
+ for (unsigned int x = 0; x < numFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < numIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+ // GPU also does not have divrem function for signed or unsigned
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ // setOperationAction(ISD::VSETCC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+
+ }
+ setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
+ if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+ if (stm->calVersion() < CAL_VERSION_SC_139
+ || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
+ }
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+ setOperationAction(ISD::Constant , MVT::i64 , Legal);
+ setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+ // we support loading/storing v2f64 but not operations on the type
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+ // TODO: Fix the UDIV24 algorithm so it works for these
+ // types correctly. This needs vector comparisons
+ // for this to work correctly.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::Other, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+ setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
+ setOperationAction(ISD::Constant , MVT::i32 , Legal);
+ setOperationAction(ISD::TRAP , MVT::Other , Legal);
+
+ setStackPointerRegisterToSaveRestore(AMDIL::SP);
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setPrefLoopAlignment(16);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+ computeRegisterProperties();
+
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+
+#undef numTypes
+#undef numIntTypes
+#undef numVectorTypes
+#undef numFloatTypes
+}
+
+// Reports whether unaligned loads/stores of the value type VT are allowed.
+// The answer is only correct for the region/local/global address spaces on
+// EG/NI, because the other address spaces require 128-bit alignment of
+// loads/stores. There is no way to disable this for those address spaces
+// only, so the decision is made purely per type.
+// TODO: Modify this API call to pass in the address space/instruction
+bool
+AMDILTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const
+{
+  const AMDILSubtarget &subtarget =
+    getTargetMachine().getSubtarget<AMDILSubtarget>();
+
+  // 7XX (checked here as the HD4XXX generation) does not allow unaligned
+  // memory accesses for any type.
+  if (subtarget.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    return false;
+  }
+
+  // Four-component 32-bit vectors.
+  if (VT == MVT::v4f32 || VT == MVT::v4i32) {
+    return true;
+  }
+  // Two-component 32-bit vectors.
+  if (VT == MVT::v2f32 || VT == MVT::v2i32) {
+    return true;
+  }
+  // 64-bit scalars.
+  if (VT == MVT::f64 || VT == MVT::i64) {
+    return true;
+  }
+  // Two-component 64-bit vectors; every other type is rejected.
+  return VT == MVT::v2f64 || VT == MVT::v2i64;
+}
+
+const char *
+AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default:
+ return 0;
+ case AMDILISD::INTTOANY:
+ return "AMDILISD::INTTOANY";
+ case AMDILISD::DP_TO_FP:
+ return "AMDILISD::DP_TO_FP";
+ case AMDILISD::FP_TO_DP:
+ return "AMDILISD::FP_TO_DP";
+ case AMDILISD::BITCONV:
+ return "AMDILISD::BITCONV";
+ case AMDILISD::CMOV:
+ return "AMDILISD::CMOV";
+ case AMDILISD::CMOVLOG:
+ return "AMDILISD::CMOVLOG";
+ case AMDILISD::INEGATE:
+ return "AMDILISD::INEGATE";
+ case AMDILISD::MAD:
+ return "AMDILISD::MAD";
+ case AMDILISD::UMAD:
+ return "AMDILISD::UMAD";
+ case AMDILISD::CALL:
+ return "AMDILISD::CALL";
+ case AMDILISD::RET:
+ return "AMDILISD::RET";
+ case AMDILISD::IFFB_HI:
+ return "AMDILISD::IFFB_HI";
+ case AMDILISD::IFFB_LO:
+ return "AMDILISD::IFFB_LO";
+ case AMDILISD::ADD:
+ return "AMDILISD::ADD";
+ case AMDILISD::UMUL:
+ return "AMDILISD::UMUL";
+ case AMDILISD::AND:
+ return "AMDILISD::AND";
+ case AMDILISD::OR:
+ return "AMDILISD::OR";
+ case AMDILISD::NOT:
+ return "AMDILISD::NOT";
+ case AMDILISD::XOR:
+ return "AMDILISD::XOR";
+ case AMDILISD::DIV_INF:
+ return "AMDILISD::DIV_INF";
+ case AMDILISD::SMAX:
+ return "AMDILISD::SMAX";
+ case AMDILISD::PHIMOVE:
+ return "AMDILISD::PHIMOVE";
+ case AMDILISD::MOVE:
+ return "AMDILISD::MOVE";
+ case AMDILISD::VBUILD:
+ return "AMDILISD::VBUILD";
+ case AMDILISD::VEXTRACT:
+ return "AMDILISD::VEXTRACT";
+ case AMDILISD::VINSERT:
+ return "AMDILISD::VINSERT";
+ case AMDILISD::VCONCAT:
+ return "AMDILISD::VCONCAT";
+ case AMDILISD::LCREATE:
+ return "AMDILISD::LCREATE";
+ case AMDILISD::LCOMPHI:
+ return "AMDILISD::LCOMPHI";
+ case AMDILISD::LCOMPLO:
+ return "AMDILISD::LCOMPLO";
+ case AMDILISD::DCREATE:
+ return "AMDILISD::DCREATE";
+ case AMDILISD::DCOMPHI:
+ return "AMDILISD::DCOMPHI";
+ case AMDILISD::DCOMPLO:
+ return "AMDILISD::DCOMPLO";
+ case AMDILISD::LCREATE2:
+ return "AMDILISD::LCREATE2";
+ case AMDILISD::LCOMPHI2:
+ return "AMDILISD::LCOMPHI2";
+ case AMDILISD::LCOMPLO2:
+ return "AMDILISD::LCOMPLO2";
+ case AMDILISD::DCREATE2:
+ return "AMDILISD::DCREATE2";
+ case AMDILISD::DCOMPHI2:
+ return "AMDILISD::DCOMPHI2";
+ case AMDILISD::DCOMPLO2:
+ return "AMDILISD::DCOMPLO2";
+ case AMDILISD::CMP:
+ return "AMDILISD::CMP";
+ case AMDILISD::IL_CC_I_LT:
+ return "AMDILISD::IL_CC_I_LT";
+ case AMDILISD::IL_CC_I_LE:
+ return "AMDILISD::IL_CC_I_LE";
+ case AMDILISD::IL_CC_I_GT:
+ return "AMDILISD::IL_CC_I_GT";
+ case AMDILISD::IL_CC_I_GE:
+ return "AMDILISD::IL_CC_I_GE";
+ case AMDILISD::IL_CC_I_EQ:
+ return "AMDILISD::IL_CC_I_EQ";
+ case AMDILISD::IL_CC_I_NE:
+ return "AMDILISD::IL_CC_I_NE";
+ case AMDILISD::RET_FLAG:
+ return "AMDILISD::RET_FLAG";
+ case AMDILISD::BRANCH_COND:
+ return "AMDILISD::BRANCH_COND";
+ case AMDILISD::LOOP_NZERO:
+ return "AMDILISD::LOOP_NZERO";
+ case AMDILISD::LOOP_ZERO:
+ return "AMDILISD::LOOP_ZERO";
+ case AMDILISD::LOOP_CMP:
+ return "AMDILISD::LOOP_CMP";
+ case AMDILISD::ADDADDR:
+ return "AMDILISD::ADDADDR";
+ case AMDILISD::ATOM_F_ADD:
+ return "AMDILISD::ATOM_F_ADD";
+ case AMDILISD::ATOM_F_AND:
+ return "AMDILISD::ATOM_F_AND";
+ case AMDILISD::ATOM_F_CMPXCHG:
+ return "AMDILISD::ATOM_F_CMPXCHG";
+ case AMDILISD::ATOM_F_DEC:
+ return "AMDILISD::ATOM_F_DEC";
+ case AMDILISD::ATOM_F_INC:
+ return "AMDILISD::ATOM_F_INC";
+ case AMDILISD::ATOM_F_MAX:
+ return "AMDILISD::ATOM_F_MAX";
+ case AMDILISD::ATOM_F_UMAX:
+ return "AMDILISD::ATOM_F_UMAX";
+ case AMDILISD::ATOM_F_MIN:
+ return "AMDILISD::ATOM_F_MIN";
+ case AMDILISD::ATOM_F_UMIN:
+ return "AMDILISD::ATOM_F_UMIN";
+ case AMDILISD::ATOM_F_OR:
+ return "AMDILISD::ATOM_F_OR";
+ case AMDILISD::ATOM_F_SUB:
+ return "AMDILISD::ATOM_F_SUB";
+ case AMDILISD::ATOM_F_XCHG:
+ return "AMDILISD::ATOM_F_XCHG";
+ case AMDILISD::ATOM_F_XOR:
+ return "AMDILISD::ATOM_F_XOR";
+ case AMDILISD::ATOM_G_ADD:
+ return "AMDILISD::ATOM_G_ADD";
+ case AMDILISD::ATOM_G_AND:
+ return "AMDILISD::ATOM_G_AND";
+ case AMDILISD::ATOM_G_CMPXCHG:
+ return "AMDILISD::ATOM_G_CMPXCHG";
+ case AMDILISD::ATOM_G_DEC:
+ return "AMDILISD::ATOM_G_DEC";
+ case AMDILISD::ATOM_G_INC:
+ return "AMDILISD::ATOM_G_INC";
+ case AMDILISD::ATOM_G_MAX:
+ return "AMDILISD::ATOM_G_MAX";
+ case AMDILISD::ATOM_G_UMAX:
+ return "AMDILISD::ATOM_G_UMAX";
+ case AMDILISD::ATOM_G_MIN:
+ return "AMDILISD::ATOM_G_MIN";
+ case AMDILISD::ATOM_G_UMIN:
+ return "AMDILISD::ATOM_G_UMIN";
+ case AMDILISD::ATOM_G_OR:
+ return "AMDILISD::ATOM_G_OR";
+ case AMDILISD::ATOM_G_SUB:
+ return "AMDILISD::ATOM_G_SUB";
+ case AMDILISD::ATOM_G_RSUB:
+ return "AMDILISD::ATOM_G_RSUB";
+ case AMDILISD::ATOM_G_XCHG:
+ return "AMDILISD::ATOM_G_XCHG";
+ case AMDILISD::ATOM_G_XOR:
+ return "AMDILISD::ATOM_G_XOR";
+ case AMDILISD::ATOM_G_ADD_NORET:
+ return "AMDILISD::ATOM_G_ADD_NORET";
+ case AMDILISD::ATOM_G_AND_NORET:
+ return "AMDILISD::ATOM_G_AND_NORET";
+ case AMDILISD::ATOM_G_CMPXCHG_NORET:
+ return "AMDILISD::ATOM_G_CMPXCHG_NORET";
+ case AMDILISD::ATOM_G_DEC_NORET:
+ return "AMDILISD::ATOM_G_DEC_NORET";
+ case AMDILISD::ATOM_G_INC_NORET:
+ return "AMDILISD::ATOM_G_INC_NORET";
+ case AMDILISD::ATOM_G_MAX_NORET:
+ return "AMDILISD::ATOM_G_MAX_NORET";
+ case AMDILISD::ATOM_G_UMAX_NORET:
+ return "AMDILISD::ATOM_G_UMAX_NORET";
+ case AMDILISD::ATOM_G_MIN_NORET:
+ return "AMDILISD::ATOM_G_MIN_NORET";
+ case AMDILISD::ATOM_G_UMIN_NORET:
+ return "AMDILISD::ATOM_G_UMIN_NORET";
+ case AMDILISD::ATOM_G_OR_NORET:
+ return "AMDILISD::ATOM_G_OR_NORET";
+ case AMDILISD::ATOM_G_SUB_NORET:
+ return "AMDILISD::ATOM_G_SUB_NORET";
+ case AMDILISD::ATOM_G_RSUB_NORET:
+ return "AMDILISD::ATOM_G_RSUB_NORET";
+ case AMDILISD::ATOM_G_XCHG_NORET:
+ return "AMDILISD::ATOM_G_XCHG_NORET";
+ case AMDILISD::ATOM_G_XOR_NORET:
+ return "AMDILISD::ATOM_G_XOR_NORET";
+ case AMDILISD::ATOM_L_ADD:
+ return "AMDILISD::ATOM_L_ADD";
+ case AMDILISD::ATOM_L_AND:
+ return "AMDILISD::ATOM_L_AND";
+ case AMDILISD::ATOM_L_CMPXCHG:
+ return "AMDILISD::ATOM_L_CMPXCHG";
+ case AMDILISD::ATOM_L_DEC:
+ return "AMDILISD::ATOM_L_DEC";
+ case AMDILISD::ATOM_L_INC:
+ return "AMDILISD::ATOM_L_INC";
+ case AMDILISD::ATOM_L_MAX:
+ return "AMDILISD::ATOM_L_MAX";
+ case AMDILISD::ATOM_L_UMAX:
+ return "AMDILISD::ATOM_L_UMAX";
+ case AMDILISD::ATOM_L_MIN:
+ return "AMDILISD::ATOM_L_MIN";
+ case AMDILISD::ATOM_L_UMIN:
+ return "AMDILISD::ATOM_L_UMIN";
+ case AMDILISD::ATOM_L_OR:
+ return "AMDILISD::ATOM_L_OR";
+ case AMDILISD::ATOM_L_SUB:
+ return "AMDILISD::ATOM_L_SUB";
+ case AMDILISD::ATOM_L_RSUB:
+ return "AMDILISD::ATOM_L_RSUB";
+ case AMDILISD::ATOM_L_XCHG:
+ return "AMDILISD::ATOM_L_XCHG";
+ case AMDILISD::ATOM_L_XOR:
+ return "AMDILISD::ATOM_L_XOR";
+ case AMDILISD::ATOM_L_ADD_NORET:
+ return "AMDILISD::ATOM_L_ADD_NORET";
+ case AMDILISD::ATOM_L_AND_NORET:
+ return "AMDILISD::ATOM_L_AND_NORET";
+ case AMDILISD::ATOM_L_CMPXCHG_NORET:
+ return "AMDILISD::ATOM_L_CMPXCHG_NORET";
+ case AMDILISD::ATOM_L_DEC_NORET:
+ return "AMDILISD::ATOM_L_DEC_NORET";
+ case AMDILISD::ATOM_L_INC_NORET:
+ return "AMDILISD::ATOM_L_INC_NORET";
+ case AMDILISD::ATOM_L_MAX_NORET:
+ return "AMDILISD::ATOM_L_MAX_NORET";
+ case AMDILISD::ATOM_L_UMAX_NORET:
+ return "AMDILISD::ATOM_L_UMAX_NORET";
+ case AMDILISD::ATOM_L_MIN_NORET:
+ return "AMDILISD::ATOM_L_MIN_NORET";
+ case AMDILISD::ATOM_L_UMIN_NORET:
+ return "AMDILISD::ATOM_L_UMIN_NORET";
+ case AMDILISD::ATOM_L_OR_NORET:
+ return "AMDILISD::ATOM_L_OR_NORET";
+ case AMDILISD::ATOM_L_SUB_NORET:
+ return "AMDILISD::ATOM_L_SUB_NORET";
+ case AMDILISD::ATOM_L_RSUB_NORET:
+ return "AMDILISD::ATOM_L_RSUB_NORET";
+ case AMDILISD::ATOM_L_XCHG_NORET:
+ return "AMDILISD::ATOM_L_XCHG_NORET";
+ case AMDILISD::ATOM_R_ADD:
+ return "AMDILISD::ATOM_R_ADD";
+ case AMDILISD::ATOM_R_AND:
+ return "AMDILISD::ATOM_R_AND";
+ case AMDILISD::ATOM_R_CMPXCHG:
+ return "AMDILISD::ATOM_R_CMPXCHG";
+ case AMDILISD::ATOM_R_DEC:
+ return "AMDILISD::ATOM_R_DEC";
+ case AMDILISD::ATOM_R_INC:
+ return "AMDILISD::ATOM_R_INC";
+ case AMDILISD::ATOM_R_MAX:
+ return "AMDILISD::ATOM_R_MAX";
+ case AMDILISD::ATOM_R_UMAX:
+ return "AMDILISD::ATOM_R_UMAX";
+ case AMDILISD::ATOM_R_MIN:
+ return "AMDILISD::ATOM_R_MIN";
+ case AMDILISD::ATOM_R_UMIN:
+ return "AMDILISD::ATOM_R_UMIN";
+ case AMDILISD::ATOM_R_OR:
+ return "AMDILISD::ATOM_R_OR";
+ case AMDILISD::ATOM_R_MSKOR:
+ return "AMDILISD::ATOM_R_MSKOR";
+ case AMDILISD::ATOM_R_SUB:
+ return "AMDILISD::ATOM_R_SUB";
+ case AMDILISD::ATOM_R_RSUB:
+ return "AMDILISD::ATOM_R_RSUB";
+ case AMDILISD::ATOM_R_XCHG:
+ return "AMDILISD::ATOM_R_XCHG";
+ case AMDILISD::ATOM_R_XOR:
+ return "AMDILISD::ATOM_R_XOR";
+ case AMDILISD::ATOM_R_ADD_NORET:
+ return "AMDILISD::ATOM_R_ADD_NORET";
+ case AMDILISD::ATOM_R_AND_NORET:
+ return "AMDILISD::ATOM_R_AND_NORET";
+ case AMDILISD::ATOM_R_CMPXCHG_NORET:
+ return "AMDILISD::ATOM_R_CMPXCHG_NORET";
+ case AMDILISD::ATOM_R_DEC_NORET:
+ return "AMDILISD::ATOM_R_DEC_NORET";
+ case AMDILISD::ATOM_R_INC_NORET:
+ return "AMDILISD::ATOM_R_INC_NORET";
+ case AMDILISD::ATOM_R_MAX_NORET:
+ return "AMDILISD::ATOM_R_MAX_NORET";
+ case AMDILISD::ATOM_R_UMAX_NORET:
+ return "AMDILISD::ATOM_R_UMAX_NORET";
+ case AMDILISD::ATOM_R_MIN_NORET:
+ return "AMDILISD::ATOM_R_MIN_NORET";
+ case AMDILISD::ATOM_R_UMIN_NORET:
+ return "AMDILISD::ATOM_R_UMIN_NORET";
+ case AMDILISD::ATOM_R_OR_NORET:
+ return "AMDILISD::ATOM_R_OR_NORET";
+ case AMDILISD::ATOM_R_MSKOR_NORET:
+ return "AMDILISD::ATOM_R_MSKOR_NORET";
+ case AMDILISD::ATOM_R_SUB_NORET:
+ return "AMDILISD::ATOM_R_SUB_NORET";
+ case AMDILISD::ATOM_R_RSUB_NORET:
+ return "AMDILISD::ATOM_R_RSUB_NORET";
+ case AMDILISD::ATOM_R_XCHG_NORET:
+ return "AMDILISD::ATOM_R_XCHG_NORET";
+ case AMDILISD::ATOM_R_XOR_NORET:
+ return "AMDILISD::ATOM_R_XOR_NORET";
+ case AMDILISD::APPEND_ALLOC:
+ return "AMDILISD::APPEND_ALLOC";
+ case AMDILISD::APPEND_CONSUME:
+ return "AMDILISD::APPEND_CONSUME";
+ };
+}
+
+/// getSetCCResultType - Pick the result value type of an ISD::SETCC whose
+/// type is VT: vector types map to VT.changeVectorElementTypeToInteger(),
+/// every non-vector type maps to i32.
+EVT AMDILTargetLowering::getSetCCResultType(EVT VT) const
+{
+  if (VT.isVector()) {
+    return VT.changeVectorElementTypeToInteger();
+  }
+  return MVT::i32;
+}
+
+
+bool
+AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const
+{
+ if (Intrinsic <= AMDILIntrinsic::last_non_AMDIL_intrinsic
+ || Intrinsic > AMDILIntrinsic::num_AMDIL_intrinsics) {
+ return false;
+ }
+ bool bitCastToInt = false;
+ unsigned IntNo;
+ bool isRet = true;
+ const AMDILSubtarget *STM = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ switch (Intrinsic) {
+ default:
+ return false; // Don't custom lower most intrinsics.
+ case AMDILIntrinsic::AMDIL_atomic_add_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_add_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_add_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_add_gu64:
+ IntNo = AMDILISD::ATOM_G_ADD;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_add_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_ADD_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_add_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_add_li32:
+ case AMDILIntrinsic::AMDIL_atomic_add_lu64:
+ case AMDILIntrinsic::AMDIL_atomic_add_li64:
+ IntNo = AMDILISD::ATOM_L_ADD;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_add_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_ADD_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_add_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_add_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_add_ru64:
+ case AMDILIntrinsic::AMDIL_atomic_add_ri64:
+ IntNo = AMDILISD::ATOM_R_ADD;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_add_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_ADD_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_and_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_and_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_and_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_and_gu64:
+ IntNo = AMDILISD::ATOM_G_AND;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_and_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_AND_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_and_li32:
+ case AMDILIntrinsic::AMDIL_atomic_and_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_and_li64:
+ case AMDILIntrinsic::AMDIL_atomic_and_lu64:
+ IntNo = AMDILISD::ATOM_L_AND;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_and_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_AND_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_and_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_and_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_and_ri64:
+ case AMDILIntrinsic::AMDIL_atomic_and_ru64:
+ IntNo = AMDILISD::ATOM_R_AND;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_and_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_AND_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu64:
+ IntNo = AMDILISD::ATOM_G_CMPXCHG;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li32:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li64:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu64:
+ IntNo = AMDILISD::ATOM_L_CMPXCHG;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri64:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru64:
+ IntNo = AMDILISD::ATOM_R_CMPXCHG;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_dec_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_dec_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_dec_gu64:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_G_SUB;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_gu64_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_G_SUB_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_li32:
+ case AMDILIntrinsic::AMDIL_atomic_dec_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_dec_li64:
+ case AMDILIntrinsic::AMDIL_atomic_dec_lu64:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_L_SUB;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_lu64_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_L_SUB_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_dec_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_dec_ri64:
+ case AMDILIntrinsic::AMDIL_atomic_dec_ru64:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_R_SUB;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_ru64_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_R_SUB_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_inc_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_inc_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_inc_gu64:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_G_ADD;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_gu64_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_G_ADD_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_li32:
+ case AMDILIntrinsic::AMDIL_atomic_inc_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_inc_li64:
+ case AMDILIntrinsic::AMDIL_atomic_inc_lu64:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_L_ADD;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_lu64_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_L_ADD_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_inc_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_inc_ri64:
+ case AMDILIntrinsic::AMDIL_atomic_inc_ru64:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_R_ADD;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_ru64_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_R_ADD_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_max_gi64:
+ IntNo = AMDILISD::ATOM_G_MAX;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_max_gu64:
+ IntNo = AMDILISD::ATOM_G_UMAX;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_max_gi64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_MAX_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_max_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_UMAX_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_li32:
+ case AMDILIntrinsic::AMDIL_atomic_max_li64:
+ IntNo = AMDILISD::ATOM_L_MAX;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_max_lu64:
+ IntNo = AMDILISD::ATOM_L_UMAX;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_max_li64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_MAX_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_max_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_UMAX_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_max_ri64:
+ IntNo = AMDILISD::ATOM_R_MAX;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_max_ru64:
+ IntNo = AMDILISD::ATOM_R_UMAX;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_max_ri64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_MAX_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_max_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_UMAX_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_min_gi64:
+ IntNo = AMDILISD::ATOM_G_MIN;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_min_gu64:
+ IntNo = AMDILISD::ATOM_G_UMIN;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_min_gi64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_MIN_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_min_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_UMIN_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_li32:
+ case AMDILIntrinsic::AMDIL_atomic_min_li64:
+ IntNo = AMDILISD::ATOM_L_MIN;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_min_lu64:
+ IntNo = AMDILISD::ATOM_L_UMIN;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_min_li64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_MIN_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_min_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_UMIN_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_min_ri64:
+ IntNo = AMDILISD::ATOM_R_MIN;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_min_ru64:
+ IntNo = AMDILISD::ATOM_R_UMIN;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_min_ri64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_MIN_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_min_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_min_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_UMIN_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_or_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_or_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_or_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_or_gu64:
+ IntNo = AMDILISD::ATOM_G_OR;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_or_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_OR_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_or_li32:
+ case AMDILIntrinsic::AMDIL_atomic_or_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_or_li64:
+ case AMDILIntrinsic::AMDIL_atomic_or_lu64:
+ IntNo = AMDILISD::ATOM_L_OR;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_or_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_OR_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_or_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_or_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_or_ri64:
+ case AMDILIntrinsic::AMDIL_atomic_or_ru64:
+ IntNo = AMDILISD::ATOM_R_OR;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_or_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_OR_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_sub_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_sub_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_sub_gu64:
+ IntNo = AMDILISD::ATOM_G_SUB;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_SUB_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_li32:
+ case AMDILIntrinsic::AMDIL_atomic_sub_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_sub_li64:
+ case AMDILIntrinsic::AMDIL_atomic_sub_lu64:
+ IntNo = AMDILISD::ATOM_L_SUB;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_SUB_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_sub_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_sub_ri64:
+ case AMDILIntrinsic::AMDIL_atomic_sub_ru64:
+ IntNo = AMDILISD::ATOM_R_SUB;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_SUB_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gu64:
+ IntNo = AMDILISD::ATOM_G_RSUB;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_RSUB_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_li32:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_li64:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_lu64:
+ IntNo = AMDILISD::ATOM_L_RSUB;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_RSUB_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ri64:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ru64:
+ IntNo = AMDILISD::ATOM_R_RSUB;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_RSUB_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gf32:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gu64:
+ IntNo = AMDILISD::ATOM_G_XCHG;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gf32_noret:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_XCHG_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lf32:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_li32:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_li64:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lu64:
+ IntNo = AMDILISD::ATOM_L_XCHG;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lf32_noret:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_XCHG_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_rf32:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ri64:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ru64:
+ IntNo = AMDILISD::ATOM_R_XCHG;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_rf32_noret:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_XCHG_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_xor_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_xor_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_xor_gu64:
+ IntNo = AMDILISD::ATOM_G_XOR;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_gu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_gi64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_gu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_XOR_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_li32:
+ case AMDILIntrinsic::AMDIL_atomic_xor_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_xor_li64:
+ case AMDILIntrinsic::AMDIL_atomic_xor_lu64:
+ IntNo = AMDILISD::ATOM_L_XOR;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_lu32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_li64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_lu64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_XOR_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_xor_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_xor_ri64:
+ case AMDILIntrinsic::AMDIL_atomic_xor_ru64:
+ IntNo = AMDILISD::ATOM_R_XOR;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_ru32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_ri64_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_ru64_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_XOR_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_append_alloc_i32:
+ IntNo = AMDILISD::APPEND_ALLOC;
+ break;
+ case AMDILIntrinsic::AMDIL_append_consume_i32:
+ IntNo = AMDILISD::APPEND_CONSUME;
+ break;
+ };
+ const AMDILSubtarget *stm = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ AMDILKernelManager *KM = const_cast<AMDILKernelManager*>(
+ stm->getKernelManager());
+ KM->setOutputInst();
+
+ Info.opc = IntNo;
+ Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
+ Info.ptrVal = I.getOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.vol = true;
+ Info.readMem = isRet;
+ Info.writeMem = true;
+ return true;
+}
+ // Floating point immediates are legal when the scalar element type of VT
+ // is f32 or f64; every other type is rejected. Imm itself is not inspected.
+ bool
+ AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
+ {
+   switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+   case MVT::f32:
+   case MVT::f64:
+     return true;
+   default:
+     return false;
+   }
+ }
+
+ // FP constants whose scalar element type is f32 or f64 are never shrunk;
+ // shrinking is allowed for every other type.
+ bool
+ AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
+ {
+   switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+   case MVT::f32:
+   case MVT::f64:
+     return false;
+   default:
+     return true;
+   }
+ }
+
+
+ // computeMaskedBitsForTargetNode - Determine which bits of the target
+ // specific node Op are known to be zero or one and report them through
+ // KnownZero / KnownOne. Both outputs are reset to "nothing known" first.
+ // Depth is the current recursion depth of the known-bits query.
+ void
+ AMDILTargetLowering::computeMaskedBitsForTargetNode(
+     const SDValue Op,
+     APInt &KnownZero,
+     APInt &KnownOne,
+     const SelectionDAG &DAG,
+     unsigned Depth) const
+ {
+   APInt KnownZero2;
+   APInt KnownOne2;
+   unsigned BitWidth = KnownZero.getBitWidth();
+   KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything
+   switch (Op.getOpcode()) {
+   default:
+     break;
+   case AMDILISD::SELECT_CC:
+     // Query both operands that are merged below. Every recursive query
+     // has to advance Depth; otherwise the recursion limit is reset for
+     // that operand and a long chain of selects is walked without bound.
+     DAG.ComputeMaskedBits(
+         Op.getOperand(1),
+         KnownZero,
+         KnownOne,
+         Depth + 1
+         );
+     DAG.ComputeMaskedBits(
+         Op.getOperand(0),
+         KnownZero2,
+         KnownOne2,
+         Depth + 1
+         );
+     assert((KnownZero & KnownOne) == 0
+            && "Bits known to be one AND zero?");
+     assert((KnownZero2 & KnownOne2) == 0
+            && "Bits known to be one AND zero?");
+     // Only known if known in both the LHS and RHS
+     KnownOne &= KnownOne2;
+     KnownZero &= KnownZero2;
+     break;
+   };
+ }
+
+ // Returns the calling-convention assignment function to use for a call.
+ // Only one convention exists at the moment, so the Op argument is ignored.
+ CCAssignFn*
+ AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
+ {
+ // Disabled: uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ return CC_AMDIL32; // the single assignment function currently available
+ }
+
+ // LowerCallResult - Copy the values returned by a call out of the physical
+ // registers chosen by RetCC_AMDIL32 and append them to InVals.
+ // Chain/InFlag are the incoming chain and glue; the chain that results
+ // from the last copy is returned.
+ SDValue
+ AMDILTargetLowering::LowerCallResult(
+     SDValue Chain,
+     SDValue InFlag,
+     CallingConv::ID CallConv,
+     bool isVarArg,
+     const SmallVectorImpl<ISD::InputArg> &Ins,
+     DebugLoc dl,
+     SelectionDAG &DAG,
+     SmallVectorImpl<SDValue> &InVals) const
+ {
+   // Ask the calling convention where every returned value lives.
+   SmallVector<CCValAssign, 16> ResultLocs;
+   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                  getTargetMachine(), ResultLocs, *DAG.getContext());
+   CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
+
+   for (unsigned Idx = 0; Idx != ResultLocs.size(); ++Idx) {
+     EVT ResultVT = ResultLocs[Idx].getValVT();
+     // Only register locations are copied; anything else is skipped.
+     if (!ResultLocs[Idx].isRegLoc())
+       continue;
+     // Result 0 of the copy is the value, 1 the chain and 2 the glue.
+     SDValue Copy = DAG.getCopyFromReg(Chain, dl,
+                                       ResultLocs[Idx].getLocReg(),
+                                       ResultVT, InFlag);
+     Chain = Copy.getValue(1);
+     InFlag = Copy.getValue(2);
+     InVals.push_back(Copy.getValue(0));
+   }
+
+   return Chain;
+ }
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+ MachineBasicBlock * // Custom-inserter hook; always returns the incoming block BB.
+ AMDILTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr *MI, MachineBasicBlock *BB) const
+ {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ switch (MI->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::CMP); // macro defined elsewhere; presumably expands to one case label per CMP type variant -- confirm
+ generateCMPInstr(MI, BB, TII); // helper defined elsewhere; presumably emits the expanded compare -- confirm
+ MI->eraseFromParent(); // the matched instruction is removed after the helper ran
+ break;
+ default: // every other opcode is left untouched
+ break;
+ }
+ return BB;
+ }
+
+ // Give New the ordering `order` and do the same, recursively, for each of
+ // its operands. Nodes that already carry an ordering are left alone (and
+ // their operands are not visited); an order of 0 means nothing is assigned.
+ // This keeps the source ordering of instructions under -O0 and avoids
+ // odd-looking "skipping around" issues. New is returned unchanged.
+ static const SDValue
+ Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
+ {
+   if (order == 0 || DAG.GetOrdering( New.getNode() ) != 0)
+     return New;
+
+   DAG.AssignOrdering( New.getNode(), order );
+   const unsigned NumOps = New.getNumOperands();
+   for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx)
+     Ordered( DAG, order, New.getOperand(OpIdx) );
+   return New;
+ }
+
+ // LOWER(A) expands to the case label for ISD::A. It dispatches to LowerA()
+ // and passes the result through Ordered() together with the ordering of
+ // the node that is being replaced.
+ #define LOWER(A) \
+   case ISD:: A: \
+   return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
+
+ // LowerOperation - Entry point for custom lowering: dispatch on the opcode
+ // of Op to the matching Lower<Opcode>() routine. An opcode without a
+ // handler dumps the node and asserts; without assertions Op is returned
+ // unchanged.
+ SDValue
+ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+ {
+   switch (Op.getOpcode()) {
+   default:
+     Op.getNode()->dump();
+     // The two literals are concatenated, so the first one needs the
+     // trailing space to keep the words of the message apart.
+     assert(0 && "Custom lowering code for this "
+            "instruction is not implemented yet!");
+     break;
+     LOWER(GlobalAddress);
+     LOWER(JumpTable);
+     LOWER(ConstantPool);
+     LOWER(ExternalSymbol);
+     LOWER(FP_TO_SINT);
+     LOWER(FP_TO_UINT);
+     LOWER(SINT_TO_FP);
+     LOWER(UINT_TO_FP);
+     LOWER(ADD);
+     LOWER(MUL);
+     LOWER(SUB);
+     LOWER(FDIV);
+     LOWER(SDIV);
+     LOWER(SREM);
+     LOWER(UDIV);
+     LOWER(UREM);
+     LOWER(BUILD_VECTOR);
+     LOWER(INSERT_VECTOR_ELT);
+     LOWER(EXTRACT_VECTOR_ELT);
+     LOWER(EXTRACT_SUBVECTOR);
+     LOWER(SCALAR_TO_VECTOR);
+     LOWER(CONCAT_VECTORS);
+     LOWER(AND);
+     LOWER(OR);
+     LOWER(SELECT);
+     LOWER(SELECT_CC);
+     LOWER(SETCC);
+     LOWER(SIGN_EXTEND_INREG);
+     LOWER(BITCAST);
+     LOWER(DYNAMIC_STACKALLOC);
+     LOWER(BRCOND);
+     LOWER(BR_CC);
+     LOWER(FP_ROUND);
+   }
+   return Op;
+ }
+
+ int
+ AMDILTargetLowering::getVarArgsFrameOffset() const // plain accessor for the VarArgsFrameOffset member
+ {
+ return VarArgsFrameOffset; // NOTE(review): no assignment to this member is visible in this part of the file
+ }
+#undef LOWER
+
+ // LowerGlobalAddress - Lower an ISD::GlobalAddress node.
+ //  * A global with an array offset in the module info becomes that offset
+ //    plus the offset carried by the node.
+ //  * A global with a constant offset becomes offset + node offset when its
+ //    hardware-constant bit is set, otherwise an ADDADDR of the target
+ //    global address and the SDP register (plus node offset).
+ //  * Any other GlobalVariable whose initializer is a ConstantInt, a
+ //    ConstantFP or a zero aggregate is folded to that constant.
+ //  * Everything else becomes a plain target global address.
+ SDValue
+ AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+ {
+   SDValue DST = Op;
+   const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
+   const GlobalValue *G = GADN->getGlobal();
+   DebugLoc DL = Op.getDebugLoc();
+   MachineFunction &MF = DAG.getMachineFunction();
+   AMDILModuleInfo* AMI = &(MF.getMMI().getObjFileInfo<AMDILModuleInfo>());
+   EVT PtrVT = getPointerTy();
+
+   int64_t base_offset = GADN->getOffset();
+   // Both lookups return -1 when the global is not known to the module info.
+   int32_t arrayoffset = AMI->getArrayOffset(G->getName().str());
+   int32_t constoffset = AMI->getConstOffset(G->getName().str());
+   if (arrayoffset != -1) {
+     DST = DAG.getConstant(arrayoffset, PtrVT);
+     DST = DAG.getNode(ISD::ADD, DL, PtrVT,
+                       DST, DAG.getConstant(base_offset, PtrVT));
+   } else if (constoffset != -1) {
+     if (AMI->getConstHWBit(G->getName().str())) {
+       DST = DAG.getConstant(constoffset, PtrVT);
+       DST = DAG.getNode(ISD::ADD, DL, PtrVT,
+                         DST, DAG.getConstant(base_offset, PtrVT));
+     } else {
+       SDValue addr = DAG.getTargetGlobalAddress(G, DL, PtrVT);
+       SDValue DPReg = DAG.getRegister(AMDIL::SDP, PtrVT);
+       DPReg = DAG.getNode(ISD::ADD, DL, PtrVT, DPReg,
+                           DAG.getConstant(base_offset, PtrVT));
+       DST = DAG.getNode(AMDILISD::ADDADDR, DL, PtrVT, addr, DPReg);
+     }
+   } else {
+     const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+     if (!GV) {
+       // Not a variable (the cast failed, so GV is null here): use the
+       // original GlobalValue rather than the null pointer.
+       DST = DAG.getTargetGlobalAddress(G, DL, PtrVT);
+     } else {
+       if (GV->hasInitializer()) {
+         const Constant *C = GV->getInitializer();
+         if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+           DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+         } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+           DST = DAG.getConstantFP(CF->getValueAPF(),
+                                   Op.getValueType());
+         } else if (isa<ConstantAggregateZero>(C)) {
+           EVT VT = Op.getValueType();
+           if (VT.isInteger()) {
+             DST = DAG.getConstant(0, VT);
+           } else {
+             DST = DAG.getConstantFP(0, VT);
+           }
+         } else {
+           // Dump first so the offending initializer is visible before
+           // the assertion stops an assert-enabled build.
+           C->dump();
+           assert(!"lowering this type of Global Address "
+                  "not implemented yet!");
+           DST = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
+         }
+       } else {
+         DST = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
+       }
+     }
+   }
+   return DST;
+ }
+
+ // LowerJumpTable - Replace a JumpTable node by the TargetJumpTable node
+ // for the same table index, typed with the target's pointer type.
+ SDValue
+ AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+ {
+   const int TableIndex = cast<JumpTableSDNode>(Op)->getIndex();
+   return DAG.getTargetJumpTable(TableIndex, getPointerTy());
+ }
+ // LowerConstantPool - Replace a ConstantPool node by the equivalent
+ // TargetConstantPool node. Alignment, offset and target flags are carried
+ // over; machine constant pool entries use their machine value, all others
+ // their IR constant.
+ SDValue
+ AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+ {
+   ConstantPoolSDNode *Pool = cast<ConstantPoolSDNode>(Op);
+   EVT PoolVT = Op.getValueType();
+
+   if (Pool->isMachineConstantPoolEntry())
+     return DAG.getTargetConstantPool(Pool->getMachineCPVal(), PoolVT,
+                                      Pool->getAlignment(), Pool->getOffset(),
+                                      Pool->getTargetFlags());
+
+   return DAG.getTargetConstantPool(Pool->getConstVal(), PoolVT,
+                                    Pool->getAlignment(), Pool->getOffset(),
+                                    Pool->getTargetFlags());
+ }
+
+ // LowerExternalSymbol - Replace an ExternalSymbol node by the
+ // TargetExternalSymbol node for the same symbol name, typed with the
+ // target's pointer type.
+ SDValue
+ AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
+ {
+   ExternalSymbolSDNode *SymNode = cast<ExternalSymbolSDNode>(Op);
+   return DAG.getTargetExternalSymbol(SymNode->getSymbol(), getPointerTy());
+ }
+ /// LowerFormalArguments - turn the incoming arguments into DAG values:
+ /// register arguments are copied out of live-in registers, stack arguments
+ /// are handed to LowerMemArgument. One value per argument is appended to InVals.
+ /// TODO: isVarArg, hasStructRet, isMemReg
+ SDValue
+ AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const
+ {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AMDILMachineFunctionInfo *FuncInfo
+ = MF.getInfo<AMDILMachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ //const Function *Fn = MF.getFunction();
+ //MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CallingConv::ID CC = MF.getFunction()->getCallingConv(); // read from the function itself; the CallConv parameter is only forwarded to LowerMemArgument
+ //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
+
+ CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // When more calling conventions are added, they need to be chosen here
+ CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
+ SDValue StackPtr; // NOTE(review): never used in this function
+
+ //unsigned int FirstStackArgLoc = 0;
+
+ for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ EVT ValVT = VA.getValVT();
+ const TargetRegisterClass *RC = getRegClassFromType(
+ RegVT.getSimpleVT().SimpleTy);
+
+ unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC); // register returned by addLiveIn is the one copied from below
+ FuncInfo->addArgReg(VA.getLocReg()); // record the physical argument register in the function info
+ SDValue ArgValue = DAG.getCopyFromReg(
+ Chain,
+ dl,
+ Reg,
+ RegVT);
+ // A value that was sign- or zero-extended into its register gets an
+ // AssertSext/AssertZext describing the original type; any location
+ // info other than Full is then truncated back to the value type.
+
+ if (VA.getLocInfo() == CCValAssign::SExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertSext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(ValVT));
+ } else if (VA.getLocInfo() == CCValAssign::ZExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertZext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(ValVT));
+ }
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ ArgValue = DAG.getNode(
+ ISD::TRUNCATE,
+ dl,
+ ValVT,
+ ArgValue);
+ }
+ // Add the value to the list of arguments
+ // to be passed in registers
+ InVals.push_back(ArgValue);
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }
+ } else if(VA.isMemLoc()) {
+ InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
+ dl, DAG, VA, MFI, i));
+ } else {
+ assert(0 && "found a Value Assign that is "
+ "neither a register or a memory location");
+ }
+ }
+ /*if (hasStructRet) {
+ assert(0 && "Has struct return is not yet implemented");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }*/
+
+ unsigned int StackSize = CCInfo.getNextStackOffset();
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
+ }
+ // The stack offset reached after assigning all arguments is recorded as
+ // the number of bytes to pop on return.
+ FuncInfo->setBytesToPopOnReturn(StackSize);
+ return Chain; // the incoming chain is returned unchanged
+ }
+ /// CreateCopyOfByValArgument - Emit a memcpy of the aggregate at "Src" to
+ /// "Dst", taking size and alignment from the byval argument flags. The copy
+ /// is what gets passed as the byval function parameter.
+ /// The leading assert marks this path as not supported yet.
+ static SDValue
+ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG)
+ {
+   assert(0 && "MemCopy does not exist yet");
+
+   SDValue ByteCount = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+   return DAG.getMemcpy(Chain, Src.getDebugLoc(), Dst, Src, ByteCount,
+                        Flags.getByValAlign(),
+                        /*IsVol=*/false, /*AlwaysInline=*/true,
+                        MachinePointerInfo(), MachinePointerInfo());
+ }
+
+ // LowerMemOpCallTo - Pass one outgoing argument in memory. The slot address
+ // is StackPtr plus the memory offset assigned in VA. A byval argument is
+ // copied into the slot with CreateCopyOfByValArgument, anything else is
+ // stored there. The node performing the copy or store is returned.
+ SDValue
+ AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
+                                       SDValue StackPtr, SDValue Arg,
+                                       DebugLoc dl, SelectionDAG &DAG,
+                                       const CCValAssign &VA,
+                                       ISD::ArgFlagsTy Flags) const
+ {
+   const unsigned int SlotOffset = VA.getLocMemOffset();
+   SDValue OffsetNode = DAG.getIntPtrConstant(SlotOffset);
+   SDValue SlotAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                  StackPtr, OffsetNode);
+
+   if (Flags.isByVal())
+     return CreateCopyOfByValArgument(Arg, SlotAddr, Chain, Flags, DAG);
+
+   return DAG.getStore(Chain, dl, Arg, SlotAddr,
+                       MachinePointerInfo::getStack(SlotOffset),
+                       false, false, 0);
+ }
+ /// LowerCall - function arguments are copied from virtual
+ /// regs to (physical regs)/(stack frame), CALLSEQ_START and
+ /// CALLSEQ_END are emitted around an AMDILISD::CALL node, and the
+ /// results are copied out through LowerCallResult.
+ /// Tail calls are never emitted: isTailCall is forced to false on entry,
+ /// so the isTailCall guards below are placeholders for a later
+ /// implementation.
+ /// TODO: isVarArg, isTailCall, hasStructRet
+ SDValue
+ AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+     CallingConv::ID CallConv, bool doesNotReturn, bool isVarArg, bool& isTailCall,
+     const SmallVectorImpl<ISD::OutputArg> &Outs,
+     const SmallVectorImpl<SDValue> &OutVals,
+     const SmallVectorImpl<ISD::InputArg> &Ins,
+     DebugLoc dl, SelectionDAG &DAG,
+     SmallVectorImpl<SDValue> &InVals)
+ const
+ {
+   isTailCall = false;
+   MachineFunction& MF = DAG.getMachineFunction();
+   // FIXME: DO we need to handle fast calling conventions and tail call
+   // optimizations?? X86/PPC ISelLowering
+   /*bool hasStructRet = (TheCall->getNumArgs())
+     ? TheCall->getArgFlags(0).device()->isSRet()
+     : false;*/
+
+   MachineFrameInfo *MFI = MF.getFrameInfo();
+
+   // Analyze operands of the call, assigning locations to each operand
+   SmallVector<CCValAssign, 16> ArgLocs;
+   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                  getTargetMachine(), ArgLocs, *DAG.getContext());
+   // Analyze the calling operands, but need to change
+   // if we have more than one calling convention
+   CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+   unsigned int NumBytes = CCInfo.getNextStackOffset();
+   if (isTailCall) {
+     // Asserting on isTailCall itself could never fire inside this
+     // branch; fail unconditionally like the other tail-call guards.
+     assert(0 && "Tail Call not handled yet!");
+     // See X86/PPC ISelLowering
+   }
+
+   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+   SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
+   SmallVector<SDValue, 8> MemOpChains;
+   //unsigned int FirstStacArgLoc = 0;
+   //int LastArgStackLoc = 0;
+
+   // Walk the register/memloc assignments, insert copies/loads
+   for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+     CCValAssign &VA = ArgLocs[i];
+     // The outgoing value for location i
+     SDValue Arg = OutVals[i];
+     //Promote the value if needed
+     switch(VA.getLocInfo()) {
+     default:
+       assert(0 && "Unknown loc info!");
+     case CCValAssign::Full:
+       break;
+     case CCValAssign::SExt:
+       Arg = DAG.getNode(ISD::SIGN_EXTEND,
+                         dl,
+                         VA.getLocVT(), Arg);
+       break;
+     case CCValAssign::ZExt:
+       Arg = DAG.getNode(ISD::ZERO_EXTEND,
+                         dl,
+                         VA.getLocVT(), Arg);
+       break;
+     case CCValAssign::AExt:
+       Arg = DAG.getNode(ISD::ANY_EXTEND,
+                         dl,
+                         VA.getLocVT(), Arg);
+       break;
+     }
+
+     if (VA.isRegLoc()) {
+       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+     } else if (VA.isMemLoc()) {
+       // Create the frame index object for this outgoing parameter
+       int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+                                       VA.getLocMemOffset(), true
+                                       );
+       SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+       // emit ISD::STORE which stores the
+       // parameter value to a stack Location
+       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                          MachinePointerInfo::getFixedStack(FI),
+                                          false, false, 0));
+     } else {
+       assert(0 && "Not a Reg/Mem Loc, major error!");
+     }
+   }
+   // Join all argument stores so the call depends on every one of them.
+   if (!MemOpChains.empty()) {
+     Chain = DAG.getNode(ISD::TokenFactor,
+                         dl,
+                         MVT::Other,
+                         &MemOpChains[0],
+                         MemOpChains.size());
+   }
+   // Copy the register arguments, gluing the copies together.
+   SDValue InFlag;
+   if (!isTailCall) {
+     for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+       Chain = DAG.getCopyToReg(Chain,
+                                dl,
+                                RegsToPass[i].first,
+                                RegsToPass[i].second,
+                                InFlag);
+       InFlag = Chain.getValue(1);
+     }
+   }
+
+   // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+   // every direct call is) turn it into a TargetGlobalAddress/
+   // TargetExternalSymbol
+   // node so that legalize doesn't hack it.
+   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+   } else if (isTailCall) {
+     assert(0 && "Tail calls are not handled yet");
+     // see X86 ISelLowering for ideas on implementation: 1708
+   }
+
+   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
+   SmallVector<SDValue, 8> Ops;
+
+   if (isTailCall) {
+     assert(0 && "Tail calls are not handled yet");
+     // see X86 ISelLowering for ideas on implementation: 1721
+   }
+   // If this is a direct call, pass the chain and the callee
+   if (Callee.getNode()) {
+     Ops.push_back(Chain);
+     Ops.push_back(Callee);
+   }
+
+   if (isTailCall) {
+     assert(0 && "Tail calls are not handled yet");
+     // see X86 ISelLowering for ideas on implementation: 1739
+   }
+
+   // Add argument registers to the end of the list so that they are known
+   // live into the call
+   for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+     Ops.push_back(DAG.getRegister(
+                     RegsToPass[i].first,
+                     RegsToPass[i].second.getValueType()));
+   }
+   if (InFlag.getNode()) {
+     Ops.push_back(InFlag);
+   }
+
+   // Emit Tail Call
+   if (isTailCall) {
+     assert(0 && "Tail calls are not handled yet");
+     // see X86 ISelLowering for ideas on implementation: 1762
+   }
+
+   Chain = DAG.getNode(AMDILISD::CALL,
+                       dl,
+                       NodeTys, &Ops[0], Ops.size());
+   InFlag = Chain.getValue(1);
+
+   // Create the CALLSEQ_END node
+   Chain = DAG.getCALLSEQ_END(
+             Chain,
+             DAG.getIntPtrConstant(NumBytes, true),
+             DAG.getIntPtrConstant(0, true),
+             InFlag);
+   InFlag = Chain.getValue(1);
+   // Handle result values, copying them out of physregs into vregs that
+   // we return
+   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+                          InVals);
+ }
+ static void checkMADType( // Reports through is24bitMAD/is32bitMAD whether an ADD may be turned into a MAD; currently always reports neither.
+ SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
+ {
+ bool globalLoadStore = false;
+ is24bitMAD = false;
+ is32bitMAD = false;
+ return; // Unconditional exit: everything below is unreachable, so both flags always stay false.
+ assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
+ "this to work correctly!");
+ if (Op.getNode()->use_empty()) {
+ return;
+ }
+ for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
+ nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
+ SDNode *ptr = *nBegin;
+ const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
+ // Give up (leaving both flags false) when a user of the ADD is not a
+ // load/store node, or when it is a node that writes memory and
+ // has Op as its operand 1.
+ // NOTE(review): the original comment described operand 1 as
+ // "the value being stored" -- confirm against LSBaseSDNode.
+ if (!lsNode ||
+ (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
+ return;
+ }
+ const PointerType *PT =
+ dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
+ unsigned as = PT->getAddressSpace(); // NOTE(review): PT is dereferenced without a null check after dyn_cast
+ switch(as) {
+ default:
+ globalLoadStore = true; // no break here: control falls through into the PRIVATE_ADDRESS case
+ case AMDILAS::PRIVATE_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::CONSTANT_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::LOCAL_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::REGION_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ }
+ }
+ if (globalLoadStore) { // any use flagged above selects the 32-bit MAD, otherwise the 24-bit one
+ is32bitMAD = true;
+ } else {
+ is24bitMAD = true;
+ }
+ }
+
+ // LowerADD - Custom lowering of ISD::ADD.
+ //  * i64 / v2i64: a single AMDILISD::ADD when the device has hardware long
+ //    ops and the type is scalar; otherwise the value is split into 32-bit
+ //    halves, the halves are added and the carry of the low half (detected
+ //    with an unsigned "sum < rhs" compare) is added to the high half.
+ //  * Other types: an ADD with a FrameIndex operand becomes ADDADDR. An ADD
+ //    fed by a MUL or a constant SHL may become a MAD intrinsic when
+ //    checkMADType allows it; everything else becomes AMDILISD::ADD.
+ SDValue
+ AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
+ {
+   SDValue LHS = Op.getOperand(0);
+   SDValue RHS = Op.getOperand(1);
+   DebugLoc DL = Op.getDebugLoc();
+   EVT OVT = Op.getValueType();
+   SDValue DST;
+   const AMDILSubtarget *stm = &this->getTargetMachine()
+     .getSubtarget<AMDILSubtarget>();
+   bool isVec = OVT.isVector();
+   if (OVT.getScalarType() == MVT::i64) {
+     MVT INTTY = MVT::i32;
+     if (OVT == MVT::v2i64) {
+       INTTY = MVT::v2i32;
+     }
+     if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
+         && INTTY == MVT::i32) {
+       DST = DAG.getNode(AMDILISD::ADD,
+                         DL,
+                         OVT,
+                         LHS, RHS);
+     } else {
+       SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
+       // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+       LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
+       RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
+       LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
+       RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+       INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
+       INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
+       // Carry out of the low half: the low sum is (unsigned) smaller than
+       // one of its inputs. The compare result is negated before being
+       // added to the high half.
+       SDValue cmp;
+       cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                         DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+                         INTLO, RHSLO);
+       cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
+       INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
+       DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
+                         INTLO, INTHI);
+     }
+   } else {
+     if (LHS.getOpcode() == ISD::FrameIndex ||
+         RHS.getOpcode() == ISD::FrameIndex) {
+       DST = DAG.getNode(AMDILISD::ADDADDR,
+                         DL,
+                         OVT,
+                         LHS, RHS);
+     } else {
+       if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+           && LHS.getNumOperands()
+           && RHS.getNumOperands()) {
+         bool is24bitMAD = false;
+         bool is32bitMAD = false;
+         const ConstantSDNode *LHSConstOpCode =
+           dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
+         const ConstantSDNode *RHSConstOpCode =
+           dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
+         if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
+             || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
+             || LHS.getOpcode() == ISD::MUL
+             || RHS.getOpcode() == ISD::MUL) {
+           // Op1 * Op2 + Op3; a constant shift is rewritten as a
+           // multiplication by the matching power of two.
+           SDValue Op1, Op2, Op3;
+           // FIXME: Fix this so that it works for unsigned 24bit ops.
+           if (LHS.getOpcode() == ISD::MUL) {
+             Op1 = LHS.getOperand(0);
+             Op2 = LHS.getOperand(1);
+             Op3 = RHS;
+           } else if (RHS.getOpcode() == ISD::MUL) {
+             Op1 = RHS.getOperand(0);
+             Op2 = RHS.getOperand(1);
+             Op3 = LHS;
+           } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
+             Op1 = LHS.getOperand(0);
+             Op2 = DAG.getConstant(
+                     1 << LHSConstOpCode->getZExtValue(), MVT::i32);
+             Op3 = RHS;
+           } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
+             Op1 = RHS.getOperand(0);
+             Op2 = DAG.getConstant(
+                     1 << RHSConstOpCode->getZExtValue(), MVT::i32);
+             Op3 = LHS;
+           }
+           checkMADType(Op, stm, is24bitMAD, is32bitMAD);
+           // We can possibly do a MAD transform!
+           if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+             uint32_t opcode = AMDILIntrinsic::AMDIL_mad24_i32;
+             SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+             DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+                               DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
+                               Op1, Op2, Op3);
+           } else if(is32bitMAD) {
+             SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+             DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+                               DL, Tys, DAG.getEntryNode(),
+                               DAG.getConstant(
+                                 AMDILIntrinsic::AMDIL_mad_i32, MVT::i32),
+                               Op1, Op2, Op3);
+           }
+         }
+       }
+       // Fall back to a plain ADD only when no MAD node was built above;
+       // assigning unconditionally would throw a built MAD node away.
+       // NOTE(review): checkMADType currently never enables either MAD
+       // form, so today this always produces the plain ADD.
+       if (!DST.getNode()) {
+         DST = DAG.getNode(AMDILISD::ADD,
+                           DL,
+                           OVT,
+                           LHS, RHS);
+       }
+     }
+   }
+   return DST;
+ }
+ // genCLZuN - Build the DAG that counts the leading zeros of Op treated as
+ // a `bits`-wide unsigned value held in a 32-bit integer (scalar or vector).
+ // The value is OR-ed into the bit pattern 0x3f800000, reinterpreted as a
+ // float and reduced by 1.0f; the exponent field of the result is then
+ // subtracted from (103 + bits). An input of zero yields `bits`.
+ SDValue
+ AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
+                               uint32_t bits) const
+ {
+   DebugLoc DL = Op.getDebugLoc();
+   EVT INTTY = Op.getValueType();
+   EVT FPTY;
+   if (INTTY.isVector()) {
+     FPTY = EVT(MVT::getVectorVT(MVT::f32,
+                                 INTTY.getVectorNumElements()));
+   } else {
+     FPTY = EVT(MVT::f32);
+   }
+   /* static inline uint
+      __clz_Nbit(uint x)
+      {
+      int xor = 0x3f800000U | x;
+      float tp = as_float(xor);
+      float t = tp + -1.0f;
+      uint tint = as_uint(t);
+      int cmp = (x != 0);
+      uint tsrc = tint >> 23;
+      uint tmask = tsrc & 0xffU;
+      uint cst = (103 + N)U - tmask;
+      return cmp ? cst : N;
+      }
+      */
+   // The message names this function (it previously referred to a
+   // "genCLZu16" that does not exist here).
+   assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
+          && "genCLZuN only works on 32bit types");
+   // uint x = Op
+   SDValue x = Op;
+   // xornode = 0x3f800000 | x
+   SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
+                                 DAG.getConstant(0x3f800000, INTTY), x);
+   // float tp = as_float(xornode)
+   SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
+   // float t = tp + -1.0f
+   SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
+                           DAG.getConstantFP(-1.0f, FPTY));
+   // uint tint = as_uint(t)
+   SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
+   // int cmp = (x != 0)
+   SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                             DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
+                             DAG.getConstant(0, INTTY));
+   // uint tsrc = tint >> 23
+   SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
+                              DAG.getConstant(23, INTTY));
+   // uint tmask = tsrc & 0xFF
+   SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
+                               DAG.getConstant(0xFFU, INTTY));
+   // uint cst = (103 + bits) - tmask
+   SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
+                             DAG.getConstant((103U + bits), INTTY), tmask);
+   // return cmp ? cst : N
+   cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
+                     DAG.getConstant(bits, INTTY));
+   return cst;
+ }
+
+ // genCLZu32 - Build the DAG that counts the leading zeros of a 32-bit
+ // unsigned value. Devices of generation HD5XXX and later use the IFFB_HI
+ // node; HD4XXX combines two 16-bit counts produced by genCLZuN. Any other
+ // generation asserts and yields an empty SDValue.
+ SDValue
+ AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
+ {
+   SDValue DST = SDValue();
+   DebugLoc DL = Op.getDebugLoc();
+   EVT INTTY = Op.getValueType();
+   // Fetch the subtarget through the typed accessor, the same way LowerADD
+   // does, rather than reinterpret_cast-ing the TargetMachine.
+   const AMDILSubtarget *stm = &this->getTargetMachine()
+     .getSubtarget<AMDILSubtarget>();
+   if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+     //__clz_32bit(uint u)
+     //{
+     // int z = __amdil_ffb_hi(u) ;
+     // return z < 0 ? 32 : z;
+     // }
+     // uint u = op
+     SDValue u = Op;
+     // int z = __amdil_ffb_hi(u)
+     SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
+     // int cmp = z < 0
+     SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                               DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+                               z, DAG.getConstant(0, INTTY));
+     // return cmp ? 32 : z
+     DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
+                       DAG.getConstant(32, INTTY), z);
+   } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+     // static inline uint
+     //__clz_32bit(uint x)
+     //{
+     // uint zh = __clz_16bit(x >> 16);
+     // uint zl = __clz_16bit(x & 0xffffU);
+     // return zh == 16U ? 16U + zl : zh;
+     //}
+     // uint x = Op
+     SDValue x = Op;
+     // uint xs16 = x >> 16
+     SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
+                                DAG.getConstant(16, INTTY));
+     // uint zh = __clz_16bit(xs16)
+     SDValue zh = genCLZuN(xs16, DAG, 16);
+     // uint xa16 = x & 0xFFFF
+     SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
+                                DAG.getConstant(0xFFFFU, INTTY));
+     // uint zl = __clz_16bit(xa16)
+     SDValue zl = genCLZuN(xa16, DAG, 16);
+     // uint cmp = zh == 16U
+     SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+                               DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+                               zh, DAG.getConstant(16U, INTTY));
+     // uint zl16 = zl + 16
+     SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
+                                DAG.getConstant(16, INTTY), zl);
+     // return cmp ? zl16 : zh
+     DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
+                       cmp, zl16, zh);
+   } else {
+     assert(0 && "Attempting to generate a CLZ function with an"
+            " unknown graphics card");
+   }
+   return DST;
+ }
+SDValue
+AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
+{ // Emits DAG nodes computing the leading-zero count of the 64-bit value Op; the result uses the 32-bit type INTTY.
+ SDValue DST = SDValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT INTTY;
+ EVT LONGTY = Op.getValueType();
+ bool isVec = LONGTY.isVector();
+ if (isVec) { // INTTY: i32 vector with the same element count as Op, otherwise plain i32
+ INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
+ .getVectorNumElements()));
+ } else {
+ INTTY = EVT(MVT::i32);
+ }
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+ // Evergreen (HD5XXX) and later:
+ // static inline uint
+ // __clz_u64(ulong x)
+ // {
+ //uint zhi = __clz_32bit((uint)(x >> 32));
+ //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
+ //return zhi == 32U ? 32U + zlo : zhi;
+ //}
+ //ulong x = op
+ SDValue x = Op;
+ // uint xlo = x & 0xFFFFFFFF (low component, LCOMPLO/LCOMPLO2)
+ SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ // uint xhi = x >> 32 (high component, LCOMPHI/LCOMPHI2)
+ SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
+ // uint zhi = __clz_32bit(xhi)
+ SDValue zhi = genCLZu32(xhi, DAG);
+ // uint zlo = __clz_32bit(xlo)
+ SDValue zlo = genCLZu32(xlo, DAG);
+ // uint cmp = zhi == 32
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhi, DAG.getConstant(32U, INTTY));
+ // uint zlop32 = 32 + zlo
+ SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
+ DAG.getConstant(32U, INTTY), zlo); // NOTE(review): AMDILISD::ADD here, while the HD4XXX path below uses ISD::ADD - confirm this is intended
+ // return cmp ? zlop32: zhi
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
+ } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // HD4XXX: the value is split into 18 + 23 + 23 bits and each piece is
+ // counted with the 23-bit helper genCLZuN(..., 23).
+ // static inline uint
+ //__clz_64bit(ulong x)
+ //{
+ //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
+ //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
+ //uint zl = __clz_23bit((uint)x & 0x7fffffU);
+ //uint r = zh == 18U ? 18U + zm : zh;
+ //return zh + zm == 41U ? 41U + zl : r;
+ //}
+ //ulong x = Op
+ SDValue x = Op;
+ // ulong xs46 = x >> 46
+ SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+ DAG.getConstant(46, LONGTY));
+ // uint ixs46 = (uint)xs46
+ SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
+ // ulong xs23 = x >> 23
+ SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+ DAG.getConstant(23, LONGTY));
+ // uint ixs23 = (uint)xs23
+ SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
+ // uint xs23m23 = ixs23 & 0x7FFFFF
+ SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
+ DAG.getConstant(0x7fffffU, INTTY));
+ // uint ix = (uint)x (taken as the low component via LCOMPLO/LCOMPLO2)
+ SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ // uint xm23 = ix & 0x7FFFFF
+ SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
+ DAG.getConstant(0x7fffffU, INTTY));
+ // uint zh = __clz_23bit(ixs46)
+ SDValue zh = genCLZuN(ixs46, DAG, 23);
+ // uint zm = __clz_23bit(xs23m23)
+ SDValue zm = genCLZuN(xs23m23, DAG, 23);
+ // uint zl = __clz_23bit(xm23)
+ SDValue zl = genCLZuN(xm23, DAG, 23);
+ // uint zhm5 = zh - 5 (emitted as an ADD of -5U; x >> 46 has only 18 significant bits, 23 - 18 = 5)
+ SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
+ DAG.getConstant(-5U, INTTY));
+ SDValue const18 = DAG.getConstant(18, INTTY);
+ SDValue const41 = DAG.getConstant(41, INTTY);
+ // uint cmp1 = zhm5 == 18
+ SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhm5, const18);
+ // uint zhm5zm = zhm5 + zm
+ SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
+ // uint cmp2 = zhm5zm == 41
+ SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhm5zm, const41);
+ // uint zmp18 = zm + 18
+ SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
+ // uint zlp41 = zl + 41
+ SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
+ // uint r = cmp1 ? zmp18 : zhm5
+ SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
+ cmp1, zmp18, zhm5);
+ // return cmp2 ? zlp41 : r
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
+ } else { // any other generation: asserts; DST stays the empty SDValue it was initialised with
+ assert(0 && "Attempting to generate a CLZ function with an"
+ " unknown graphics card");
+ }
+ return DST;
+}
+SDValue
+AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
+ bool includeSign) const
+{ // Expands an f64 -> 64-bit integer conversion of RHS into DAG nodes; includeSign selects the signed variant.
+ EVT INTVT;
+ EVT LONGVT;
+ SDValue DST;
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT RHSVT = RHS.getValueType();
+ bool isVec = RHSVT.isVector();
+ if (isVec) { // LONGVT/INTVT: i64/i32 types with the same element count as RHS
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
+ .getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
+ .getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (0 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { // NOTE: the leading `0 &&` disables this branch; only the else path is ever emitted
+ // unsigned version:
+ // uint uhi = (uint)(d * 0x1.0p-32);
+ // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
+ // return as_ulong2((uint2)(ulo, uhi));
+ //
+ // signed version:
+ // double ad = fabs(d);
+ // long l = unsigned_version(ad);
+ // long nl = -l;
+ // return d == ad ? l : nl;
+ SDValue d = RHS;
+ if (includeSign) {
+ d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
+ }
+ uint64_t val = 0x3DF0000000000000ULL; // bit pattern of 0x1.0p-32 (see pseudo-code above)
+ double dval = *(double*)&val; // reinterprets the integer bits as a double through a pointer cast
+ SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
+ DAG.getConstantFP(dval, RHSVT));
+ SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
+ SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
+ val = 0xC1F0000000000000ULL; // bit pattern of -0x1.0p+32
+ dval = *(double*)&val;
+ ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
+ DAG.getConstantFP(dval, RHSVT), d);
+ SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
+ SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
+ if (includeSign) {
+ SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
+ SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
+ RHS, d); // d == fabs(d) ?
+ l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
+ }
+ DST = l;
+ } else {
+ /*
+ Reference algorithm for the integer-only expansion emitted below.
+ NOTE: the emitted DAG differs from this pseudo-code in two places:
+ the shifts use sr directly (no "& 31" mask), and the
+ "optional saturate on overflow" step is not emitted.
+
+ __attribute__((always_inline)) long
+ cast_f64_to_i64(double d)
+ {
+ // Convert d in to 32-bit components
+ long x = as_long(d);
+ xhi = LCOMPHI(x);
+ xlo = LCOMPLO(x);
+
+ // Generate 'normalized' mantissa
+ mhi = xhi | 0x00100000; // hidden bit
+ mhi <<= 11;
+ temp = xlo >> (32 - 11);
+ mhi |= temp;
+ mlo = xlo << 11;
+
+ // Compute shift right count from exponent
+ e = (xhi >> (52-32)) & 0x7ff;
+ sr = 1023 + 63 - e;
+ srge64 = sr >= 64;
+ srge32 = sr >= 32;
+
+ // Compute result for 0 <= sr < 32
+ rhi0 = mhi >> (sr &31);
+ rlo0 = mlo >> (sr &31);
+ temp = mhi << (32 - sr);
+ temp |= rlo0;
+ rlo0 = sr ? temp : rlo0;
+
+ // Compute result for 32 <= sr
+ rhi1 = 0;
+ rlo1 = srge64 ? 0 : rhi0;
+
+ // Pick between the 2 results
+ rhi = srge32 ? rhi1 : rhi0;
+ rlo = srge32 ? rlo1 : rlo0;
+
+ // Optional saturate on overflow
+ srlt0 = sr < 0;
+ rhi = srlt0 ? MAXVALUE : rhi;
+ rlo = srlt0 ? MAXVALUE : rlo;
+
+ // Create long
+ res = LCREATE( rlo, rhi );
+
+ // Deal with sign bit (ignoring whether result is signed or unsigned value)
+ if (includeSign) {
+ sign = ((signed int) xhi) >> 31; // fill with sign bit
+ sign = LCREATE( sign, sign );
+ res += sign;
+ res ^= sign;
+ }
+
+ return res;
+ }
+ */
+ SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
+ SDValue c32 = DAG.getConstant( 32, INTVT );
+
+ // Convert d into 32-bit components
+ SDValue d = RHS;
+ SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+ // Generate 'normalized' mantissa (hidden bit OR-ed in, then shifted left by 11 across the hi/lo pair)
+ SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
+ xhi, DAG.getConstant( 0x00100000, INTVT ) );
+ mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
+ SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
+ xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
+ mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
+ SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );
+
+ // Compute shift right count from exponent: sr = 1023 + 63 - e
+ SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
+ xhi, DAG.getConstant( 52-32, INTVT ) );
+ e = DAG.getNode( ISD::AND, DL, INTVT,
+ e, DAG.getConstant( 0x7ff, INTVT ) );
+ SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( 1023 + 63, INTVT ), e );
+ SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ sr, DAG.getConstant(64, INTVT));
+ SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ sr, DAG.getConstant(32, INTVT));
+
+ // Compute result for 0 <= sr < 32
+ SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); // NOTE(review): sr is not masked with 31 as in the pseudo-code; presumably relies on the target's shift behaviour - confirm
+ SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
+ temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
+ temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
+ temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
+ rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 ); // rlo0 = sr ? temp : rlo0
+
+ // Compute result for 32 <= sr
+ SDValue rhi1 = DAG.getConstant( 0, INTVT );
+ SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge64, rhi1, rhi0 ); // rlo1 = srge64 ? 0 : rhi0
+
+ // Pick between the 2 results
+ SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge32, rhi1, rhi0 );
+ SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge32, rlo1, rlo0 );
+
+ // Create long (no saturation is applied before this point)
+ SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+
+ // Deal with sign bit: sign is 0 or -1 in both halves, so (res + sign) ^ sign negates res when sign is -1
+ if (includeSign) {
+ SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
+ xhi, DAG.getConstant( 31, INTVT ) );
+ sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
+ res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
+ res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
+ }
+ DST = res;
+ }
+ return DST;
+}
+// Expand an f64 -> 32-bit integer conversion of RHS using only 32-bit
+// integer DAG nodes. includeSign selects the signed variant.
+//
+// Emitted computation:
+//   x   = as_long(d); xhi = LCOMPHI(x); xlo = LCOMPLO(x);
+//   mhi = ((xhi | 0x00100000) << 11) | (xlo >> (32 - 11));  // hidden bit
+//   e   = (xhi >> (52 - 32)) & 0x7ff;
+//   sr  = 1023 + 31 - e;
+//   res = (sr >= 32) ? 0 : (mhi >> sr);
+//   if (includeSign) { sign = (int)xhi >> 31; res = (res + sign) ^ sign; }
+SDValue
+AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
+    bool includeSign) const
+{
+  const DebugLoc dl = RHS.getDebugLoc();
+  const EVT srcVT = RHS.getValueType();
+  const bool vec = srcVT.isVector();
+
+  // Integer types with the same element count as the source.
+  EVT i32VT;
+  EVT i64VT;
+  if (vec) {
+    i64VT = EVT(MVT::getVectorVT(MVT::i64, srcVT.getVectorNumElements()));
+    i32VT = EVT(MVT::getVectorVT(MVT::i32, srcVT.getVectorNumElements()));
+  } else {
+    i64VT = EVT(MVT::i64);
+    i32VT = EVT(MVT::i32);
+  }
+
+  SDValue shift11 = DAG.getConstant( 63 - 52, i32VT );
+
+  // Split the raw bits of the double into two 32-bit words.
+  SDValue bits = DAG.getNode(ISDBITCAST, dl, i64VT, RHS);
+  SDValue hiWord = DAG.getNode(
+      vec ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, dl, i32VT, bits );
+  SDValue loWord = DAG.getNode(
+      vec ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, dl, i32VT, bits );
+
+  // Top 32 bits of the left-justified mantissa, hidden bit included.
+  SDValue mant = DAG.getNode( ISD::OR, dl, i32VT,
+      hiWord, DAG.getConstant( 0x00100000, i32VT ) );
+  mant = DAG.getNode( ISD::SHL, dl, i32VT, mant, shift11 );
+  SDValue carried = DAG.getNode( ISD::SRL, dl, i32VT,
+      loWord, DAG.getConstant( 32 - (63 - 52), i32VT ) );
+  mant = DAG.getNode( ISD::OR, dl, i32VT, mant, carried );
+
+  // Right-shift count derived from the biased exponent.
+  SDValue expo = DAG.getNode( ISD::SRL, dl, i32VT,
+      hiWord, DAG.getConstant( 52-32, i32VT ) );
+  expo = DAG.getNode( ISD::AND, dl, i32VT,
+      expo, DAG.getConstant( 0x7ff, i32VT ) );
+  SDValue shiftAmt = DAG.getNode( ISD::SUB, dl, i32VT,
+      DAG.getConstant( 1023 + 31, i32VT ), expo );
+  SDValue tooSmall = DAG.getNode( AMDILISD::CMP, dl, i32VT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+      shiftAmt, DAG.getConstant(32, i32VT));
+
+  // Magnitude: shifted mantissa, or 0 once the shift count reaches 32.
+  SDValue result = DAG.getNode( ISD::SRL, dl, i32VT, mant, shiftAmt );
+  result = DAG.getNode( AMDILISD::CMOVLOG, dl, i32VT,
+      tooSmall, DAG.getConstant(0,i32VT), result );
+
+  if (!includeSign) {
+    return result;
+  }
+
+  // signMask is the sign bit of the high word replicated through all bits;
+  // add-then-xor with it negates the magnitude for negative inputs.
+  SDValue signMask = DAG.getNode( ISD::SRA, dl, i32VT,
+      hiWord, DAG.getConstant( 31, i32VT ) );
+  result = DAG.getNode( ISD::ADD, dl, i32VT, result, signMask );
+  result = DAG.getNode( ISD::XOR, dl, i32VT, result, signMask );
+  return result;
+}
+// Custom lowering for ISD::FP_TO_SINT. Only f64 sources are treated
+// specially:
+//  - f64 vectors are scalarised element by element;
+//  - f64 -> i32 is left alone when the CAL version and device generation
+//    allow it, otherwise expanded via genf64toi32;
+//  - f64 -> i64 is expanded via genf64toi64;
+//  - f64 -> i8/i16 converts to i32 first and truncates.
+// Everything else is returned unchanged as result 0 of the node.
+SDValue
+AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue src = Op.getOperand(0);
+  EVT srcVT = src.getValueType();
+  MVT srcEltVT = srcVT.getScalarType().getSimpleVT();
+  EVT dstVT = Op.getValueType();
+  MVT dstEltVT = dstVT.getScalarType().getSimpleVT();
+  DebugLoc dl = Op.getDebugLoc();
+  const AMDILTargetMachine *tm =
+    reinterpret_cast<const AMDILTargetMachine*>(&this->getTargetMachine());
+  const AMDILSubtarget *subtarget =
+    dynamic_cast<const AMDILSubtarget*>(tm->getSubtargetImpl());
+
+  if (!(srcEltVT == MVT::f64)) {
+    return SDValue(Op.getNode(), 0);
+  }
+
+  if (srcVT.isVector()) {
+    // We don't support vector 64bit floating point conversions, so convert
+    // each element separately and rebuild the vector.
+    SDValue result;
+    const unsigned numElts = srcVT.getVectorNumElements();
+    for (unsigned idx = 0; idx < numElts; ++idx) {
+      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+          dl, srcEltVT, src, DAG.getTargetConstant(idx, MVT::i32));
+      elt = DAG.getNode(ISD::FP_TO_SINT, dl, dstEltVT, elt);
+      if (idx == 0) {
+        result = DAG.getNode(AMDILISD::VBUILD, dl, dstVT, elt);
+      } else {
+        result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, dstVT,
+            result, elt, DAG.getTargetConstant(idx, MVT::i32));
+      }
+    }
+    return result;
+  }
+
+  if (dstEltVT == MVT::i32) {
+    if (subtarget->calVersion() >= CAL_VERSION_SC_155 && !srcVT.isVector()
+        && subtarget->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+      return SDValue(Op.getNode(), 0);
+    }
+    return genf64toi32(src, DAG, true);
+  }
+
+  if (dstEltVT == MVT::i64) {
+    return genf64toi64(src, DAG, true);
+  }
+
+  if (dstEltVT == MVT::i8 || dstEltVT == MVT::i16) {
+    SDValue asInt32;
+    if (subtarget->calVersion() >= CAL_VERSION_SC_155 && !srcVT.isVector()) {
+      asInt32 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, src);
+    } else {
+      asInt32 = genf64toi32(src, DAG, true);
+    }
+    return DAG.getNode(ISD::TRUNCATE, dl, dstVT, asInt32);
+  }
+
+  return SDValue(Op.getNode(), 0);
+}
+
+// Custom lowering for ISD::FP_TO_UINT; the unsigned counterpart of
+// LowerFP_TO_SINT. Only f64 sources are treated specially:
+//  - f64 vectors are scalarised element by element;
+//  - f64 -> i32 is left alone when the CAL version and device generation
+//    allow it, otherwise expanded via genf64toi32 (unsigned);
+//  - f64 -> i64 is expanded via genf64toi64 (unsigned);
+//  - f64 -> i8/i16 converts to i32 first and truncates.
+// Everything else is returned unchanged as result 0 of the node.
+SDValue
+AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue src = Op.getOperand(0);
+  EVT srcVT = src.getValueType();
+  MVT srcEltVT = srcVT.getScalarType().getSimpleVT();
+  EVT dstVT = Op.getValueType();
+  MVT dstEltVT = dstVT.getScalarType().getSimpleVT();
+  DebugLoc dl = Op.getDebugLoc();
+  const AMDILTargetMachine *tm =
+    reinterpret_cast<const AMDILTargetMachine*>(&this->getTargetMachine());
+  const AMDILSubtarget *subtarget =
+    dynamic_cast<const AMDILSubtarget*>(tm->getSubtargetImpl());
+
+  if (!(srcEltVT == MVT::f64)) {
+    return SDValue(Op.getNode(), 0);
+  }
+
+  if (srcVT.isVector()) {
+    // We don't support vector 64bit floating point conversions, so convert
+    // each element separately and rebuild the vector.
+    SDValue result;
+    const unsigned numElts = srcVT.getVectorNumElements();
+    for (unsigned idx = 0; idx < numElts; ++idx) {
+      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+          dl, srcEltVT, src, DAG.getTargetConstant(idx, MVT::i32));
+      elt = DAG.getNode(ISD::FP_TO_UINT, dl, dstEltVT, elt);
+      if (idx == 0) {
+        result = DAG.getNode(AMDILISD::VBUILD, dl, dstVT, elt);
+      } else {
+        result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, dstVT,
+            result, elt, DAG.getTargetConstant(idx, MVT::i32));
+      }
+    }
+    return result;
+  }
+
+  if (dstEltVT == MVT::i32) {
+    if (subtarget->calVersion() >= CAL_VERSION_SC_155 && !srcVT.isVector()
+        && subtarget->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+      return SDValue(Op.getNode(), 0);
+    }
+    return genf64toi32(src, DAG, false);
+  }
+
+  if (dstEltVT == MVT::i64) {
+    return genf64toi64(src, DAG, false);
+  }
+
+  if (dstEltVT == MVT::i8 || dstEltVT == MVT::i16) {
+    SDValue asInt32;
+    if (subtarget->calVersion() >= CAL_VERSION_SC_155 && !srcVT.isVector()) {
+      asInt32 = DAG.getNode(ISD::FP_TO_UINT, dl, MVT::i32, src);
+    } else {
+      asInt32 = genf64toi32(src, DAG, false);
+    }
+    return DAG.getNode(ISD::TRUNCATE, dl, dstVT, asInt32);
+  }
+
+  return SDValue(Op.getNode(), 0);
+}
+// Convert the unsigned 32-bit integer RHS to the floating point type LHSVT
+// using integer DAG nodes. With CAL_VERSION_SC_135 or newer the value is
+// placed in the low word under the high word 0x43300000 and 0x1.0p+52 is
+// subtracted; otherwise exponent and fraction are assembled by hand from a
+// leading-zero count.
+SDValue
+AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
+    SelectionDAG &DAG) const
+{
+  const EVT srcVT = RHS.getValueType();
+  const DebugLoc dl = RHS.getDebugLoc();
+  const bool vec = srcVT.isVector();
+
+  // Integer types with the same element count as the source.
+  EVT i32VT;
+  EVT i64VT;
+  if (vec) {
+    i64VT = EVT(MVT::getVectorVT(MVT::i64, srcVT.getVectorNumElements()));
+    i32VT = EVT(MVT::getVectorVT(MVT::i32, srcVT.getVectorNumElements()));
+  } else {
+    i64VT = EVT(MVT::i64);
+    i32VT = EVT(MVT::i32);
+  }
+
+  const AMDILTargetMachine *tm =
+    reinterpret_cast<const AMDILTargetMachine*>(&this->getTargetMachine());
+  const AMDILSubtarget *subtarget =
+    dynamic_cast<const AMDILSubtarget*>(tm->getSubtargetImpl());
+
+  if (subtarget->calVersion() >= CAL_VERSION_SC_135) {
+    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
+    // return as_double(xd) - 0x1.0p+52;   // 0x1.0p+52 == 0x4330_0000_0000_0000
+    SDValue packed = DAG.getNode(
+        vec ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, dl, i64VT, RHS,
+        DAG.getConstant( 0x43300000, i32VT ) );
+    SDValue biased = DAG.getNode( ISDBITCAST, dl, LHSVT, packed );
+    SDValue twoPow52 = DAG.getNode( ISDBITCAST, dl, LHSVT,
+        DAG.getConstant( 0x4330000000000000ULL, i64VT ) );
+    return DAG.getNode( ISD::FSUB, dl, LHSVT, biased, twoPow52 );
+  }
+
+  SDValue leadingZeros = genCLZu32(RHS, DAG);
+
+  // Exponent: 1023 is the bias, 31-clz the actual power of 2. A zero input
+  // selects the input itself (0) as exponent instead.
+  SDValue expo = DAG.getNode( ISD::SUB, dl, i32VT,
+      DAG.getConstant( (1023+31), i32VT), leadingZeros );
+  expo = DAG.getNode( AMDILISD::CMOVLOG, dl, i32VT, RHS, expo, RHS );
+
+  // Normalize the fraction, then clear the hidden bit.
+  SDValue hiWord = DAG.getNode( ISD::SHL, dl, i32VT, RHS, leadingZeros );
+  hiWord = DAG.getNode( ISD::AND, dl, i32VT,
+      hiWord, DAG.getConstant( 0x7fffffff, i32VT ) );
+
+  // Spread the fraction over both words and merge the exponent in.
+  SDValue loWord = DAG.getNode( ISD::SHL, dl, i32VT,
+      hiWord, DAG.getConstant( (32 - 11), i32VT ) );
+  hiWord = DAG.getNode( ISD::SRL, dl, i32VT,
+      hiWord, DAG.getConstant( 11, i32VT ) );
+  expo = DAG.getNode( ISD::SHL, dl, i32VT,
+      expo, DAG.getConstant( 20, i32VT ) );
+  hiWord = DAG.getNode( ISD::OR, dl, i32VT, hiWord, expo );
+
+  // Join 2 x 32 bits into 1 x 64 bits and reinterpret as LHSVT.
+  SDValue packed = DAG.getNode(
+      vec ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, dl, i64VT,
+      loWord, hiWord );
+  return DAG.getNode(ISDBITCAST, dl, LHSVT, packed);
+}
+SDValue
+AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
+ SelectionDAG &DAG) const
+{ // Expands an unsigned 64-bit integer -> LHSVT (f64) conversion of RHS into DAG nodes.
+ EVT RHSVT = RHS.getValueType();
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT INTVT;
+ EVT LONGVT;
+ bool isVec = RHSVT.isVector();
+ if (isVec) { // INTVT: i32 type with the same element count as RHS
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ INTVT = EVT(MVT::i32);
+ }
+ LONGVT = RHSVT; // the 64-bit integer type is simply the source type
+ SDValue x = RHS;
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (0 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { // NOTE: the leading `0 &&` disables this branch
+ // double dhi = (double)(as_uint2(x).y);
+ // double dlo = (double)(as_uint2(x).x);
+ // return mad(dhi, 0x1.0p+32, dlo)
+ SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
+ dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
+ SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
+ dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
+ uint64_t val = 0x41f0000000000000ULL; // bit pattern of 0x1.0p+32
+ double dval = *(double*)&val; // reinterprets the integer bits as a double through a pointer cast
+ return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
+ DAG.getConstantFP(dval, LHSVT), dlo);
+ } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
+ // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
+ // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
+ // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
+ SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) ); // high word 0x43300000: exponent of 0x1.0p+52
+ SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
+ SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
+ SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) ); // high word 0x45300000: exponent of 0x1.0p+84
+ SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
+ SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
+ DAG.getConstant( 0x4530000000100000ULL, LONGVT ) ); // bit pattern of 0x1.0p+84 + 0x1.0p+52
+ hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
+ return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
+
+ } else { // fallback: assemble exponent and fraction by hand from a leading-zero count
+ SDValue clz = genCLZu64(x, DAG);
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+ // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
+ SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( (1023+63), INTVT), clz );
+ SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo ); // non-zero iff the input is non-zero
+ exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ mash, exp, mash ); // exp = exp, or 0 if input was 0
+
+ // Normalize frac: shift the 64-bit value left by clz, done on the two 32-bit halves
+ SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
+ clz, DAG.getConstant( 31, INTVT ) ); // shift amount within one 32-bit word
+ SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( 32, INTVT ), clz31 );
+ SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
+ SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
+ t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 ); // when clz31 == 0 use t1, so the OR below leaves t1 unchanged
+ SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 ); // result halves when clz < 32
+ SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
+ SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 ); // result halves when bit 5 of clz is set (clz >= 32)
+ SDValue rlo2 = DAG.getConstant( 0, INTVT );
+ SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
+ clz, DAG.getConstant( 32, INTVT ) );
+ SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ clz32, rhi2, rhi1 );
+ SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ clz32, rlo2, rlo1 );
+
+ // Eliminate hidden bit
+ rhi = DAG.getNode( ISD::AND, DL, INTVT,
+ rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
+
+ // Save bits needed to round properly (the low 11 bits that the shift by 11 below discards)
+ SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
+ rlo, DAG.getConstant( 0x7ff, INTVT ) );
+
+ // Pack exponent and frac
+ rlo = DAG.getNode( ISD::SRL, DL, INTVT,
+ rlo, DAG.getConstant( 11, INTVT ) );
+ SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
+ rhi, DAG.getConstant( (32 - 11), INTVT ) );
+ rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
+ rhi = DAG.getNode( ISD::SRL, DL, INTVT,
+ rhi, DAG.getConstant( 11, INTVT ) );
+ exp = DAG.getNode( ISD::SHL, DL, INTVT,
+ exp, DAG.getConstant( 20, INTVT ) );
+ rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
+
+ // Compute rounding bit: 1 when the top discarded bit is set and either a lower discarded bit or the kept LSB is set
+ SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
+ rlo, DAG.getConstant( 1, INTVT ) ); // LSB of the packed low word
+ SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
+ round, DAG.getConstant( 0x3ff, INTVT ) ); // the 10 discarded bits below the top one
+ grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
+ grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
+ grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
+ round = DAG.getNode( ISD::SRL, DL, INTVT,
+ round, DAG.getConstant( 10, INTVT ) ); // top discarded bit
+ round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
+
+ // Add rounding bit (as a 64-bit add, so a carry can reach the high word)
+ SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
+ round, DAG.getConstant( 0, INTVT ) );
+ SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+ res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
+ return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
+ }
+}
+// Custom lowering for ISD::UINT_TO_FP.
+//  - f64 vector results are scalarised element by element;
+//  - i32 -> f64 is left alone on devices newer than HD4XXX with
+//    CAL_VERSION_SC_155 or later, otherwise expanded via genu32tof64;
+//  - i64 -> f64 is always expanded via genu64tof64.
+// Everything else is returned unchanged as result 0 of the node.
+SDValue
+AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue RHS = Op.getOperand(0);
+  EVT RHSVT = RHS.getValueType();
+  MVT RST = RHSVT.getScalarType().getSimpleVT();
+  EVT LHSVT = Op.getValueType();
+  MVT LST = LHSVT.getScalarType().getSimpleVT();
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue DST;
+  const AMDILTargetMachine*
+  amdtm = reinterpret_cast<const AMDILTargetMachine*>
+          (&this->getTargetMachine());
+  const AMDILSubtarget*
+  stm = dynamic_cast<const AMDILSubtarget*>(
+          amdtm->getSubtargetImpl());
+  if (LST == MVT::f64 && LHSVT.isVector()) {
+    // We don't support vector 64bit floating point conversions, so convert
+    // each element separately and rebuild the vector.
+    DST = Op;
+    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
+      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
+      if (!x) {
+        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+      } else {
+        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
+            op, DAG.getTargetConstant(x, MVT::i32));
+      }
+    }
+  } else if (RST == MVT::i32
+      && LST == MVT::f64) {
+    if (stm->device()->getGeneration() > AMDILDeviceInfo::HD4XXX
+        && stm->calVersion() >= CAL_VERSION_SC_155) {
+      DST = SDValue(Op.getNode(), 0);
+    } else {
+      DST = genu32tof64(RHS, LHSVT, DAG);
+    }
+  } else if (RST == MVT::i64
+      && LST == MVT::f64) {
+    DST = genu64tof64(RHS, LHSVT, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+// Custom lowering for ISD::SINT_TO_FP.
+//  - f64 vector results are scalarised element by element;
+//  - i32/i64 -> f64 takes the absolute value, converts it with the unsigned
+//    helpers (genu32tof64 / genu64tof64) and ORs the sign bit back into the
+//    high word of the result. i32 sources are left alone on devices newer
+//    than HD4XXX with CAL_VERSION_SC_155 or later.
+// Everything else is returned unchanged as result 0 of the node.
+SDValue
+AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue src = Op.getOperand(0);
+  EVT srcVT = src.getValueType();
+  MVT srcEltVT = srcVT.getScalarType().getSimpleVT();
+  const bool vec = srcVT.isVector();
+  DebugLoc dl = Op.getDebugLoc();
+  EVT dstVT = Op.getValueType();
+  MVT dstEltVT = dstVT.getScalarType().getSimpleVT();
+  const AMDILTargetMachine *tm =
+    reinterpret_cast<const AMDILTargetMachine*>(&this->getTargetMachine());
+  const AMDILSubtarget *subtarget =
+    dynamic_cast<const AMDILSubtarget*>(tm->getSubtargetImpl());
+
+  if (dstEltVT == MVT::f64 && dstVT.isVector()) {
+    // We don't support vector 64bit floating point conversions, so convert
+    // each element separately and rebuild the vector.
+    SDValue result;
+    const unsigned numElts = dstVT.getVectorNumElements();
+    for (unsigned idx = 0; idx < numElts; ++idx) {
+      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+          dl, srcEltVT, src, DAG.getTargetConstant(idx, MVT::i32));
+      elt = DAG.getNode(ISD::SINT_TO_FP, dl, dstEltVT, elt);
+      if (idx == 0) {
+        result = DAG.getNode(AMDILISD::VBUILD, dl, dstVT, elt);
+      } else {
+        result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, dstVT, result,
+            elt, DAG.getTargetConstant(idx, MVT::i32));
+      }
+    }
+    return result;
+  }
+
+  const bool from32 = (srcEltVT == MVT::i32);
+  const bool from64 = (srcEltVT == MVT::i64);
+  if (!((from32 || from64) && dstEltVT == MVT::f64)) {
+    return SDValue(Op.getNode(), 0);
+  }
+  if (from32
+      && subtarget->device()->getGeneration() > AMDILDeviceInfo::HD4XXX
+      && subtarget->calVersion() >= CAL_VERSION_SC_155) {
+    return SDValue(Op.getNode(), 0);
+  }
+
+  // Integer types with the same element count as the source.
+  EVT i32VT;
+  EVT i64VT;
+  if (vec) {
+    i64VT = EVT(MVT::getVectorVT(MVT::i64, srcVT.getVectorNumElements()));
+    i32VT = EVT(MVT::getVectorVT(MVT::i32, srcVT.getVectorNumElements()));
+  } else {
+    i64VT = EVT(MVT::i64);
+    i32VT = EVT(MVT::i32);
+  }
+
+  SDValue shift31 = DAG.getConstant( 31, i32VT );
+  SDValue msbMask = DAG.getConstant( 0x80000000, i32VT );
+
+  SDValue signFill; // Sign, as 0 or -1, in the source type
+  SDValue signBit;  // Sign bit, as one bit, MSB only.
+  if (from32) {
+    signBit = DAG.getNode( ISD::AND, dl, i32VT, src, msbMask );
+    signFill = DAG.getNode(ISD::SRA, dl, srcVT, src, shift31 );
+  } else {
+    // 64-bit case... SRA of 64-bit values is slow, so work on the high word
+    SDValue hiWord = DAG.getNode(
+        vec ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, dl, i32VT, src );
+    signBit = DAG.getNode( ISD::AND, dl, i32VT, hiWord, msbMask );
+    SDValue fill32 = DAG.getNode( ISD::SRA, dl, i32VT, hiWord, shift31 );
+    signFill = DAG.getNode(
+        vec ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, dl, srcVT,
+        fill32, fill32 );
+  }
+
+  // abs(src) given the sign as 0 or -1: (src + sign) ^ sign
+  SDValue biased = DAG.getNode(ISD::ADD, dl, srcVT, src, signFill);
+  SDValue magnitude = DAG.getNode(ISD::XOR, dl, srcVT, biased, signFill);
+
+  // Convert the unsigned magnitude to double precision.
+  SDValue asFP;
+  if (from32) {
+    asFP = genu32tof64(magnitude, dstVT, DAG);
+  } else {
+    asFP = genu64tof64(magnitude, dstVT, DAG);
+  }
+
+  // Drop the sign bit into the high word of the result.
+  SDValue bits = DAG.getNode( AMDILISD::BITCONV, dl, i64VT, asFP );
+  SDValue bitsHi = DAG.getNode(
+      vec ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, dl, i32VT, bits );
+  SDValue bitsLo = DAG.getNode(
+      vec ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, dl, i32VT, bits );
+  bitsHi = DAG.getNode( ISD::OR, dl, i32VT, bitsHi, signBit );
+  bits = DAG.getNode(
+      vec ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, dl, i64VT,
+      bitsLo, bitsHi );
+  return DAG.getNode( AMDILISD::BITCONV, dl, dstVT, bits );
+}
+// Custom lowering for SUB. Values whose scalar type is i64 are split into
+// 32-bit low/high words, subtracted word by word, and the borrow (an
+// unsigned "lhs.lo < rhs.lo" compare) is added to the high difference.
+// Every other type is returned unchanged as result 0 of the node.
+SDValue
+AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue lhs = Op.getOperand(0);
+  SDValue rhs = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+  EVT resVT = Op.getValueType();
+  const bool vec = rhs.getValueType().isVector();
+
+  if (!(resVT.getScalarType() == MVT::i64)) {
+    return SDValue(Op.getNode(), 0);
+  }
+
+  MVT wordVT = MVT::i32;
+  if (resVT == MVT::v2i64) {
+    wordVT = MVT::v2i32;
+  }
+
+  // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+  SDValue lhsLo = DAG.getNode(
+      vec ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, dl, wordVT, lhs);
+  SDValue rhsLo = DAG.getNode(
+      vec ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, dl, wordVT, rhs);
+  SDValue lhsHi = DAG.getNode(
+      vec ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, dl, wordVT, lhs);
+  SDValue rhsHi = DAG.getNode(
+      vec ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, dl, wordVT, rhs);
+  SDValue diffLo = DAG.getNode(ISD::SUB, dl, wordVT, lhsLo, rhsLo);
+  SDValue diffHi = DAG.getNode(ISD::SUB, dl, wordVT, lhsHi, rhsHi);
+
+  //TODO: need to use IBORROW on HD5XXX and later hardware
+  SDValue borrow;
+  if (resVT == MVT::i64) {
+    borrow = DAG.getNode(AMDILISD::CMP, dl, wordVT,
+        DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+        lhsLo, rhsLo);
+  } else {
+    // Compare the two lanes of the low words separately, then rebuild a
+    // v2i32 borrow vector from the two scalar results.
+    SDValue lhsLane0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        dl, MVT::i32, lhsLo, DAG.getTargetConstant(0, MVT::i32));
+    SDValue lhsLane1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        dl, MVT::i32, lhsLo, DAG.getTargetConstant(1, MVT::i32));
+    SDValue rhsLane0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        dl, MVT::i32, rhsLo, DAG.getTargetConstant(0, MVT::i32));
+    SDValue rhsLane1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        dl, MVT::i32, rhsLo, DAG.getTargetConstant(1, MVT::i32));
+    SDValue borrow0 = DAG.getNode(AMDILISD::CMP, dl, MVT::i32,
+        DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+        lhsLane0, rhsLane0);
+    SDValue borrow1 = DAG.getNode(AMDILISD::CMP, dl, MVT::i32,
+        DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+        lhsLane1, rhsLane1);
+    borrow = DAG.getNode(AMDILISD::VBUILD, dl, MVT::v2i32, borrow0);
+    borrow = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2i32,
+        borrow, borrow1, DAG.getTargetConstant(1, MVT::i32));
+  }
+
+  diffHi = DAG.getNode(ISD::ADD, dl, wordVT, diffHi, borrow);
+  return DAG.getNode(
+      vec ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, dl, resVT,
+      diffLo, diffHi);
+}
+SDValue
+AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  // Dispatch a floating point divide on the scalar element type of the
+  // result: f64 and f32 have dedicated lowerings, anything else is returned
+  // unchanged.
+  const EVT EltVT = Op.getValueType().getScalarType();
+  if (EltVT == MVT::f64) {
+    return LowerFDIV64(Op, DAG);
+  }
+  if (EltVT == MVT::f32) {
+    return LowerFDIV32(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  // Dispatch a signed divide on the scalar element type of the result.
+  // i16 and i8 share the 24-bit lowering; unsupported types are returned
+  // unchanged.
+  const EVT EltVT = Op.getValueType().getScalarType();
+  if (EltVT == MVT::i64) {
+    return LowerSDIV64(Op, DAG);
+  }
+  if (EltVT == MVT::i32) {
+    return LowerSDIV32(Op, DAG);
+  }
+  if (EltVT == MVT::i16 || EltVT == MVT::i8) {
+    return LowerSDIV24(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  // Dispatch an unsigned divide on the scalar element type of the result.
+  // i16 and i8 share the 24-bit lowering; unsupported types are returned
+  // unchanged.
+  const EVT EltVT = Op.getValueType().getScalarType();
+  if (EltVT == MVT::i64) {
+    return LowerUDIV64(Op, DAG);
+  }
+  if (EltVT == MVT::i32) {
+    return LowerUDIV32(Op, DAG);
+  }
+  if (EltVT == MVT::i16 || EltVT == MVT::i8) {
+    return LowerUDIV24(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
+{
+  // Dispatch a signed remainder on the scalar element type of the result.
+  // Each of i64/i32/i16/i8 has its own lowering; unsupported types are
+  // returned unchanged.
+  const EVT EltVT = Op.getValueType().getScalarType();
+  if (EltVT == MVT::i64) {
+    return LowerSREM64(Op, DAG);
+  }
+  if (EltVT == MVT::i32) {
+    return LowerSREM32(Op, DAG);
+  }
+  if (EltVT == MVT::i16) {
+    return LowerSREM16(Op, DAG);
+  }
+  if (EltVT == MVT::i8) {
+    return LowerSREM8(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
+{
+  // Dispatch an unsigned remainder on the scalar element type of the result.
+  // Each of i64/i32/i16/i8 has its own lowering; unsupported types are
+  // returned unchanged.
+  const EVT EltVT = Op.getValueType().getScalarType();
+  if (EltVT == MVT::i64) {
+    return LowerUREM64(Op, DAG);
+  }
+  if (EltVT == MVT::i32) {
+    return LowerUREM32(Op, DAG);
+  }
+  if (EltVT == MVT::i16) {
+    return LowerUREM16(Op, DAG);
+  }
+  if (EltVT == MVT::i8) {
+    return LowerUREM8(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
+{
+  // Custom lowering for ISD::MUL.  Only results whose scalar type is i64 are
+  // expanded; everything else is returned unchanged.
+  //
+  // With a = (aHi, aLo) and b = (bHi, bLo) as 32-bit halves:
+  //   hi = UMUL(bHi, aLo) + UMUL(bLo, aHi) + MULHU(bLo, aLo)
+  //   lo = UMUL(aLo, bLo)
+  //   res = LCREATE(lo, hi)
+  const EVT ResVT = Op.getValueType();
+  if (ResVT.getScalarType() != MVT::i64) {
+    return SDValue(Op.getNode(), 0);
+  }
+  assert(ResVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
+
+  // TODO: This needs to be turned into a tablegen pattern
+  DebugLoc DL = Op.getDebugLoc();
+  const SDValue A = Op.getOperand(0);
+  const SDValue B = Op.getOperand(1);
+
+  const bool IsVec = ResVT.isVector();
+  const unsigned LoOpc = IsVec ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO;
+  const unsigned HiOpc = IsVec ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI;
+  const unsigned JoinOpc = IsVec ? AMDILISD::LCREATE2 : AMDILISD::LCREATE;
+  const MVT HalfVT = (ResVT == MVT::v2i64) ? MVT::v2i32 : MVT::i32;
+
+  // Split both operands into their 32-bit halves.
+  SDValue ALo = DAG.getNode(LoOpc, DL, HalfVT, A);
+  SDValue AHi = DAG.getNode(HiOpc, DL, HalfVT, A);
+  SDValue BLo = DAG.getNode(LoOpc, DL, HalfVT, B);
+  SDValue BHi = DAG.getNode(HiOpc, DL, HalfVT, B);
+
+  // Cross products that land in the high half.
+  SDValue CrossBHiALo = DAG.getNode(AMDILISD::UMUL, DL, HalfVT, BHi, ALo);
+  SDValue CrossBLoAHi = DAG.getNode(AMDILISD::UMUL, DL, HalfVT, BLo, AHi);
+  SDValue CrossSum = DAG.getNode(ISD::ADD, DL, HalfVT,
+      CrossBHiALo, CrossBLoAHi);
+
+  // Upper 32 bits of the low x low product also belong to the high half.
+  SDValue LoLoHigh = DAG.getNode(ISD::MULHU, DL, HalfVT, BLo, ALo);
+  SDValue High = DAG.getNode(ISD::ADD, DL, HalfVT, CrossSum, LoLoHigh);
+
+  // Lower 32 bits of the low x low product form the low half.
+  SDValue Low = DAG.getNode(AMDILISD::UMUL, DL, HalfVT, ALo, BLo);
+
+  return DAG.getNode(JoinOpc, DL, ResVT, Low, High);
+}
+SDValue
+AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
+{
+  // Custom lowering for ISD::BUILD_VECTOR.
+  //
+  // The result always starts as VBUILD(operand 0).  If every operand equals
+  // operand 0 that node is the final answer.  Otherwise, for 2..4 operands,
+  // each non-UNDEF operand 1..3 is merged in with INSERT_VECTOR_ELT, walking
+  // from the highest operand down to operand 1.  Any other operand count
+  // returns the plain VBUILD.
+  DebugLoc DL = Op.getDebugLoc();
+  const unsigned NumOps = Op.getNumOperands();
+  SDValue Vec = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(),
+      Op.getOperand(0));
+
+  bool IsSplat = true;
+  for (unsigned i = 1; IsSplat && i < NumOps; ++i) {
+    IsSplat = (Op.getOperand(0) == Op.getOperand(i));
+  }
+  if (IsSplat) {
+    return Vec;
+  }
+
+  if (NumOps < 2 || NumOps > 4) {
+    return Vec;
+  }
+
+  for (unsigned i = NumOps - 1; i >= 1; --i) {
+    SDValue Elt = Op.getOperand(i);
+    if (Elt.getOpcode() == ISD::UNDEF) {
+      continue;
+    }
+    // The index constant is the operand position plus 4 (5, 6, 7 for
+    // operands 1, 2, 3).  The meaning of that encoding is not visible in
+    // this function.
+    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
+        Vec, Elt, DAG.getConstant(i + 4, MVT::i32));
+  }
+  return Vec;
+}
+
+SDValue
+AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  // Custom lowering for ISD::INSERT_VECTOR_ELT into AMDILISD::VINSERT.
+  //
+  // Operands: 0 = vector, 1 = element, 2 = lane index.
+  // VINSERT takes two i32 mask constants derived from the lane number:
+  //   0x04030201 with the lane's byte cleared, and
+  //   0x01010101 with only the lane's byte kept.
+  // A constant index yields a single VINSERT.  A dynamic index builds one
+  // VINSERT per lane and chains CMOVLOG selects keyed on "index == lane".
+  const EVT VecVT = Op.getValueType();
+  if (!VecVT.isVector()) {
+    return Op.getOperand(0);
+  }
+
+  DebugLoc DL = Op.getDebugLoc();
+  const SDValue &Vec = Op.getOperand(0);
+  // When the element operand is UNDEF, the vector itself is passed as the
+  // value to insert.
+  const SDValue &Elt = (Op.getOperand(1).getOpcode() != ISD::UNDEF)
+      ? Op.getOperand(1) : Op.getOperand(0);
+
+  if (const ConstantSDNode *IdxC
+      = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
+    const uint32_t Lane = (uint32_t)IdxC->getZExtValue();
+    const uint32_t ClearedLane = 0x04030201 & ~(0xFF << (Lane * 8));
+    const uint32_t LaneBit = 0x01010101 & (0xFF << (Lane * 8));
+    return DAG.getNode(AMDILISD::VINSERT, DL, VecVT, Vec, Elt,
+        DAG.getTargetConstant(ClearedLane, MVT::i32),
+        DAG.getTargetConstant(LaneBit, MVT::i32));
+  }
+
+  // Dynamic index: start from the lane-0 insertion.
+  const uint32_t FirstLane = 0;
+  uint32_t ClearedLane = 0x04030201 & ~(0xFF << (FirstLane * 8));
+  uint32_t LaneBit = 0x01010101 & (0xFF << (FirstLane * 8));
+  SDValue Chosen = DAG.getNode(AMDILISD::VINSERT, DL, VecVT, Vec, Elt,
+      DAG.getTargetConstant(ClearedLane, MVT::i32),
+      DAG.getTargetConstant(LaneBit, MVT::i32));
+
+  for (uint32_t Lane = 1; Lane < VecVT.getVectorNumElements(); ++Lane) {
+    ClearedLane = 0x04030201 & ~(0xFF << (Lane * 8));
+    LaneBit = 0x01010101 & (0xFF << (Lane * 8));
+    SDValue Candidate = DAG.getNode(AMDILISD::VINSERT, DL, VecVT, Vec, Elt,
+        DAG.getTargetConstant(ClearedLane, MVT::i32),
+        DAG.getTargetConstant(LaneBit, MVT::i32));
+    // The compare is typed with the inserted value's type, then splatted to
+    // the vector type so it can drive the vector CMOVLOG.
+    SDValue IsLane = DAG.getNode(AMDILISD::CMP, DL, Elt.getValueType(),
+        DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+        Op.getOperand(2), DAG.getConstant(Lane, MVT::i32));
+    IsLane = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), IsLane);
+    Chosen = DAG.getNode(AMDILISD::CMOVLOG, DL, VecVT,
+        IsLane, Candidate, Chosen);
+  }
+  return Chosen;
+}
+
+SDValue
+AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  // Custom lowering for ISD::EXTRACT_VECTOR_ELT into AMDILISD::VEXTRACT.
+  //
+  // Operands: 0 = source vector, 1 = lane index.
+  // The VEXTRACT selector is one-based: the constant-index path passes
+  // (index + 1).  A non-vector source is returned as-is.
+  //
+  // For a dynamic index, every lane is extracted and a CMOVLOG chain picks
+  // the one whose zero-based lane number equals the runtime index; lane 0 is
+  // the default when nothing matches.
+  //
+  // Fix: the dynamic path used to compare the index against the one-based
+  // selector instead of the zero-based lane, so index k returned lane k-1
+  // and the last lane was unreachable.  It now compares against the lane
+  // number, consistent with the constant-index path.
+  const EVT VT = Op.getValueType();
+  const SDValue Vec = Op.getOperand(0);
+  if (!Vec.getValueType().isVector()) {
+    return Vec;
+  }
+
+  DebugLoc DL = Op.getDebugLoc();
+  if (const ConstantSDNode *IdxC
+      = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    // Static vector extraction
+    const uint64_t Selector = IdxC->getZExtValue() + 1;
+    return DAG.getNode(AMDILISD::VEXTRACT, DL, VT, Vec,
+        DAG.getTargetConstant(Selector, MVT::i32));
+  }
+
+  // Dynamic vector extraction
+  const SDValue Idx = Op.getOperand(1);
+  const uint32_t NumElts = Vec.getValueType().getVectorNumElements();
+  SDValue Res = DAG.getNode(AMDILISD::VEXTRACT, DL, VT, Vec,
+      DAG.getTargetConstant(1, MVT::i32));
+  for (uint32_t Lane = 1; Lane < NumElts; ++Lane) {
+    SDValue Elt = DAG.getNode(AMDILISD::VEXTRACT, DL, VT, Vec,
+        DAG.getTargetConstant(Lane + 1, MVT::i32));
+    SDValue IsLane = DAG.getNode(AMDILISD::CMP, DL, Idx.getValueType(),
+        DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+        Idx, DAG.getConstant(Lane, MVT::i32));
+    Res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, IsLane, Elt, Res);
+  }
+  return Res;
+}
+
+SDValue
+AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  // Custom lowering for ISD::EXTRACT_SUBVECTOR.
+  //
+  // Operands: 0 = source vector, 1 = index of the first element to take.
+  // The result is built lane by lane: lane 0 seeds a VBUILD, and every
+  // further lane x is read from source element (start + x) and inserted at
+  // destination lane x, unless the extract folded to UNDEF.
+  //
+  // Fixes for the non-constant start index path:
+  //  * the index ADD was typed with the result's element type rather than
+  //    the index's own type;
+  //  * the element was inserted at the running source index instead of the
+  //    destination lane x (the constant path already used x).
+  const EVT ResVT = Op.getValueType();
+  const EVT EltVT = ResVT.getVectorElementType();
+  const uint32_t NumElts = ResVT.getVectorNumElements();
+  const SDValue Src = Op.getOperand(0);
+  const SDValue Start = Op.getOperand(1);
+  const ConstantSDNode *StartC = dyn_cast<ConstantSDNode>(Start);
+  DebugLoc DL = Op.getDebugLoc();
+
+  // Lane 0 comes straight from the start index.
+  SDValue FirstIdx = Start;
+  if (StartC) {
+    FirstIdx = DAG.getConstant(StartC->getZExtValue(), MVT::i32);
+  }
+  SDValue Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+      Src, FirstIdx);
+  Result = DAG.getNode(AMDILISD::VBUILD, DL, ResVT, Result);
+
+  for (uint32_t Lane = 1; Lane < NumElts; ++Lane) {
+    SDValue SrcIdx;
+    if (StartC) {
+      SrcIdx = DAG.getConstant(StartC->getZExtValue() + Lane, MVT::i32);
+    } else {
+      const EVT IdxVT = Start.getValueType();
+      SrcIdx = DAG.getNode(ISD::ADD, DL, IdxVT,
+          Start, DAG.getConstant(Lane, IdxVT));
+    }
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+        Src, SrcIdx);
+    if (Elt.getOpcode() != ISD::UNDEF) {
+      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResVT,
+          Result, Elt, DAG.getConstant(Lane, MVT::i32));
+    }
+  }
+  return Result;
+}
+SDValue
+AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  // Lower ISD::SCALAR_TO_VECTOR to AMDILISD::VBUILD of the scalar operand,
+  // keeping the original result type and debug location.
+  DebugLoc DL = Op.getDebugLoc();
+  const EVT VecVT = Op.getValueType();
+  const SDValue Scalar = Op.getOperand(0);
+  return DAG.getNode(AMDILISD::VBUILD, DL, VecVT, Scalar);
+}
+SDValue
+AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
+{
+  // Re-emit the node as the target-specific AMDILISD::AND with the same
+  // result type and the same two operands.
+  DebugLoc DL = Op.getDebugLoc();
+  const SDValue A = Op.getOperand(0);
+  const SDValue B = Op.getOperand(1);
+  return DAG.getNode(AMDILISD::AND, DL, Op.getValueType(), A, B);
+}
+SDValue
+AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
+{
+  // Re-emit the node as the target-specific AMDILISD::OR with the same
+  // result type and the same two operands.
+  DebugLoc DL = Op.getDebugLoc();
+  const SDValue A = Op.getOperand(0);
+  const SDValue B = Op.getOperand(1);
+  return DAG.getNode(AMDILISD::OR, DL, Op.getValueType(), A, B);
+}
+SDValue
+AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+  // Lower ISD::SELECT (operands: condition, true value, false value) to
+  // AMDILISD::CMOVLOG.  The condition is first passed through
+  // getConversionNode() together with the select node itself.
+  const SDValue IfTrue = Op.getOperand(1);
+  const SDValue IfFalse = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+  const SDValue Pred = getConversionNode(DAG, Op.getOperand(0), Op, true);
+  return DAG.getNode(AMDILISD::CMOVLOG, DL, Op.getValueType(),
+      Pred, IfTrue, IfFalse);
+}
+SDValue
+AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  // Lower ISD::SELECT_CC into AMDILISD::CMP followed by AMDILISD::CMOVLOG.
+  //
+  // Operands: 0 = compare LHS, 1 = compare RHS, 2 = value if true,
+  //           3 = value if false, 4 = condition code.
+  //
+  // When the true value has type i32 and both select values are constants
+  // forming the pair (all-ones, 0), the converted compare result is returned
+  // directly; for the pair (0, all-ones) it is returned through
+  // AMDILISD::NOT.  In both cases no CMOVLOG is emitted.
+  const SDValue CmpLHS = Op.getOperand(0);
+  const SDValue CmpRHS = Op.getOperand(1);
+  const SDValue IfTrue = Op.getOperand(2);
+  const SDValue IfFalse = Op.getOperand(3);
+  const SDValue CCOperand = Op.getOperand(4);
+  DebugLoc DL = Op.getDebugLoc();
+  const EVT ResVT = Op.getValueType();
+
+  // Check for possible elimination of cmov
+  bool needCMov = true;
+  bool invertResult = false;
+  if (IfTrue.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
+    const ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(IfTrue.getNode());
+    const ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(IfFalse.getNode());
+    if (TrueC && FalseC) {
+      if (TrueC->isAllOnesValue() && FalseC->isNullValue()) {
+        // (-1, 0): the compare result can be used as it is.
+        needCMov = false;
+      } else if (TrueC->isNullValue() && FalseC->isAllOnesValue()) {
+        // (0, -1): the compare result has to be inverted.
+        needCMov = false;
+        invertResult = true;
+      }
+    }
+  }
+
+  const ISD::CondCode Code = cast<CondCodeSDNode>(CCOperand)->get();
+  const unsigned int ilCC = CondCCodeToCC(
+      Code, CmpLHS.getValueType().getSimpleVT().SimpleTy);
+  assert((ilCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+
+  SDValue Result = DAG.getNode(AMDILISD::CMP, DL, CmpLHS.getValueType(),
+      DAG.getConstant(ilCC, MVT::i32), CmpLHS, CmpRHS);
+  Result = getConversionNode(DAG, Result, Op, true);
+  if (invertResult) {
+    Result = DAG.getNode(AMDILISD::NOT, DL, ResVT, Result);
+  }
+  if (needCMov) {
+    Result = DAG.getNode(AMDILISD::CMOVLOG, DL, ResVT,
+        Result, IfTrue, IfFalse);
+  }
+  return Result;
+}
+SDValue
+AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+  // Lower ISD::SETCC (operands: LHS, RHS, condition code) to AMDILISD::CMP.
+  // The compare result is passed through getConversionNode() and then
+  // masked with 1, so only its lowest bit is kept.
+  const SDValue CmpLHS = Op.getOperand(0);
+  const SDValue CmpRHS = Op.getOperand(1);
+  DebugLoc DL = Op.getDebugLoc();
+
+  const ISD::CondCode Code = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+  const unsigned int ilCC = CondCCodeToCC(
+      Code, CmpLHS.getValueType().getSimpleVT().SimpleTy);
+  assert((ilCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+
+  SDValue Result = DAG.getNode(AMDILISD::CMP, DL, CmpLHS.getValueType(),
+      DAG.getConstant(ilCC, MVT::i32), CmpLHS, CmpRHS);
+  Result = getConversionNode(DAG, Result, Op, true);
+
+  const EVT ResVT = Result.getValueType();
+  return DAG.getNode(ISD::AND, DL, ResVT,
+      DAG.getConstant(1, ResVT), Result);
+}
+
+SDValue
+AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+{
+  // Lower ISD::SIGN_EXTEND_INREG as a shift-left / arithmetic-shift-right
+  // pair.  Operand 0 is the data, operand 1 is a VTSDNode naming the type
+  // whose sign bit is to be replicated.
+  //
+  // Data narrower than 32 bits per element is zero-extended to a 32-bit
+  // integer type first and converted back to its original type at the end.
+  SDValue Data = Op.getOperand(0);
+  const EVT OrigVT = Data.getValueType();
+  const EVT FromVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+  DebugLoc DL = Op.getDebugLoc();
+
+  const unsigned FromBits = FromVT.getScalarType().getSizeInBits();
+  // Non-simple data types are treated as being 1 bit wide.
+  const unsigned DataBits
+      = OrigVT.isSimple() ? OrigVT.getScalarType().getSizeInBits() : 1;
+  const bool Widen = DataBits < 32;
+
+  EVT WorkVT = OrigVT;
+  unsigned ShiftAmt = DataBits - FromBits;
+  if (Widen) {
+    // If the op is less than 32 bits, then it needs to extend to 32bits
+    // so it can properly keep the upper bits valid.
+    WorkVT = genIntType(32,
+        OrigVT.isVector() ? OrigVT.getVectorNumElements() : 1);
+    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, WorkVT, Data);
+    ShiftAmt = 32 - FromBits;
+  }
+
+  // Move the sign bit to the top, then shift it back down arithmetically.
+  const SDValue Amount = DAG.getConstant(ShiftAmt, WorkVT);
+  Data = DAG.getNode(ISD::SHL, DL, WorkVT, Data, Amount);
+  Data = DAG.getNode(ISD::SRA, DL, WorkVT, Data, Amount);
+
+  if (Widen) {
+    // Once the sign extension is done, the op needs to be converted to
+    // its original type.
+    Data = DAG.getSExtOrTrunc(Data, DL, OrigVT);
+  }
+  return Data;
+}
+EVT
+AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
+{
+  // Pick an integer type able to hold (size * numEle) bits.
+  //
+  // With size == 64 the element type is i64 and the lane count is the total
+  // bit count divided by 64.  For every other size the element type is i32
+  // and the lane count is the total bit count divided by 32.  A lane count
+  // of zero is bumped to one, and a single lane yields the scalar type.
+  const bool Is64 = (size == 64);
+  const int TotalBits = (size * numEle);
+  int Lanes = TotalBits >> (Is64 ? 6 : 5);
+  if (Lanes == 0) {
+    Lanes = 1;
+  }
+
+  const MVT EltVT = Is64 ? MVT::i64 : MVT::i32;
+  if (Lanes == 1) {
+    return EVT(EltVT);
+  }
+  return EVT(MVT::getVectorVT(EltVT, Lanes));
+}
+
+SDValue
+AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
+{ /* Custom lowering for ISD::BITCAST.
+ *
+ * A floating point source is first converted to the matching integer type
+ * with AMDILISD::BITCONV.  Integer destinations are then handled in two
+ * groups: the i64 <-> <4 x i16> pair, and four numbered cases in which
+ * one or both scalar sizes are below 32 bits.  Each of those paths builds
+ * the result out of extract / shift / and / or / insert nodes and returns
+ * it directly.  Everything else falls through to a single
+ * AMDILISD::BITCONV at the end of the function.
+ */
+ SDValue Src = Op.getOperand(0);
+ SDValue Dst = Op;
+ SDValue Res;
+ DebugLoc DL = Op.getDebugLoc();
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Dst.getValueType();
+ // Let's bitcast the floating point types to an
+ // equivalent integer type before converting to vectors.
+ if (SrcVT.getScalarType().isFloatingPoint()) {
+ Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
+ SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
+ SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
+ Src);
+ SrcVT = Src.getValueType();
+ }
+ uint32_t ScalarSrcSize = SrcVT.getScalarType()
+ .getSimpleVT().getSizeInBits();
+ uint32_t ScalarDstSize = DstVT.getScalarType()
+ .getSimpleVT().getSizeInBits();
+ uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
+ bool isVec = SrcVT.isVector(); // taken from the source type after the optional float -> int conversion above
+ if (DstVT.getScalarType().isInteger() &&
+ (SrcVT.getScalarType().isInteger()
+ || SrcVT.getScalarType().isFloatingPoint())) {
+ if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
+ || (ScalarSrcSize == 64
+ && DstNumEle == 4
+ && ScalarDstSize == 16)) {
+ // This is the problematic case when bitcasting i64 <-> <4 x i16>
+ // This approach is a little different as we cannot generate a
+ // <4 x i64> vector
+ // as that is illegal in our backend and we are already past
+ // the DAG legalizer.
+ // So, in this case, we will do the following conversion.
+ // Case 1:
+ // %dst = <4 x i16> %src bitconvert i64 ==>
+ // %tmp = <4 x i16> %src convert <4 x i32>
+ // %tmp = <4 x i32> %tmp and 0xFFFF
+ // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
+ // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
+ // %dst = <2 x i32> %tmp bitcast i64
+ // case 2:
+ // %dst = i64 %src bitconvert <4 x i16> ==>
+ // %tmp = i64 %src bitcast <2 x i32>
+ // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
+ // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
+ // %tmp = <4 x i32> %tmp and 0xFFFF
+ // %dst = <4 x i16> %tmp bitcast <4 x i32>
+ SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
+ DAG.getConstant(0xFFFF, MVT::i32));
+ SDValue const16 = DAG.getConstant(16, MVT::i32);
+ if (ScalarDstSize == 64) {
+ // case 1: <4 x i16> source, i64 result
+ Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
+ Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
+ SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(0, MVT::i32));
+ SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(1, MVT::i32));
+ y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
+ SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(2, MVT::i32));
+ SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Op, DAG.getConstant(3, MVT::i32));
+ w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
+ x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
+ y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
+ Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y); // NOTE(review): isVec is always true on this path (SrcNumEle == 4), so LCREATE2 is used with scalar i32 inputs and an i64 result -- confirm LCREATE was not intended
+ return Res;
+ } else {
+ // case 2: i64 source, <4 x i16> result
+ SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
+ SDValue lor16
+ = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
+ SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
+ SDValue hir16
+ = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
+ SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
+ MVT::v4i32, lo);
+ SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MVT::i32, DAG.getConstant(1, MVT::i32));
+ resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ resVec, lor16, idxVal);
+ idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MVT::i32, DAG.getConstant(2, MVT::i32));
+ resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ resVec, hi, idxVal);
+ idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MVT::i32, DAG.getConstant(3, MVT::i32));
+ resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ resVec, hir16, idxVal);
+ resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
+ Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
+ return Res;
+ }
+ } else {
+ // There are four cases we need to worry about for bitcasts
+ // where the size of all
+ // source, intermediates and result is <= 128 bits, unlike
+ // the above case
+ // 1) Sub32bit bitcast 32bitAlign
+ // %dst = <4 x i8> bitcast i32
+ // (also <[2|4] x i16> to <[2|4] x i32>)
+ // 2) 32bitAlign bitcast Sub32bit
+ // %dst = i32 bitcast <4 x i8>
+ // 3) Sub32bit bitcast LargerSub32bit
+ // %dst = <2 x i8> bitcast i16
+ // (also <4 x i8> to <2 x i16>)
+ // 4) Sub32bit bitcast SmallerSub32bit
+ // %dst = i16 bitcast <2 x i8>
+ // (also <2 x i16> to <4 x i8>)
+ // This also only handles types that are powers of two
+ if ((ScalarDstSize & (ScalarDstSize - 1))
+ || (ScalarSrcSize & (ScalarSrcSize - 1))) { // non-power-of-two sizes: nothing is built here, control reaches the BITCONV at the end
+ } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
+ // case 1: source elements narrower than 32 bits, destination elements of 32 bits or more
+ EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
+#if 0 // FIXME: LLVM does not like this for some reason, cannot SignExt vectors
+ SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
+#else
+ SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+ DAG.getConstant(0, IntTy));
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MVT::i32, DAG.getConstant(x, MVT::i32));
+ SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ SrcVT.getScalarType(), Src,
+ DAG.getConstant(x, MVT::i32));
+ temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
+ res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
+ res, temp, idx);
+ }
+#endif
+ SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+ DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
+ SDValue *newEle = new SDValue[SrcNumEle]; // scratch array, released by the delete [] before the return below
+ res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntTy.getScalarType(), res,
+ DAG.getConstant(x, MVT::i32));
+ }
+ uint32_t Ratio = SrcNumEle / DstNumEle; // number of source elements packed into one destination element
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ if (x % Ratio) {
+ newEle[x] = DAG.getNode(ISD::SHL, DL,
+ IntTy.getScalarType(), newEle[x],
+ DAG.getConstant(ScalarSrcSize * (x % Ratio),
+ MVT::i32));
+ }
+ }
+ for (uint32_t x = 0; x < SrcNumEle; x += 2) { // reads newEle[x + 1]; NOTE(review): assumes SrcNumEle is even -- confirm
+ newEle[x] = DAG.getNode(ISD::OR, DL,
+ IntTy.getScalarType(), newEle[x], newEle[x + 1]);
+ }
+ if (ScalarSrcSize == 8) {
+ for (uint32_t x = 0; x < SrcNumEle; x += 4) {
+ newEle[x] = DAG.getNode(ISD::OR, DL,
+ IntTy.getScalarType(), newEle[x], newEle[x + 2]);
+ }
+ if (DstNumEle == 1) {
+ Dst = newEle[0];
+ } else {
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+ newEle[0]);
+ for (uint32_t x = 1; x < DstNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MVT::i32, DAG.getConstant(x, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ DstVT, Dst, newEle[x * 4], idx);
+ }
+ }
+ } else {
+ if (DstNumEle == 1) {
+ Dst = newEle[0];
+ } else {
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+ newEle[0]);
+ for (uint32_t x = 1; x < DstNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MVT::i32, DAG.getConstant(x, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ DstVT, Dst, newEle[x * 2], idx);
+ }
+ }
+ }
+ delete [] newEle;
+ return Dst;
+ } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
+ // case 2: source elements of 32 bits or more, destination elements narrower than 32 bits
+ EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
+ SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+ DAG.getConstant(0, IntTy));
+ uint32_t mult = (ScalarDstSize == 8) ? 4 : 2; // destination elements produced per source element
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ for (uint32_t y = 0; y < mult; ++y) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MVT::i32,
+ DAG.getConstant(x * mult + y, MVT::i32));
+ SDValue t;
+ if (SrcNumEle > 1) {
+ t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, SrcVT.getScalarType(), Src,
+ DAG.getConstant(x, MVT::i32));
+ } else {
+ t = Src;
+ }
+ if (y != 0) {
+ t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
+ t, DAG.getConstant(y * ScalarDstSize,
+ MVT::i32));
+ }
+ vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
+ DL, IntTy, vec, t, idx);
+ }
+ }
+ Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
+ return Dst;
+ } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
+ // case 3: i8 elements packed pairwise into i16 elements
+ SDValue *numEle = new SDValue[SrcNumEle]; // scratch array, released by the delete [] before the return below
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ MVT::i8, Src, DAG.getConstant(x, MVT::i32));
+ numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
+ numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
+ DAG.getConstant(0xFF, MVT::i16));
+ }
+ for (uint32_t x = 1; x < SrcNumEle; x += 2) {
+ numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
+ DAG.getConstant(8, MVT::i16));
+ numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
+ numEle[x-1], numEle[x]);
+ }
+ if (DstNumEle > 1) {
+ // If we are not a scalar i16, the only other case is a
+ // v2i16 since we can't have v8i8 at this point, v4i16
+ // cannot be generated
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
+ numEle[0]);
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MVT::i32, DAG.getConstant(1, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
+ Dst, numEle[2], idx);
+ } else {
+ Dst = numEle[0];
+ }
+ delete [] numEle;
+ return Dst;
+ } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
+ // case 4: each i16 element split into two i8 elements
+ SDValue *numEle = new SDValue[DstNumEle]; // scratch array, released by the delete [] below; written at indices up to 2 * SrcNumEle - 1
+ for (uint32_t x = 0; x < SrcNumEle; ++x) {
+ numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ MVT::i16, Src, DAG.getConstant(x, MVT::i32));
+ numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
+ numEle[x * 2], DAG.getConstant(8, MVT::i16));
+ }
+ MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
+ Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
+ for (uint32_t x = 1; x < DstNumEle; ++x) {
+ SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MVT::i32, DAG.getConstant(x, MVT::i32));
+ Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
+ Dst, numEle[x], idx);
+ }
+ delete [] numEle;
+ ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
+ Res = DAG.getSExtOrTrunc(Dst, DL, ty);
+ return Res;
+ }
+ }
+ }
+ Res = DAG.getNode(AMDILISD::BITCONV, // default path: plain BITCONV of Src to the destination type
+ Dst.getDebugLoc(),
+ Dst.getValueType(), Src);
+ return Res;
+}
+
+SDValue
+AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  // Lower ISD::DYNAMIC_STACKALLOC (operands: chain, size).
+  //
+  // Reads AMDIL::SP as an i32, adds the requested size to it, writes the
+  // sum back to AMDIL::SP and returns the merged pair
+  // { new stack pointer, output chain }.
+  DebugLoc DL = Op.getDebugLoc();
+  const SDValue InChain = Op.getOperand(0);
+  const SDValue AllocSize = Op.getOperand(1);
+  const unsigned int StackReg = AMDIL::SP;
+
+  SDValue OldSP = DAG.getCopyFromReg(InChain, DL, StackReg, MVT::i32);
+  SDValue NewSP = DAG.getNode(ISD::ADD, DL, MVT::i32, OldSP, AllocSize);
+  // Chain the register write after the register read.
+  SDValue OutChain = DAG.getCopyToReg(OldSP.getValue(1), DL, StackReg, NewSP);
+
+  SDValue Results[2] = {NewSP, OutChain};
+  return DAG.getMergeValues(Results, 2, DL);
+}
+SDValue
+AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+  // Lower ISD::BRCOND (operands: chain, condition, destination) to
+  // AMDILISD::BRANCH_COND.  Note the operand order of the target node:
+  // chain, destination, condition.
+  const SDValue InChain = Op.getOperand(0);
+  const SDValue Pred = Op.getOperand(1);
+  const SDValue Dest = Op.getOperand(2);
+  return DAG.getNode(AMDILISD::BRANCH_COND, Op.getDebugLoc(),
+      Op.getValueType(), InChain, Dest, Pred);
+}
+
+SDValue
+AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  // Lower ISD::BR_CC (operands: chain, condition code, LHS, RHS,
+  // destination) into an AMDILISD::CMP feeding an AMDILISD::BRANCH_COND.
+  const SDValue InChain = Op.getOperand(0);
+  const ISD::CondCode Code = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  const SDValue CmpLHS = Op.getOperand(2);
+  const SDValue CmpRHS = Op.getOperand(3);
+  const SDValue Dest = Op.getOperand(4);
+
+  const unsigned int ilCC = CondCCodeToCC(
+      Code, CmpLHS.getValueType().getSimpleVT().SimpleTy);
+  SDValue Pred = DAG.getNode(AMDILISD::CMP, Op.getDebugLoc(),
+      CmpLHS.getValueType(), DAG.getConstant(ilCC, MVT::i32),
+      CmpLHS, CmpRHS);
+
+  // The branch takes its debug location from the compare node.
+  return DAG.getNode(AMDILISD::BRANCH_COND, Pred.getDebugLoc(),
+      MVT::Other, InChain, Dest, Pred);
+}
+
+SDValue
+AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
+{
+  // Lower ISD::FP_ROUND to AMDILISD::DP_TO_FP, forwarding both operands
+  // and keeping the result type.
+  DebugLoc DL = Op.getDebugLoc();
+  const SDValue Value = Op.getOperand(0);
+  const SDValue Extra = Op.getOperand(1);
+  return DAG.getNode(AMDILISD::DP_TO_FP, DL, Op.getValueType(),
+      Value, Extra);
+}
+
+SDValue
+AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
+{
+  // Lower ISD::CONCAT_VECTORS to AMDILISD::VCONCAT.  Only operands 0 and 1
+  // are forwarded.
+  DebugLoc DL = Op.getDebugLoc();
+  const SDValue First = Op.getOperand(0);
+  const SDValue Second = Op.getOperand(1);
+  return DAG.getNode(AMDILISD::VCONCAT, DL, Op.getValueType(),
+      First, Second);
+}
+// LowerReturn - Lower the outgoing return values of a function.
+//
+// The return values are assigned to locations with RetCC_AMDIL32, each
+// value is copied into its assigned register (all copies glued together),
+// and an AMDILISD::RET_FLAG node is returned whose operands are the final
+// chain, an i32 constant 0 and, if any copy was emitted, the glue value.
+SDValue
+AMDILTargetLowering::LowerReturn(SDValue Chain,
+    CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals,
+    DebugLoc dl, SelectionDAG &DAG)
+const
+{
+  // One CCValAssign per return value, filled in by the calling convention
+  // analysis below.
+  SmallVector<CCValAssign, 16> Locs;
+  CCState State(CallConv, isVarArg, DAG.getMachineFunction(),
+      getTargetMachine(), Locs, *DAG.getContext());
+  State.AnalyzeReturn(Outs, RetCC_AMDIL32);
+
+  // Make sure every return register is in the function's live-out set.
+  MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+  for (unsigned int i = 0, e = Locs.size(); i != e; ++i) {
+    if (!Locs[i].isRegLoc()) {
+      continue;
+    }
+    if (!RegInfo.isLiveOut(Locs[i].getLocReg())) {
+      RegInfo.addLiveOut(Locs[i].getLocReg());
+    }
+  }
+  // FIXME: implement this when tail call is implemented
+  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
+  // both x86 and ppc implement this in ISelLowering
+
+  // Second operand of the return node: i32 constant 0 (stands in for a
+  // bytes-to-pop-on-return value).
+  const SDValue BytesToPop = DAG.getConstant(0, MVT::i32);
+
+  // Copy each value into its return register.  Threading the glue result
+  // through the copies keeps them stuck together.
+  SDValue Glue;
+  for (unsigned int i = 0, e = Locs.size(); i != e; ++i) {
+    CCValAssign &Loc = Locs[i];
+    assert(Loc.isRegLoc() && "Can only return in registers!");
+    Chain = DAG.getCopyToReg(Chain, dl, Loc.getLocReg(), OutVals[i], Glue);
+    Glue = Chain.getValue(1);
+  }
+  // NOTE: struct returns are not handled here.
+
+  SmallVector<SDValue, 6> RetOps;
+  RetOps.push_back(Chain);
+  RetOps.push_back(BytesToPop);
+  if (Glue.getNode()) {
+    RetOps.push_back(Glue);
+  }
+  return DAG.getNode(AMDILISD::RET_FLAG, dl, MVT::Other,
+      &RetOps[0], RetOps.size());
+}
+/// Expand a 64-bit integer relational instruction into 32-bit operations.
+///
+/// The two 64-bit sources (operands 2 and 3 of \p MI) are split into high and
+/// low halves, the halves are compared with 32-bit opcodes chosen from
+/// \p opCode, the partial results are combined, and the combined 32-bit value
+/// is replicated into both halves of the destination (operand 0) by LCREATE.
+/// New instructions are emitted through generateMachineInst(), i.e. at the
+/// insertion point previously recorded by setPrivateData().
+///
+/// \param MI     the relational instruction being expanded.
+/// \param opCode the 64-bit comparison to implement (LEQ, LNE, LLT, ...).
+void
+AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
+    unsigned int opCode) const
+{
+  const MachineOperand &DST = MI->getOperand(0);
+  const MachineOperand &LHS = MI->getOperand(2);
+  const MachineOperand &RHS = MI->getOperand(3);
+  unsigned int opi32Code = 0, si32Code = 0;
+  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+  uint32_t REGS[12];
+  // The temporaries hold 32-bit halves and 32-bit compare results, so the
+  // 64-bit destination class is mapped to the matching 32-bit class.
+  switch (simpleVT) {
+  case AMDIL::GPRI64RegClassID:
+    simpleVT = AMDIL::GPRI32RegClassID;
+    break;
+  case AMDIL::GPRV2I64RegClassID:
+    // Was a self-assignment (GPRV2I64), which left the temporaries in the
+    // 64-bit vector class.
+    simpleVT = AMDIL::GPRV2I32RegClassID;
+    break;
+  };
+  // Allocate the whole pool of temporaries up front; not every slot is used
+  // by every comparison.
+  for (int x = 0; x < 12; ++x) {
+    REGS[x] = genVReg(simpleVT);
+  }
+  // Pull out the high and low components of each 64 bit register:
+  //   REGS[0] = L_HI, REGS[1] = L_LO, REGS[2] = R_HI, REGS[3] = R_LO
+  generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
+  generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
+  generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
+  generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
+  // Determine the correct opcodes: opi32Code compares the high halves,
+  // si32Code compares the low halves.
+  switch(opCode) {
+  default:
+    assert(!"comparison case not handled!");
+    break;
+  case AMDIL::LEQ:
+    si32Code = opi32Code = AMDIL::IEQ;
+    break;
+  case AMDIL::LNE:
+    si32Code = opi32Code = AMDIL::INE;
+    break;
+  case AMDIL::LLE:
+  case AMDIL::ULLE:
+  case AMDIL::LGE:
+  case AMDIL::ULGE:
+    // Only one pair of halves is swapped: for GE the high halves (so that the
+    // strict "less than" on the high words becomes "greater than"), for LE the
+    // low halves (so that UGE on the low words becomes "less or equal").
+    if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
+      std::swap(REGS[0], REGS[2]);
+    } else {
+      std::swap(REGS[1], REGS[3]);
+    }
+    if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
+      opi32Code = AMDIL::ILT;
+    } else {
+      opi32Code = AMDIL::ULT;
+    }
+    si32Code = AMDIL::UGE;
+    break;
+  case AMDIL::LGT:
+  case AMDIL::ULGT:
+    // a > b is b < a: swap both halves and reuse the "less than" handling.
+    std::swap(REGS[0], REGS[2]);
+    std::swap(REGS[1], REGS[3]);
+    // fall through
+  case AMDIL::LLT:
+  case AMDIL::ULLT:
+    if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
+      opi32Code = AMDIL::ILT;
+    } else {
+      opi32Code = AMDIL::ULT;
+    }
+    si32Code = AMDIL::ULT;
+    break;
+  };
+  // Do the initial opcode on the high and low components.
+  // This leaves the following:
+  // REGS[4] = L_HI OP R_HI
+  // REGS[5] = L_LO OP R_LO
+  generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
+  generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
+  switch(opi32Code) {
+  case AMDIL::IEQ:
+  case AMDIL::INE: {
+    // Equality needs both halves to match (and); inequality needs either
+    // half to differ (or).
+    uint32_t combineOp = (opi32Code == AMDIL::IEQ)
+                         ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
+    generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
+  }
+  break;
+  default:
+    // this finishes codegen for the following pattern
+    // REGS[4] || (REGS[5] && (L_HI == R_HI))
+    generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
+    generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
+                        REGS[9]);
+    generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
+                        REGS[10]);
+    break;
+  }
+  // The 32-bit result is written to both halves of the 64-bit destination.
+  generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
+}
+
+/// Alignment query for a function. The argument is ignored and the answer
+/// is always 0.
+unsigned int
+AMDILTargetLowering::getFunctionAlignment(const Function *) const
+{
+  const unsigned int alignment = 0;
+  return alignment;
+}
+
+/// Decide whether folding a bitcast into a load is worthwhile.
+///
+/// The fold is rejected (false) only when all of the following hold: both
+/// types have the same total width, the load's element type is wider than the
+/// bitcast's element type, the bitcast's elements are narrower than 32 bits,
+/// and the load's elements are at least 32 bits wide. Every other combination
+/// returns true.
+bool
+AMDILTargetLowering::isLoadBitCastBeneficial(EVT lVT, EVT bVT) const
+{
+  if (lVT.getSizeInBits() != bVT.getSizeInBits()) {
+    return true;
+  }
+  const unsigned loadEltBits = lVT.getScalarType().getSizeInBits();
+  const unsigned castEltBits = bVT.getScalarType().getSizeInBits();
+  if (loadEltBits <= castEltBits) {
+    return true;
+  }
+  if (castEltBits >= 32) {
+    return true;
+  }
+  return loadEltBits < 32;
+}
+
+/// Record the emission context used by genVReg() and generateMachineInst():
+/// the block to insert into, the insertion point, the debug location and the
+/// instruction info table. The pointers are stored as given, not copied.
+void
+AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
+    MachineBasicBlock::iterator &BBI,
+    DebugLoc *DL, const TargetInstrInfo *TII) const
+{
+  // Instruction table and debug location first, then the insertion position.
+  mTII = TII;
+  mDL = DL;
+  mBB = BB;
+  mBBI = BBI;
+}
+/// Create a fresh virtual register of the register class identified by
+/// \p regType in the function that owns the current block (mBB).
+uint32_t
+AMDILTargetLowering::genVReg(uint32_t regType) const
+{
+  MachineFunction *MF = mBB->getParent();
+  return MF->getRegInfo().createVirtualRegister(getRegClassFromID(regType));
+}
+
+/// Emit instruction \p opcode defining register \p dst at the recorded
+/// insertion point (mBB / mBBI) with the recorded debug location, and hand
+/// back the builder so that source operands can be appended.
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
+{
+  MachineInstrBuilder MIB =
+    BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
+  return MIB;
+}
+
+/// One-source form: emit \p opcode defining \p dst and append \p src1 as a
+/// register operand.
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+    uint32_t src1) const
+{
+  MachineInstrBuilder MIB = generateMachineInst(opcode, dst);
+  MIB.addReg(src1);
+  return MIB;
+}
+
+/// Two-source form: emit \p opcode defining \p dst and append \p src1 and
+/// \p src2, in that order, as register operands.
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+    uint32_t src1, uint32_t src2) const
+{
+  MachineInstrBuilder MIB = generateMachineInst(opcode, dst, src1);
+  MIB.addReg(src2);
+  return MIB;
+}
+
+/// Three-source form: emit \p opcode defining \p dst and append \p src1,
+/// \p src2 and \p src3, in that order, as register operands.
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+    uint32_t src1, uint32_t src2, uint32_t src3) const
+{
+  MachineInstrBuilder MIB = generateMachineInst(opcode, dst, src1, src2);
+  MIB.addReg(src3);
+  return MIB;
+}
+
+
+/// Build the DAG for a signed division carried out in single-precision
+/// floating point.
+///
+/// Both operands are sign-extended to 32-bit integers, converted to float and
+/// divided; the truncated quotient is converted back to an integer and then
+/// corrected by +1 or -1 (sign taken from LHS ^ RHS) when the magnitude of the
+/// floating-point remainder is not smaller than the magnitude of the divisor.
+/// Scalar, 2-element and 4-element types select i32/f32, v2i32/v2f32 and
+/// v4i32/v4f32; for any other element count the working types are left
+/// default-constructed.
+SDValue
+AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  // Working integer / float types with the same element count as the result.
+  MVT INTTY;
+  MVT FLTTY;
+  if (OVT.isVector()) {
+    const unsigned numElts = OVT.getVectorNumElements();
+    if (numElts == 2) {
+      INTTY = MVT::v2i32;
+      FLTTY = MVT::v2f32;
+    } else if (numElts == 4) {
+      INTTY = MVT::v4i32;
+      FLTTY = MVT::v4f32;
+    }
+  } else {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  }
+  unsigned bitsize = OVT.getScalarType().getSizeInBits();
+
+  // Correction term: ((LHS ^ RHS) >> (bitsize - 2)) | 1, i.e. +1 when the
+  // operand signs agree and -1 when they differ.
+  SDValue signAdj = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+  SDValue shiftAmt = DAG.getConstant(bitsize - 2, OVT);
+  signAdj = DAG.getNode(ISD::SRA, DL, OVT, signAdj, shiftAmt);
+  SDValue one = DAG.getConstant(1, OVT);
+  signAdj = DAG.getNode(ISD::OR, DL, OVT, signAdj, one);
+  signAdj = DAG.getSExtOrTrunc(signAdj, DL, OVT);
+
+  // Integer operands widened to the working type, then converted to float.
+  SDValue lhsInt = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+  SDValue rhsInt = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+  SDValue lhsFlt = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, lhsInt);
+  SDValue rhsFlt = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, rhsInt);
+
+  // quot = trunc(native_divide(lhs, rhs))
+  SDValue quotFlt = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, lhsFlt, rhsFlt);
+  quotFlt = DAG.getNode(ISD::FTRUNC, DL, FLTTY, quotFlt);
+
+  // rem = mad(-quot, rhs, lhs)
+  SDValue negQuot = DAG.getNode(ISD::FNEG, DL, FLTTY, quotFlt);
+  SDValue remFlt = DAG.getNode(AMDILISD::MAD, DL, FLTTY, negQuot, rhsFlt,
+                               lhsFlt);
+
+  // Integer quotient before correction.
+  SDValue quotInt = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, quotFlt);
+
+  // needFix = fabs(rem) >= fabs(rhs)
+  remFlt = DAG.getNode(ISD::FABS, DL, FLTTY, remFlt);
+  rhsFlt = DAG.getNode(ISD::FABS, DL, FLTTY, rhsFlt);
+  SDValue needFix = DAG.getSetCC(DL, OVT, remFlt, rhsFlt, ISD::SETOGE);
+
+  // Apply the correction only when needed, otherwise add 0.
+  SDValue zero = DAG.getConstant(0, OVT);
+  signAdj = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, needFix, signAdj, zero);
+
+  // result = quot + correction, in the original type.
+  quotInt = DAG.getSExtOrTrunc(quotInt, DL, OVT);
+  return DAG.getNode(ISD::ADD, DL, OVT, quotInt, signAdj);
+}
+
+/// Build the DAG for a signed division on top of an unsigned division.
+///
+/// Each operand is turned into its magnitude with the (x + m) ^ m idiom, where
+/// m is the result of the "x < 0" compare; the magnitudes are divided with
+/// ISD::UDIV and the sign of the quotient is restored with the same idiom,
+/// using the xor of the two masks. Equivalent IL:
+///   ilt  r10, LHS, 0        ilt  r11, RHS, 0
+///   iadd r0, LHS, r10       iadd r1, RHS, r11
+///   ixor r0, r0, r10        ixor r1, r1, r11
+///   udiv r0, r0, r1
+///   ixor r10, r10, r11
+///   iadd r0, r0, r10
+///   ixor DST, r0, r10
+SDValue
+AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue num = Op.getOperand(0);
+  SDValue den = Op.getOperand(1);
+
+  // numMask = (num < 0)
+  SDValue numMask = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      num, DAG.getConstant(0, OVT));
+
+  // denMask = (den < 0)
+  SDValue denMask = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      den, DAG.getConstant(0, OVT));
+
+  // Magnitudes: add the mask to both operands first, then xor both.
+  num = DAG.getNode(ISD::ADD, DL, OVT, num, numMask);
+  den = DAG.getNode(ISD::ADD, DL, OVT, den, denMask);
+  num = DAG.getNode(ISD::XOR, DL, OVT, num, numMask);
+  den = DAG.getNode(ISD::XOR, DL, OVT, den, denMask);
+
+  // Unsigned quotient of the magnitudes.
+  SDValue quot = DAG.getNode(ISD::UDIV, DL, OVT, num, den);
+
+  // The quotient is negative exactly when the operand masks differ.
+  SDValue signMask = DAG.getNode(ISD::XOR, DL, OVT, numMask, denMask);
+
+  // Re-apply the sign: (quot + signMask) ^ signMask.
+  quot = DAG.getNode(ISD::ADD, DL, OVT, quot, signMask);
+  return DAG.getNode(ISD::XOR, DL, OVT, quot, signMask);
+}
+
+/// 64-bit signed division is not expanded here: result 0 of the incoming
+/// node is handed back unchanged.
+SDValue
+AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *N = Op.getNode();
+  return SDValue(N, 0);
+}
+
+/// Build the DAG for an unsigned division carried out in single-precision
+/// floating point.
+///
+/// Implements the following CL:
+///   int   ia = (int)LHS
+///   float fa = (float)ia
+///   int   ib = (int)RHS
+///   float fb = (float)ib
+///   float fq = trunc(native_divide(fa, fb))
+///   float t  = mad(fq, fb, fb)          // (fq + 1) * fb
+///   int   iq = (int)fq + (t <= fa)      // as emitted, see note below
+///   return (type)iq
+/// Scalar, 2-element and 4-element types select i32/f32, v2i32/v2f32 and
+/// v4i32/v4f32; for any other element count the working types are left
+/// default-constructed.
+SDValue
+AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  MVT INTTY;
+  MVT FLTTY;
+  if (!OVT.isVector()) {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  } else if (OVT.getVectorNumElements() == 2) {
+    INTTY = MVT::v2i32;
+    FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  }
+
+  // int ia = (int)LHS
+  SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
+
+  // float fa = (float)ia
+  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+  // int ib = (int)RHS
+  SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
+
+  // float fb = (float)ib
+  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+  // float fq = native_divide(fa, fb)
+  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+  // fq = trunc(fq)
+  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+  // float t = mad(fq, fb, fb)
+  SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
+
+  // Integer quotient before correction.
+  fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+  // Correction term (t <= fa). The scalar and vector cases used to be two
+  // byte-identical branches; a single compare covers both.
+  SDValue iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
+
+  // NOTE(review): the original comment here said the correction must be a
+  // subtraction "because GPU returns 0, -1", yet the node emitted is an ADD.
+  // Behavior is kept as-is; confirm against the target's boolean
+  // representation which one is intended.
+  iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
+
+  // return (type)iq
+  iq = DAG.getZExtOrTrunc(iq, DL, OVT);
+  return iq;
+}
+
+/// 32-bit unsigned division needs no expansion here: result 0 of the
+/// incoming node is handed back unchanged.
+SDValue
+AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *N = Op.getNode();
+  return SDValue(N, 0);
+}
+
+/// 64-bit unsigned division is not expanded here: result 0 of the incoming
+/// node is handed back unchanged.
+SDValue
+AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *N = Op.getNode();
+  return SDValue(N, 0);
+}
+/// Lower an 8-bit signed remainder by sign-extending both operands to 32-bit
+/// integers (scalar, v2 or v4), emitting a 32-bit ISD::SREM, and converting
+/// the result back to the original type.
+SDValue
+AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+
+  // 32-bit integer type with the same element count as the result.
+  MVT wideTy;
+  if (OVT == MVT::v2i8) {
+    wideTy = MVT::v2i32;
+  } else if (OVT == MVT::v4i8) {
+    wideTy = MVT::v4i32;
+  } else {
+    wideTy = MVT::i32;
+  }
+
+  SDValue wideLHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, wideTy);
+  SDValue wideRHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, wideTy);
+  SDValue rem = DAG.getNode(ISD::SREM, DL, wideTy, wideLHS, wideRHS);
+  return DAG.getSExtOrTrunc(rem, DL, OVT);
+}
+
+/// Lower a 16-bit signed remainder by sign-extending both operands to 32-bit
+/// integers (scalar, v2 or v4), emitting a 32-bit ISD::SREM, and converting
+/// the result back to the original type.
+SDValue
+AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+
+  // 32-bit integer type with the same element count as the result.
+  MVT wideTy;
+  if (OVT == MVT::v2i16) {
+    wideTy = MVT::v2i32;
+  } else if (OVT == MVT::v4i16) {
+    wideTy = MVT::v4i32;
+  } else {
+    wideTy = MVT::i32;
+  }
+
+  SDValue wideLHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, wideTy);
+  SDValue wideRHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, wideTy);
+  SDValue rem = DAG.getNode(ISD::SREM, DL, wideTy, wideLHS, wideRHS);
+  return DAG.getSExtOrTrunc(rem, DL, OVT);
+}
+
+/// Build the DAG for a signed remainder on top of an unsigned division.
+///
+/// Both operands are turned into their magnitudes with the (x + m) ^ m idiom
+/// (m being the "x < 0" compare result), the remainder of the magnitudes is
+/// computed as a - (a / b) * b, and the sign of the dividend is re-applied to
+/// the result. Equivalent IL:
+///   mov r0, LHS
+///   mov r1, RHS
+///   ilt r10, r0, 0
+///   ilt r11, r1, 0
+///   iadd r0, r0, r10
+///   iadd r1, r1, r11
+///   ixor r0, r0, r10
+///   ixor r1, r1, r11
+///   udiv r20, r0, r1
+///   umul r20, r20, r1
+///   sub r0, r0, r20
+///   iadd r0, r0, r10
+///   ixor DST, r0, r10
+SDValue
+AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      r0, DAG.getConstant(0, OVT));
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      r1, DAG.getConstant(0, OVT));
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r20, r0, r1
+  // This must be the quotient: the next two steps compute r0 - r20 * r1,
+  // which is only the remainder when r20 = r0 / r1. (The node emitted here
+  // used to be ISD::UREM, contradicting the IL above.)
+  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+  // umul r20, r20, r1
+  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
+
+  // sub r0, r0, r20
+  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+/// 64-bit signed remainder is not expanded here: result 0 of the incoming
+/// node is handed back unchanged.
+SDValue
+AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *N = Op.getNode();
+  return SDValue(N, 0);
+}
+
+/// Build the DAG for an 8-bit unsigned remainder using 32-bit operations.
+///
+/// Operands are widened to 32 bits and masked to their low 8 bits; the
+/// remainder is computed as a - (a / b) * b, with a zero divisor replaced by 1
+/// for the division and the quotient then forced to 0, so that a zero divisor
+/// yields the dividend. Equivalent IL:
+///   mov r0, as_u32(LHS)
+///   mov r1, as_u32(RHS)
+///   and r10, r0, 0xFF
+///   and r11, r1, 0xFF
+///   cmov_logical r3, r11, r11, 0x1
+///   udiv r3, r10, r3
+///   cmov_logical r3, r11, r3, 0
+///   umul r3, r3, r11
+///   sub r3, r10, r3
+///   and as_u8(DST), r3, 0xFF
+SDValue
+AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i8) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i8) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  // mov r0, as_u32(LHS)
+  // (the extension kind does not matter: the upper bits are masked off below)
+  SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // mov r1, as_u32(RHS)
+  SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+  // and r10, r0, 0xFF
+  SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
+      DAG.getConstant(0xFF, INTTY));
+
+  // and r11, r1, 0xFF
+  SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
+      DAG.getConstant(0xFF, INTTY));
+
+  // cmov_logical r3, r11, r11, 0x1
+  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
+      DAG.getConstant(0x01, INTTY));
+
+  // udiv r3, r10, r3
+  // This must be the quotient: the steps below compute r10 - r3 * r11, which
+  // is only the remainder when r3 = r10 / r11. (The node emitted here used to
+  // be ISD::UREM, contradicting the IL above.)
+  r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
+
+  // cmov_logical r3, r11, r3, 0
+  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
+      DAG.getConstant(0, INTTY));
+
+  // umul r3, r3, r11
+  r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
+
+  // sub r3, r10, r3
+  r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
+
+  // and as_u8(DST), r3, 0xFF
+  SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
+      DAG.getConstant(0xFF, INTTY));
+  DST = DAG.getZExtOrTrunc(DST, DL, OVT);
+  return DST;
+}
+
+/// Build the DAG for a 16-bit unsigned remainder.
+///
+/// Operands are masked to 16 bits in the original type; the division itself
+/// is done on zero-extended 32-bit values. The remainder is computed as
+/// a - (a / b) * b, with a zero divisor replaced by 1 for the division and the
+/// quotient then forced to 0, so that a zero divisor yields the dividend.
+/// Equivalent IL:
+///   mov r0, LHS
+///   mov r1, RHS
+///   and r10, r0, 0xFFFF
+///   and r11, r1, 0xFFFF
+///   cmov_logical r3, r11, r11, 0x1
+///   udiv as_u16(r3), as_u32(r10), as_u32(r3)
+///   and r3, r3, 0xFFFF
+///   cmov_logical r3, r11, r3, 0
+///   umul r3, r3, r11
+///   sub r3, r10, r3
+///   and DST, r3, 0xFFFF
+SDValue
+AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i16) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i16) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // and r10, r0, 0xFFFF
+  SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
+      DAG.getConstant(0xFFFF, OVT));
+
+  // and r11, r1, 0xFFFF
+  SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
+      DAG.getConstant(0xFFFF, OVT));
+
+  // cmov_logical r3, r11, r11, 0x1
+  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
+      DAG.getConstant(0x01, OVT));
+
+  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
+  // This must be the quotient: the steps below compute r10 - r3 * r11, which
+  // is only the remainder when r3 = r10 / r11. (The node emitted here used to
+  // be ISD::UREM, contradicting the IL above.)
+  r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
+  r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
+  r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
+  r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
+  r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
+
+  // and r3, r3, 0xFFFF
+  r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
+      DAG.getConstant(0xFFFF, OVT));
+
+  // cmov_logical r3, r11, r3, 0
+  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
+      DAG.getConstant(0, OVT));
+  // umul r3, r3, r11
+  r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
+
+  // sub r3, r10, r3
+  r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
+
+  // and DST, r3, 0xFFFF
+  SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
+      DAG.getConstant(0xFFFF, OVT));
+  return DST;
+}
+
+/// Build the DAG for a 32-bit unsigned remainder as LHS - (LHS / RHS) * RHS.
+/// Equivalent IL:
+///   udiv r20, LHS, RHS
+///   umul r20, r20, RHS
+///   sub  DST, LHS, r20
+SDValue
+AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue dividend = Op.getOperand(0);
+  SDValue divisor = Op.getOperand(1);
+
+  // quotient = dividend / divisor
+  SDValue quotient = DAG.getNode(ISD::UDIV, DL, OVT, dividend, divisor);
+
+  // product = quotient * divisor
+  SDValue product = DAG.getNode(AMDILISD::UMUL, DL, OVT, quotient, divisor);
+
+  // remainder = dividend - product
+  return DAG.getNode(ISD::SUB, DL, OVT, dividend, product);
+}
+
+/// 64-bit unsigned remainder is not expanded here: result 0 of the incoming
+/// node is handed back unchanged.
+SDValue
+AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *N = Op.getNode();
+  return SDValue(N, 0);
+}
+
+
+/// Build the DAG for a single-precision floating-point division.
+///
+/// Two sequences exist, selected by the device generation:
+///  - HD4XXX: the operands are split into an exponent field (0x7F800000) and a
+///    sign+mantissa field (0x807FFFFF). The mantissas, re-biased to exponent
+///    0x3F800000, are divided as 1.0/b followed by a multiply; the exponent
+///    difference is added back with integer arithmetic, and exponent
+///    underflow / overflow of the result is patched with conditional moves.
+///    Operands whose exponent field is all ones or zero bypass the exponent
+///    adjustment. The register names (R20, R30, ...) follow the IL listing
+///    given in the per-statement comments.
+///  - all other generations: the divisor is pre-scaled by 2^-32 when its
+///    magnitude exceeds 2^96, the division is done with DIV_INF, and the
+///    quotient is scaled by the same factor.
+///
+/// TODO: the HD4XXX sequence doesn't work for vector types yet.
+SDValue
+AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2f32) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4f32) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue DST;
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+                                &this->getTargetMachine())->getSubtargetImpl();
+  if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    // mov r20, as_int(LHS)
+    SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
+
+    // mov r21, as_int(RHS)
+    SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
+
+    // and r30, r20, 0x7f800000        (exponent field of LHS)
+    SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+        DAG.getConstant(0x7F800000, INTTY));
+
+    // and r31, r21, 0x7f800000        (exponent field of RHS)
+    SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+        DAG.getConstant(0x7f800000, INTTY));
+
+    // and r32, r20, 0x807FFFFF        (sign + mantissa of LHS)
+    SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+        DAG.getConstant(0x807FFFFF, INTTY));
+
+    // and r33, r21, 0x807FFFFF        (sign + mantissa of RHS)
+    SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+        DAG.getConstant(0x807FFFFF, INTTY));
+
+    // ieq r40, r30, 0x7F800000
+    SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+        R30, DAG.getConstant(0x7F800000, INTTY));
+
+    // ieq r41, r31, 0x7F800000
+    SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+        R31, DAG.getConstant(0x7F800000, INTTY));
+
+    // ieq r42, r30, 0
+    SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+        R30, DAG.getConstant(0, INTTY));
+
+    // ieq r43, r31, 0
+    SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+        R31, DAG.getConstant(0, INTTY));
+
+    // and r50, r20, 0x80000000        (sign of LHS)
+    SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+        DAG.getConstant(0x80000000, INTTY));
+
+    // and r51, r21, 0x80000000        (sign of RHS)
+    SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+        DAG.getConstant(0x80000000, INTTY));
+
+    // ior r32, r32, 0x3f800000
+    R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
+        DAG.getConstant(0x3F800000, INTTY));
+
+    // ior r33, r33, 0x3f800000
+    R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
+        DAG.getConstant(0x3F800000, INTTY));
+
+    // cmov_logical r32, r42, r50, r32
+    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
+
+    // cmov_logical r33, r43, r51, r33
+    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
+
+    // cmov_logical r32, r40, r20, r32
+    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
+
+    // cmov_logical r33, r41, r21, r33
+    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
+
+    // ior r50, r40, r41
+    R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
+
+    // ior r51, r42, r43
+    R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
+
+    // ior r50, r50, r51               (any operand special?)
+    R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
+
+    // inegate r52, r31
+    SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
+
+    // iadd r30, r30, r52              (exponent difference)
+    R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
+
+    // cmov_logical r30, r50, 0, r30
+    R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+        DAG.getConstant(0, INTTY), R30);
+
+    // div_zeroop(infinity) r21, 1.0, as_float(r33)
+    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
+    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+        DAG.getConstantFP(1.0f, OVT), R33);
+
+    // mul_ieee as_int(r20), as_float(r32), r21
+    // (This reciprocal/multiply pair used to be emitted twice, verbatim; the
+    // second copy recomputed the same values and has been dropped.)
+    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
+    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
+    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
+
+    // and r22, r20, 0x7FFFFFFF
+    SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+        DAG.getConstant(0x7FFFFFFF, INTTY));
+
+    // and r23, r20, 0x80000000
+    SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+        DAG.getConstant(0x80000000, INTTY));
+
+    // ishr r60, r22, 0x00000017
+    SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
+        DAG.getConstant(0x00000017, INTTY));
+
+    // ishr r61, r30, 0x00000017
+    SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
+        DAG.getConstant(0x00000017, INTTY));
+
+    // iadd r20, r20, r30
+    R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
+
+    // iadd r21, r22, r30
+    // NOTE(review): this value is not read by any later node in this
+    // function; kept to mirror the IL sequence.
+    R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
+
+    // iadd r60, r60, r61              (biased exponent of the result)
+    R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
+
+    // ige r42, 0, R60                 (exponent underflow)
+    R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+        DAG.getConstant(0, INTTY),
+        R60);
+
+    // ior r41, r23, 0x7F800000        (signed infinity)
+    R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
+        DAG.getConstant(0x7F800000, INTTY));
+
+    // ige r40, r60, 0x000000FF        (exponent overflow)
+    R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+        R60,
+        DAG.getConstant(0x000000FF, INTTY));
+
+    // cmov_logical r40, r50, 0, r40
+    R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+        DAG.getConstant(0, INTTY),
+        R40);
+
+    // cmov_logical r20, r42, r23, r20
+    R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
+
+    // cmov_logical DST, r40, r41, r20
+    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
+
+    // as_float(DST)
+    DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
+  } else {
+    // The following sequence of DAG nodes produce the following IL:
+    // fabs r1, RHS
+    // lt r2, 0x1.0p+96f, r1
+    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
+    // mul_ieee r1, RHS, r3
+    // div_zeroop(infinity) r0, LHS, r1
+    // mul_ieee DST, r0, r3
+    // NOTE(review): the literal 0x2f800000 used below is the bit pattern of
+    // 2^-32, not the 0x1.0p-23f named in the IL comment; the float constants
+    // are also passed as integer-typed bit patterns. Both are kept as-is.
+
+    // fabs r1, RHS
+    SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
+    // lt r2, 0x1.0p+96f, r1
+    SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
+        DAG.getConstant(0x6f800000, INTTY), r1);
+    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
+    SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
+        DAG.getConstant(0x2f800000, INTTY),
+        DAG.getConstant(0x3f800000, INTTY));
+    // mul_ieee r1, RHS, r3
+    r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
+    // div_zeroop(infinity) r0, LHS, r1
+    SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
+    // mul_ieee DST, r0, r3
+    DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
+  }
+  return DST;
+}
+
+/// 64-bit floating-point division is not expanded here: result 0 of the
+/// incoming node is handed back unchanged.
+SDValue
+AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *N = Op.getNode();
+  return SDValue(N, 0);
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,563 @@
+//===-- AMDILISelLowering.h -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_ISELLOWERING_H_
+#define AMDIL_ISELLOWERING_H_
+#include "AMDIL.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm
+{
+namespace AMDILISD
+{
+enum { // AMDIL-specific SelectionDAG node opcodes, numbered after the generic ISD opcodes
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ INTTOANY, // Dummy instruction that takes an int and goes to
+ // any type converts the SDNode to an int
+ DP_TO_FP, // Conversion from 64bit FP to 32bit FP
+ FP_TO_DP, // Conversion from 32bit FP to 64bit FP
+ BITCONV, // instruction that converts from any type to any type
+ CMOV, // 32bit FP Conditional move instruction
+ CMOVLOG, // 32bit FP Conditional move logical instruction
+ SELECT, // NOTE(review): original comment duplicated CMOVLOG's; presumably a select node - confirm
+ SETCC, // NOTE(review): original comment duplicated CMOVLOG's; presumably a set-on-condition node - confirm
+ ISGN, // 32bit Int Sign instruction
+ INEGATE, // 32bit Int Negation instruction
+ MAD, // 32bit Fused Multiply Add instruction
+ ADD, // 32/64 bit pseudo instruction
+ AND, // 128 bit and instruction
+ OR, // 128 bit or instruction
+ NOT, // 128 bit not instruction
+ XOR, // 128 bit xor instruction
+ MOVE, // generic mov instruction
+ PHIMOVE, // generic phi-node mov instruction
+ VBUILD, // scalar to vector mov instruction
+ VEXTRACT, // extract vector components
+ VINSERT, // insert vector components
+ VCONCAT, // concat a single vector to another vector
+ UMAD, // 32bit UInt Fused Multiply Add instruction
+ CALL, // Function call based on a single integer
+ RET, // Return from a function call
+ SELECT_CC, // Select the correct conditional instruction
+ BRCC, // Select the correct branch instruction
+ CMPCC, // Compare two GPR operands
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC
+ //flags.
+ SELECT_FCC, // Select between two values using the current FCC
+ //flags.
+ LCREATE, // Create a 64bit integer from two 32 bit integers
+ LCOMPHI, // Get the hi 32 bits from a 64 bit integer
+ LCOMPLO, // Get the lo 32 bits from a 64 bit integer
+ DCREATE, // Create a 64bit float from two 32 bit integers
+ DCOMPHI, // Get the hi 32 bits from a 64 bit float
+ DCOMPLO, // Get the lo 32 bits from a 64 bit float
+ LCREATE2, // Create a 64bit integer from two 32 bit integers
+ LCOMPHI2, // Get the hi 32 bits from a 64 bit integer
+ LCOMPLO2, // Get the lo 32 bits from a 64 bit integer
+ DCREATE2, // Create a 64bit float from two 32 bit integers
+ DCOMPHI2, // Get the hi 32 bits from a 64 bit float
+ DCOMPLO2, // Get the lo 32 bits from a 64 bit float
+ UMUL, // 32bit unsigned multiplication
+ IFFB_HI, // 32bit find first hi bit instruction
+ IFFB_LO, // 32bit find first low bit instruction
+ DIV_INF, // Divide with infinity returned on zero divisor
+ SMAX, // Signed integer max
+ CMP, // Compare; built in the lowering code as (condition-code constant, lhs, rhs)
+ IL_CC_I_GT,
+ IL_CC_I_LT,
+ IL_CC_I_GE,
+ IL_CC_I_LE,
+ IL_CC_I_EQ,
+ IL_CC_I_NE,
+ RET_FLAG, // Return node; LowerReturn builds it from the chain, a constant 0 and optional glue
+ BRANCH_COND,
+ LOOP_NZERO,
+ LOOP_ZERO,
+ LOOP_CMP,
+ ADDADDR,
+ // ATOMIC Operations
+ // Global Memory
+ ATOM_G_ADD = ISD::FIRST_TARGET_MEMORY_OPCODE, // numbering restarts here at the first target memory opcode
+ ATOM_G_AND,
+ ATOM_G_CMPXCHG,
+ ATOM_G_DEC,
+ ATOM_G_INC,
+ ATOM_G_MAX,
+ ATOM_G_UMAX,
+ ATOM_G_MIN,
+ ATOM_G_UMIN,
+ ATOM_G_OR,
+ ATOM_G_SUB,
+ ATOM_G_RSUB,
+ ATOM_G_XCHG,
+ ATOM_G_XOR,
+ ATOM_G_ADD_NORET,
+ ATOM_G_AND_NORET,
+ ATOM_G_CMPXCHG_NORET,
+ ATOM_G_DEC_NORET,
+ ATOM_G_INC_NORET,
+ ATOM_G_MAX_NORET,
+ ATOM_G_UMAX_NORET,
+ ATOM_G_MIN_NORET,
+ ATOM_G_UMIN_NORET,
+ ATOM_G_OR_NORET,
+ ATOM_G_SUB_NORET,
+ ATOM_G_RSUB_NORET,
+ ATOM_G_XCHG_NORET,
+ ATOM_G_XOR_NORET,
+ // Local Memory
+ ATOM_L_ADD,
+ ATOM_L_AND,
+ ATOM_L_CMPXCHG,
+ ATOM_L_DEC,
+ ATOM_L_INC,
+ ATOM_L_MAX,
+ ATOM_L_UMAX,
+ ATOM_L_MIN,
+ ATOM_L_UMIN,
+ ATOM_L_OR,
+ ATOM_L_MSKOR,
+ ATOM_L_SUB,
+ ATOM_L_RSUB,
+ ATOM_L_XCHG,
+ ATOM_L_XOR,
+ ATOM_L_ADD_NORET,
+ ATOM_L_AND_NORET,
+ ATOM_L_CMPXCHG_NORET,
+ ATOM_L_DEC_NORET,
+ ATOM_L_INC_NORET,
+ ATOM_L_MAX_NORET,
+ ATOM_L_UMAX_NORET,
+ ATOM_L_MIN_NORET,
+ ATOM_L_UMIN_NORET,
+ ATOM_L_OR_NORET,
+ ATOM_L_MSKOR_NORET,
+ ATOM_L_SUB_NORET,
+ ATOM_L_RSUB_NORET,
+ ATOM_L_XCHG_NORET,
+ ATOM_L_XOR_NORET,
+ // Region Memory
+ ATOM_R_ADD,
+ ATOM_R_AND,
+ ATOM_R_CMPXCHG,
+ ATOM_R_DEC,
+ ATOM_R_INC,
+ ATOM_R_MAX,
+ ATOM_R_UMAX,
+ ATOM_R_MIN,
+ ATOM_R_UMIN,
+ ATOM_R_OR,
+ ATOM_R_MSKOR,
+ ATOM_R_SUB,
+ ATOM_R_RSUB,
+ ATOM_R_XCHG,
+ ATOM_R_XOR,
+ ATOM_R_ADD_NORET,
+ ATOM_R_AND_NORET,
+ ATOM_R_CMPXCHG_NORET,
+ ATOM_R_DEC_NORET,
+ ATOM_R_INC_NORET,
+ ATOM_R_MAX_NORET,
+ ATOM_R_UMAX_NORET,
+ ATOM_R_MIN_NORET,
+ ATOM_R_UMIN_NORET,
+ ATOM_R_OR_NORET,
+ ATOM_R_MSKOR_NORET,
+ ATOM_R_SUB_NORET,
+ ATOM_R_RSUB_NORET,
+ ATOM_R_XCHG_NORET,
+ ATOM_R_XOR_NORET,
+ // Append buffer
+ APPEND_ALLOC,
+ APPEND_CONSUME,
+ // 2D Images
+ IMAGE2D_READ,
+ IMAGE2D_WRITE,
+ IMAGE2D_INFO0,
+ IMAGE2D_INFO1,
+ // 3D Images
+ IMAGE3D_READ,
+ IMAGE3D_WRITE,
+ IMAGE3D_INFO0,
+ IMAGE3D_INFO1,
+ ATOM_F_ADD, // NOTE(review): the memory space meant by the "F" atomics is not stated here - confirm
+ ATOM_F_AND,
+ ATOM_F_CMPXCHG,
+ ATOM_F_DEC,
+ ATOM_F_INC,
+ ATOM_F_MAX,
+ ATOM_F_UMAX,
+ ATOM_F_MIN,
+ ATOM_F_UMIN,
+ ATOM_F_OR,
+ ATOM_F_SUB,
+ ATOM_F_XCHG,
+ ATOM_F_XOR,
+
+ LAST_ISD_NUMBER // sentinel: last enumerator of the list
+};
+} // AMDILISD
+
+class MachineBasicBlock;
+class MachineInstr;
+class DebugLoc;
+class TargetInstrInfo;
+
+class AMDILTargetLowering : public TargetLowering
+{
+private:
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+public:
+ AMDILTargetLowering(TargetMachine &TM);
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const {
+ return MVT::i32;
+ }
+
+ virtual SDValue
+ LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ int
+ getVarArgsFrameOffset() const;
+
+ /// computeMaskedBitsForTargetNode - Determine which bits of Op are
+ /// known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ /// NOTE(review): there is no Mask parameter in this signature, so
+ /// presumably all bits of Op are considered -- confirm.
+ virtual void
+ computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0
+ ) const;
+
+ virtual MachineBasicBlock*
+ EmitInstrWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual bool
+ getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const;
+ virtual const char*
+ getTargetNodeName(
+ unsigned Opcode
+ ) const;
+
+ /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ // We want to mark f32/f64 floating point values as
+ // legal
+ bool
+ isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ // We don't want to shrink f64/f32 constants because
+ // they both take up the same amount of space and
+ // we don't want to use a f2d instruction.
+ bool ShouldShrinkFPConstant(EVT VT) const;
+
+ /// getFunctionAlignment - Return the Log2 alignment of this
+ /// function.
+ unsigned int
+ getFunctionAlignment(const Function *F) const;
+
+ /// This function returns true if the target allows unaligned memory accesses
+ /// of the specified type. This is used, for example, in situations where an
+ /// array copy/move/set is converted to a sequence of store operations. Its
+ /// use helps to ensure that such replacements don't generate code that causes
+ /// an alignment error (trap) on the target machine.
+ /// @brief Determine if the target supports unaligned memory accesses.
+ bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
+ /// Return true if the load uses larger data types than
+ /// the bitcast and false otherwise.
+ /// This should disable optimizing:
+ /// (char16)((int4*)ptr)[idx] => (char16*)ptr[idx]
+ /// but not disable:
+ /// (int4)((char16*)ptr)[idx] => (int4*)ptr[idx]
+ bool
+ isLoadBitCastBeneficial(EVT load, EVT bitcast) const;
+
+private:
+ CCAssignFn*
+ CCAssignFnForNode(unsigned int CC) const;
+
+ SDValue LowerCallResult(SDValue Chain,
+ SDValue InFlag,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i) const;
+
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
+ SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool doesNotRet,
+ bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ //+++--- Functions dealing with conversions between floating point and
+ //integer types ---+++//
+ SDValue
+ genCLZu64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ genCLZuN(SDValue Op, SelectionDAG &DAG, uint32_t bits) const;
+ SDValue
+ genCLZu32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ genf64toi32(SDValue Op, SelectionDAG &DAG,
+ bool includeSign) const;
+
+ SDValue
+ genf64toi64(SDValue Op, SelectionDAG &DAG,
+ bool includeSign) const;
+
+ SDValue
+ genu32tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const;
+
+ SDValue
+ genu64tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG& DAG) const;
+
+ SDValue
+ LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG& DAG) const;
+
+ SDValue
+ LowerINTRINSIC_VOID(SDValue Op, SelectionDAG& DAG) const;
+
+ SDValue
+ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerADD(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSUB(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerUREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerUDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerAND(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+ EVT
+ genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
+
+ SDValue
+ LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ void
+ generateCMPInstr(MachineInstr*, MachineBasicBlock*,
+ const TargetInstrInfo&) const;
+ MachineOperand
+ convertToReg(MachineOperand) const;
+
+ // Private members used by the set of instruction generation
+ // functions. They are marked mutable because they are cached so
+ // that they don't have to be looked up constantly when using the
+ // generateMachineInst/genVReg functions. This simplifies the code
+ // and makes it cleaner.
+ // The object itself doesn't logically change, as only these
+ // functions use these four cached members.
+ mutable MachineBasicBlock *mBB;
+ mutable DebugLoc *mDL;
+ mutable const TargetInstrInfo *mTII;
+ mutable MachineBasicBlock::iterator mBBI;
+ void
+ setPrivateData(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator &BBI,
+ DebugLoc *DL,
+ const TargetInstrInfo *TII) const;
+ uint32_t genVReg(uint32_t regType) const;
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst) const;
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst, uint32_t src1) const;
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst, uint32_t src1, uint32_t src2) const;
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst, uint32_t src1, uint32_t src2,
+ uint32_t src3) const;
+ uint32_t
+ addExtensionInstructions(
+ uint32_t reg, bool signedShift,
+ unsigned int simpleVT) const;
+ void
+ generateLongRelational(MachineInstr *MI,
+ unsigned int opCode) const;
+
+}; // AMDILTargetLowering
+} // end namespace llvm
+
+#endif // AMDIL_ISELLOWERING_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,171 @@
+//===-- AMDILImageExpansion.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Image expansion class for image capable devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILIOExpansion.h"
+#include "AMDILKernelManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+// Forwards both arguments to the AMDIL789IOExpansion base; nothing else is set up.
+AMDILImageExpansion::AMDILImageExpansion(TargetMachine &tm,
+                                         CodeGenOpt::Level OptLevel)
+  : AMDIL789IOExpansion(tm, OptLevel) {}
+
+// Destructor: the body is empty.
+AMDILImageExpansion::~AMDILImageExpansion()
+{}
+// Emits IL text that selects, through nested switch statements over every
+// (image resource, sampler) pair, a sample_resource instruction for the load.
+// The whole body is compiled out with "#if 0" (it uses mASM and O, which are
+// not declared in this function), so calling it currently does nothing.
+void AMDILImageExpansion::expandInefficientImageLoad(
+    MachineBasicBlock *mBB, MachineInstr *MI)
+{
+#if 0
+  const llvm::StringRef &name = MI->getOperand(0).getGlobal()->getName();
+  const char *tReg1, *tReg2, *tReg3, *tReg4;
+  tReg1 = mASM->getRegisterName(MI->getOperand(1).getReg());
+  if (MI->getOperand(2).isReg()) {
+    tReg2 = mASM->getRegisterName(MI->getOperand(2).getReg());
+  } else {
+    tReg2 = mASM->getRegisterName(AMDIL::R1);
+    O << "\tmov " << tReg2 << ", l" << MI->getOperand(2).getImm() << "\n";
+  }
+  if (MI->getOperand(3).isReg()) {
+    tReg3 = mASM->getRegisterName(MI->getOperand(3).getReg());
+  } else {
+    tReg3 = mASM->getRegisterName(AMDIL::R2);
+    O << "\tmov " << tReg3 << ", l" << MI->getOperand(3).getImm() << "\n";
+  }
+  if (MI->getOperand(4).isReg()) {
+    tReg4 = mASM->getRegisterName(MI->getOperand(4).getReg());
+  } else {
+    tReg4 = mASM->getRegisterName(AMDIL::R3);
+    O << "\tmov " << tReg4 << ", l" << MI->getOperand(4).getImm() << "\n";
+  }
+  bool internalSampler = false;
+  unsigned ImageCount = 3; // OPENCL_MAX_READ_IMAGES
+  unsigned SamplerCount = 3; // OPENCL_MAX_SAMPLERS
+  if (ImageCount - 1) {
+    O << "\tswitch " << tReg1 << "\n";
+  }
+  for (unsigned rID = 0; rID < ImageCount; ++rID) {
+    if (ImageCount - 1) {
+      if (!rID) {
+        O << "\tdefault\n";
+      } else {
+        O << "\tcase " << rID << "\n" ;
+      }
+    }
+    // Guarded like its endswitch; tReg2 is valid even for an immediate operand 2.
+    if (SamplerCount - 1) {
+      O << "\tswitch " << tReg2 << "\n";
+    }
+    for (unsigned sID = 0; sID < SamplerCount; ++sID) {
+      if (SamplerCount - 1) {
+        if (!sID) {
+          O << "\tdefault\n";
+        } else {
+          O << "\tcase " << sID << "\n" ;
+        }
+      }
+      if (internalSampler) {
+        // Check if sampler has normalized setting.
+        O << "\tand r0.x, " << tReg2 << ".x, l0.y\n"
+          << "\tif_logicalz r0.x\n"
+          << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+          << "\tsample_resource(" << rID << ")_sampler("
+          << sID << ")_coordtype(unnormalized) "
+          << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n"
+          << "\telse\n"
+          << "\tiadd " << tReg1 << ".y, " << tReg1 << ".x, l0.y\n"
+          << "\titof " << tReg2 << ", cb1[" << tReg1 << ".x].xyz\n"
+          << "\tmul " << tReg3 << ", " << tReg3 << ", " << tReg2 << "\n"
+          << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+          << "\tmul " << tReg3 << ", " << tReg3 << ", cb1[" << tReg1 << ".y].xyz\n"
+          << "\tsample_resource(" << rID << ")_sampler("
+          << sID << ")_coordtype(normalized) "
+          << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n"
+          << "\tendif\n";
+      } else {
+        O << "\tiadd " << tReg1 << ".y, " << tReg1 << ".x, l0.y\n"
+          // Check if sampler has normalized setting.
+          << "\tand r0, " << tReg2 << ".x, l0.y\n"
+          // Convert image dimensions to float.
+          << "\titof " << tReg4 << ", cb1[" << tReg1 << ".x].xyz\n"
+          // Move into R0 1 if unnormalized or dimensions if normalized.
+          << "\tcmov_logical r0, r0, " << tReg4 << ", r1.1111\n"
+          // Make coordinates unnormalized.
+          << "\tmul " << tReg3 << ", r0, " << tReg3 << "\n"
+          // Get linear filtering if set.
+          << "\tand " << tReg4 << ", " << tReg2 << ".x, l6.x\n"
+          // Save unnormalized coordinates in R0.
+          << "\tmov r0, " << tReg3 << "\n"
+          // Floor the coordinates due to HW incompatibility with precision requirements.
+          << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+          // Get the original coordinates (without floor) if linear filtering.
+          << "\tcmov_logical " << tReg3 << ", " << tReg4 << ".xxxx, r0, " << tReg3 << "\n"
+          // Normalize the coordinates by multiplying by 1/dimensions.
+          << "\tmul " << tReg3 << ", " << tReg3 << ", cb1[" << tReg1 << ".y].xyz\n"
+          << "\tsample_resource(" << rID << ")_sampler("
+          << sID << ")_coordtype(normalized) "
+          << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n";
+      }
+      if (SamplerCount - 1) {
+        O << "\tbreak\n";
+      }
+    }
+    if (SamplerCount - 1) {
+      O << "\tendswitch\n";
+    }
+    if (ImageCount - 1) {
+      O << "\tbreak\n";
+    }
+  }
+  if (ImageCount - 1) {
+    O << "\tendswitch\n";
+  }
+#endif
+}
+// Rewrites operand 1 of MI into the immediate returned by getPointerID(MI)
+// and sets saveInst.
+void AMDILImageExpansion::expandImageLoad(MachineBasicBlock *mBB, MachineInstr *MI) {
+  const uint32_t resourceID = getPointerID(MI);
+  MI->getOperand(1).ChangeToImmediate(resourceID);
+  saveInst = true;
+}
+// Calls mKM->setOutputInst(), rewrites operand 0 of MI into the immediate
+// returned by getPointerID(MI) and sets saveInst.
+void AMDILImageExpansion::expandImageStore(MachineBasicBlock *mBB, MachineInstr *MI) {
+  const uint32_t resourceID = getPointerID(MI);
+  mKM->setOutputInst();
+  MI->getOperand(0).ChangeToImmediate(resourceID);
+  saveInst = true;
+}
+// Builds, at MI's position in mBB, a CBLOAD that defines MI's operand-0
+// register and takes two immediates: the ID from getPointerID(MI) and 1.
+void AMDILImageExpansion::expandImageParam(MachineBasicBlock *mBB, MachineInstr *MI) {
+  const uint32_t resourceID = getPointerID(MI);
+  DebugLoc loc = MI->getDebugLoc();
+  const unsigned dstReg = MI->getOperand(0).getReg();
+  MachineInstrBuilder cbLoad = BuildMI(*mBB, MI, loc, mTII->get(AMDIL::CBLOAD), dstReg);
+  cbLoad.addImm(resourceID);
+  cbLoad.addImm(1);
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,275 @@
+//===-- AMDILInliner.cpp --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "amdilinline"
+#include "AMDIL.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+namespace {
+// Function pass that inlines the calls it collects from each function it
+// is run on (see runOnFunction).
+class LLVM_LIBRARY_VISIBILITY AMDILInlinePass : public FunctionPass {
+public:
+  TargetMachine &TM;
+  static char ID;
+  AMDILInlinePass(TargetMachine &tm, CodeGenOpt::Level OL);
+  ~AMDILInlinePass();
+  virtual const char* getPassName() const;
+  virtual bool runOnFunction(Function &F);
+  bool doInitialization(Module &M);
+  bool doFinalization(Module &M);
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+private:
+  // Per array type, the allocas that inlining has left in the caller and
+  // that later call sites may reuse.
+  typedef SmallVector<AllocaInst*, DEFAULT_VEC_SLOTS> AllocaListTy;
+  typedef DenseMap<const ArrayType*, AllocaListTy> InlinedArrayAllocasTy;
+  bool AMDILInlineCallIfPossible(CallSite CS, const TargetData *TD,
+                                 InlinedArrayAllocasTy &InlinedArrayAllocas);
+  CodeGenOpt::Level OptLevel;
+};
+char AMDILInlinePass::ID = 0;
+} // anonymous namespace
+
+
+namespace llvm {
+// Factory: returns a newly allocated AMDILInlinePass built from tm and OL.
+FunctionPass *createAMDILInlinePass(TargetMachine &tm, CodeGenOpt::Level OL)
+{
+  FunctionPass *inlinePass = new AMDILInlinePass(tm, OL);
+  return inlinePass;
+}
+} // llvm namespace
+
+// Stores the target machine reference and the optimization level.
+AMDILInlinePass::AMDILInlinePass(TargetMachine &tm, CodeGenOpt::Level OL)
+  : FunctionPass(ID), TM(tm), OptLevel(OL)
+{
+}
+// Destructor: the body is empty.
+AMDILInlinePass::~AMDILInlinePass()
+{}
+
+
+/// Inlines the call at CS and then tries to merge the array-typed static
+/// allocas that came with the inlined body into allocas that earlier inlining
+/// left behind (tracked per array type in InlinedArrayAllocas). TD is unused.
+/// Returns false if InlineFunction() rejects the call site, true otherwise.
+bool
+AMDILInlinePass::AMDILInlineCallIfPossible(CallSite CS,
+    const TargetData *TD, InlinedArrayAllocasTy &InlinedArrayAllocas)
+{
+  Function *Callee = CS.getCalledFunction();
+  Function *Caller = CS.getCaller();
+
+  // Try to inline the function. InlineFunction() records the static allocas
+  // that were inlined in IFI.StaticAllocas.
+  InlineFunctionInfo IFI;
+  if (!InlineFunction(CS, IFI))
+    return false;
+  DEBUG(errs() << "<amdilinline> function " << Caller->getName()
+        << ": inlined call to "<< Callee->getName() << "\n");
+
+  // If the inlined function had a higher stack protection level than the
+  // calling function, then bump up the caller's stack protection level.
+  if (Callee->hasFnAttr(Attribute::StackProtectReq))
+    Caller->addFnAttr(Attribute::StackProtectReq);
+  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
+           !Caller->hasFnAttr(Attribute::StackProtectReq))
+    Caller->addFnAttr(Attribute::StackProtect);
+
+  // Look at all of the allocas that we inlined through this call site. If we
+  // have already inlined other allocas through other calls into this function,
+  // then we know that they have disjoint lifetimes and that we can merge them.
+  //
+  // There are many heuristics possible for merging these allocas, and the
+  // different options have different tradeoffs. One thing that we *really*
+  // don't want to hurt is SRoA: once inlining happens, often allocas are no
+  // longer address taken and so they can be promoted.
+  //
+  // Our "solution" for that is to only merge allocas whose outermost type is an
+  // array type. These are usually not promoted because someone is using a
+  // variable index into them. These are also often the most important ones to
+  // merge.
+  //
+  // A better solution would be to have real memory lifetime markers in the IR
+  // and not have the inliner do any merging of allocas at all. This would
+  // allow the backend to do proper stack slot coloring of all allocas that
+  // *actually make it to the backend*, which is really what we want.
+  //
+  // Because we don't have this information, we do this simple and useful hack.
+  //
+  SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+  // Loop over all the allocas we have so far and see if they can be merged with
+  // a previously inlined alloca. If not, remember that we had it.
+  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
+       AllocaNo != e; ++AllocaNo) {
+
+    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+    // Don't bother trying to merge array allocations (they will usually be
+    // canonicalized to be an allocation *of* an array), or allocations whose
+    // type is not itself an array (because we're afraid of pessimizing SRoA).
+    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+    if (ATy == 0 || AI->isArrayAllocation())
+      continue;
+
+    // Get the list of all available allocas for this array type.
+    SmallVector<AllocaInst*, DEFAULT_VEC_SLOTS> &AllocasForType
+      = InlinedArrayAllocas[ATy];
+
+    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+    // that we have to be careful not to reuse the same "available" alloca for
+    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
+    // set to keep track of which "available" allocas are being used by this
+    // function. Also, AllocasForType can be empty of course!
+    bool MergedAwayAlloca = false;
+    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+      AllocaInst *AvailableAlloca = AllocasForType[i];
+
+      // The available alloca has to be in the right function, not in some other
+      // function in this SCC.
+      if (AvailableAlloca->getParent() != AI->getParent())
+        continue;
+
+      // If the inlined function already uses this alloca then we can't reuse
+      // it.
+      if (!UsedAllocas.insert(AvailableAlloca))
+        continue;
+
+      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+      // success!
+      DEBUG(errs() << " ***MERGED ALLOCA: " << *AI);
+
+      AI->replaceAllUsesWith(AvailableAlloca);
+      AI->eraseFromParent();
+      MergedAwayAlloca = true;
+      break;
+    }
+
+    // If we already nuked the alloca, we're done with it.
+    if (MergedAwayAlloca)
+      continue;
+
+    // If we were unable to merge away the alloca either because there are no
+    // allocas of the right type available or because we reused them all
+    // already, remember that this alloca came from an inlined function and mark
+    // it used so we don't reuse it for other allocas from this inline
+    // operation.
+    AllocasForType.push_back(AI);
+    UsedAllocas.insert(AI);
+  }
+
+  return true;
+}
+
+/// Collects the call sites in MF that may be inlinable and inlines them.
+/// Nothing is done when the device reports AMDILDeviceInfo::NoInline as
+/// supported. Directly self-recursive calls are not inlined; an error is
+/// recorded instead. Returns true if at least one call was inlined.
+bool
+AMDILInlinePass::runOnFunction(Function &MF)
+{
+  Function *F = &MF;
+  const AMDILSubtarget &STM = TM.getSubtarget<AMDILSubtarget>();
+  if (STM.device()->isSupported(AMDILDeviceInfo::NoInline))
+    return false;
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  SmallVector<CallSite, 16> CallSites;
+  for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) {
+    for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+      CallSite CS = CallSite(cast<Value>(I));
+      // If this isn't a call, or it is a call to an intrinsic, it can
+      // never be inlined.
+      if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
+        continue;
+
+      // If this is a direct call to an external function, we can never inline
+      // it. If it is an indirect call, inlining may resolve it to be a
+      // direct call, so we keep it.
+      Function *Callee = CS.getCalledFunction();
+      if (Callee && Callee->isDeclaration())
+        continue;
+
+      // We don't want to inline if we are recursive. Compare the functions,
+      // not their names: all unnamed functions share the empty name.
+      if (Callee == F) {
+        AMDILMachineFunctionInfo *MFI = getAnalysis<MachineFunctionAnalysis>()
+          .getMF().getInfo<AMDILMachineFunctionInfo>();
+        MFI->addErrorMsg(amd::CompilerErrorMessage[RECURSIVE_FUNCTION]);
+        continue;
+      }
+      CallSites.push_back(CS);
+    }
+  }
+
+  InlinedArrayAllocasTy InlinedArrayAllocas;
+  bool Changed = false;
+  for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+    CallSite CS = CallSites[CSi];
+    Function *Callee = CS.getCalledFunction();
+    // We can only inline direct calls to non-declarations.
+    if (Callee == 0 || Callee->isDeclaration())
+      continue;
+    // Attempt to inline the function...
+    if (AMDILInlineCallIfPossible(CS, TD, InlinedArrayAllocas))
+      Changed = true;
+  }
+  return Changed;
+}
+
+// Returns the fixed display name of this pass.
+const char *AMDILInlinePass::getPassName() const
+{
+  return "AMDIL Inline Function Pass";
+}
+// No per-module setup is performed; always returns false.
+bool AMDILInlinePass::doInitialization(Module &M)
+{
+  return false;
+}
+
+// No per-module teardown is performed; always returns false.
+bool AMDILInlinePass::doFinalization(Module &M)
+{
+  return false;
+}
+
+// Requires MachineFunctionAnalysis and declares all analyses preserved.
+void AMDILInlinePass::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  AU.addRequired<MachineFunctionAnalysis>();
+  FunctionPass::getAnalysisUsage(AU);
+  AU.setPreservesAll();
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,33 @@
+//===-- AMDILInstPrinter.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+// Passes all three MC descriptions straight through to MCInstPrinter.
+AMDILInstPrinter::AMDILInstPrinter(const MCAsmInfo &MAI,
+                                   const MCInstrInfo &MII,
+                                   const MCRegisterInfo &MRI)
+  : MCInstPrinter(MAI, MII, MRI) {}
+// Instruction printing is not implemented: always hits llvm_unreachable.
+void AMDILInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+                                 StringRef annot) {
+  llvm_unreachable("unsupported");
+}
+
+// Destructor: the body is empty.
+AMDILInstPrinter::~AMDILInstPrinter()
+{}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,37 @@
+//===-- AMDILInstPrinter.h ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILMINSTPRINTER_H_
+#define AMDILMINSTPRINTER_H_
+#include "AMDILLLVMVersion.h"
+#include "AMDILLLVMPC.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm
+{
+class MCAsmInfo;
+class MCInst;
+class raw_ostream;
+// FIXME: This class still has to be implemented once we transition to
+// MCStreamer.
+class AMDILInstPrinter : public MCInstPrinter {
+public:
+  AMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                   const MCRegisterInfo &MRI);
+  virtual ~AMDILInstPrinter();
+  virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef annot);
+};
+
+} // namespace llvm
+
+#endif // AMDILMINSTPRINTER_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrConversion.macros Tue Aug 14 16:38:58 2012
@@ -0,0 +1,274 @@
+
+
+// _32BIT_EXTENSION: emits one TableGen pattern named FI##to##TI##_##TO that
+// matches (TO (INSTR REGTYPE:$src)). The replacement reinterprets the source
+// through IL_ASINT_##FROM, shifts it left by LSV with SHL_i32, shifts it back
+// right by RSV with SHRINSTR##_i32 and finally reinterprets the result through
+// IL_AS##TYPEINSTR##_i32.
+// NOTE(review): the `newline;` tokens are presumably turned into real line
+// breaks by a post-processing step after preprocessing -- confirm.
+#define _32BIT_EXTENSION(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(IL_AS##TYPEINSTR##_i32 newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV)))>; newline; newline;
+
+// Vector form of _32BIT_EXTENSION: the same shift-left/shift-right sequence,
+// using the VTYPE-suffixed instructions and VCREATE_##VTYPE##i32 to splat the
+// shift amounts.
+#define _32BIT_EXTENSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(IL_AS##TYPEINSTR##_##VTYPE##i32 newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV))))>; newline; newline;
+
+// _64BIT_ZEXTENSION: pattern FI##to##TI##_##TO for (TO (INSTR REGTYPE:$src)).
+// The result is built with LCREATE from two 32-bit values: the source shifted
+// left by LSV and right by RSV (SHRINSTR##_i32), and the constant 0.
+// The TYPEINSTR and MASK parameters are accepted but not used in the body.
+#define _64BIT_ZEXTENSION(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, MASK) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(LCREATE newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV)), newline; \
+(LOADCONST_i32 0))>; newline; newline;
+
+// Vector form of _64BIT_ZEXTENSION: LCREATE_##VTYPE##i64 combines the shifted
+// source with a splatted 0. TYPEINSTR and MASK are likewise unused.
+#define _64BIT_ZEXTENSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(LCREATE_##VTYPE##i64 newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 0)))>; newline; newline;
+
+// _64BIT_SEXTENSION: pattern FI##to##TI##_##TO for (TO (INSTR REGTYPE:$src)).
+// LCREATE receives two 32-bit values computed from the same left-shifted
+// source: the first is shifted right by RSV, the second is shifted right by
+// the constant 31 (both with SHRINSTR##_i32).
+// The TYPEINSTR and MASK parameters are accepted but not used in the body.
+#define _64BIT_SEXTENSION(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, MASK) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(LCREATE newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV)), newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 31)))>; newline; newline;
+
+// Vector form of _64BIT_SEXTENSION: same two-value construction, using the
+// VTYPE-suffixed instructions and splatted shift amounts.
+#define _64BIT_SEXTENSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, TYPEINSTR, SHRINSTR, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(LCREATE_##VTYPE##i64 newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV))), newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 31))))>; newline; newline;
+
+// _32BIT_F2I_CONVERSION: pattern FI##to##TI##_##TO for
+// (TO (INSTR REGTYPE:$src)). The source is reinterpreted as an integer,
+// shifted left by LSV and right by RSV (SHRINSTR##_i32), and the result is
+// passed through FPINSTR.
+// NOTE(review): despite the "F2I" name, the generator macros in this file
+// instantiate it with sint_to_fp/uint_to_fp and ITOF/UTOF, i.e. for the
+// integer-to-float direction. MASK is accepted but unused.
+#define _32BIT_F2I_CONVERSION(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, SHRINSTR, LSV, RSV, MASK) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(FPINSTR newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV))))>; newline; newline;
+
+// Vector form of _32BIT_F2I_CONVERSION (VTYPE-suffixed shifts, splatted shift
+// amounts). MASK is unused here as well.
+#define _32BIT_F2I_CONVERSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, SHRINSTR, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(FPINSTR newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV)))))>; newline; newline;
+
+// _32BIT_I2F_CONVERSION: pattern FI##to##TI##_##TO for
+// (TO (INSTR REGTYPE:$src)). FPINSTR is applied to the source, the result is
+// ANDed with the constant MASK (BINARY_AND_i32) and reinterpreted through
+// IL_AS##TYPEINSTR##_i32.
+// NOTE(review): despite the "I2F" name, the generator macros in this file
+// instantiate it with fp_to_sint/fp_to_uint and FTOI/FTOU, i.e. for the
+// float-to-integer direction. FROM is accepted but unused.
+#define _32BIT_I2F_CONVERSION(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, MASK, TYPEINSTR) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(IL_AS##TYPEINSTR##_i32 newline; \
+(BINARY_AND_i32 newline; \
+(FPINSTR REGTYPE:$src), newline; \
+(LOADCONST_i32 MASK))))>; newline; newline;
+
+// Vector form of _32BIT_I2F_CONVERSION; the mask is splatted with
+// VCREATE_##VTYPE##i32.
+#define _32BIT_I2F_CONVERSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, MASK, TYPEINSTR, VTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(IL_AS##TYPEINSTR##_##VTYPE##i32 newline; \
+(BINARY_AND_##VTYPE##i32 newline; \
+(FPINSTR REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 MASK)))))>; newline; newline;
+
+// _32BIT_D2I_CONVERSION: same shift-left/shift-right + FPINSTR sequence as
+// _32BIT_F2I_CONVERSION, with the FPINSTR result additionally wrapped in FTOD.
+// NOTE(review): the generator macros instantiate it with
+// sint_to_fp/uint_to_fp and an f64 result, i.e. integer-to-double despite the
+// "D2I" name. MASK is accepted but unused.
+#define _32BIT_D2I_CONVERSION(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, SHRINSTR, LSV, RSV, MASK) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO (FTOD newline; \
+(FPINSTR newline; \
+(SHRINSTR##_i32 newline; \
+(SHL_i32 newline; \
+(IL_ASINT_##FROM REGTYPE:$src), newline; \
+(LOADCONST_i32 LSV)), newline; \
+(LOADCONST_i32 RSV)))))>; newline; newline;
+
+// Vector form of _32BIT_D2I_CONVERSION; the outer conversion is
+// FTOD_##VTYPE##f64.
+#define _32BIT_D2I_CONVERSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, SHRINSTR, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO (FTOD_##VTYPE##f64 newline; \
+(FPINSTR newline; \
+(SHRINSTR##_##VTYPE##i32 newline; \
+(SHLVEC_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM REGTYPE:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 LSV))), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 RSV))))))>; newline; newline;
+
+// _32BIT_I2D_CONVERSION: same mask-and-reinterpret sequence as
+// _32BIT_I2F_CONVERSION, except that the source first goes through DTOF
+// before FPINSTR is applied.
+// NOTE(review): the generator macros instantiate it with
+// fp_to_sint/fp_to_uint and an f64 source, i.e. double-to-integer despite the
+// "I2D" name. FROM is accepted but unused.
+#define _32BIT_I2D_CONVERSION(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, MASK, TYPEINSTR) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(IL_AS##TYPEINSTR##_i32 newline; \
+(BINARY_AND_i32 newline; \
+(FPINSTR (DTOF REGTYPE:$src)), newline; \
+(LOADCONST_i32 MASK))))>; newline; newline;
+
+// Vector form of _32BIT_I2D_CONVERSION; the inner conversion is
+// DTOF_##VTYPE##f32 and the mask is splatted.
+#define _32BIT_I2D_CONVERSION_VEC(FI, TI, FROM, TO, REGTYPE, INSTR, FPINSTR, MASK, TYPEINSTR, VTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (INSTR REGTYPE:$src)), newline; \
+(TO newline; \
+(IL_AS##TYPEINSTR##_##VTYPE##i32 newline; \
+(BINARY_AND_##VTYPE##i32 newline; \
+(FPINSTR (DTOF_##VTYPE##f32 REGTYPE:$src)), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 MASK)))))>; newline; newline;
+
+// _32BIT_TRUNCATE: pattern FI##to##TI##_##TO for (TO (trunc REGTYPE:$src)).
+// The source is ANDed with MASK in its own type (BINARY_AND_##FROM with
+// LOADCONST_##FROM), reinterpreted through IL_ASINT_##FROM and then through
+// IL_AS##ASTYPE##_i32. BITSHIFT is accepted but not used in the body.
+#define _32BIT_TRUNCATE(FI, TI, FROM, TO, REGTYPE, ASTYPE, MASK, BITSHIFT) \
+def FI##to##TI##_##TO : Pat<(TO (trunc REGTYPE:$src)), newline; \
+(IL_AS##ASTYPE##_i32 newline; \
+(IL_ASINT_##FROM newline; \
+(BINARY_AND_##FROM REGTYPE:$src, newline; \
+(LOADCONST_##FROM MASK))) newline;\
+)>; newline; newline;
+
+// Vector form of _32BIT_TRUNCATE: the mask is loaded as SCALARFROM and
+// splatted with VCREATE_##VTYPE##SCALARFROM. BITSHIFT is unused.
+#define _32BIT_TRUNCATE_VEC(FI, TI, FROM, TO, REGTYPE, ASTYPE, MASK, VTYPE, SCALARFROM, BITSHIFT, ASVTYPE) \
+def FI##to##TI##_##TO : Pat<(TO (trunc REGTYPE:$src)), newline; \
+(IL_AS##ASTYPE##_##VTYPE##i32 newline; \
+(IL_AS##ASVTYPE##INT_##FROM newline; \
+(BINARY_AND_##FROM REGTYPE:$src, newline; \
+(VCREATE_##VTYPE##SCALARFROM (LOADCONST_##SCALARFROM MASK)))) newline; \
+)>; newline; newline;
+
+// _64BIT_TRUNCATE: pattern lto##TI##_##TO for (TO (trunc GPRI64:$src)).
+// LLO is applied to the 64-bit source, the result is ANDed with MASK
+// (BINARY_AND_i32) and reinterpreted through IL_AS##ASTYPE##_i32.
+// BITSHIFT is accepted but not used in the body.
+#define _64BIT_TRUNCATE(TI, TO, ASTYPE, MASK, BITSHIFT) \
+def lto##TI##_##TO : Pat<(TO (trunc GPRI64:$src)), newline; \
+(IL_AS##ASTYPE##_i32 newline; \
+(BINARY_AND_i32 newline; \
+(LLO GPRI64:$src), newline; \
+(LOADCONST_i32 MASK)) newline; \
+)>; newline; newline;
+
+// Vector form of _64BIT_TRUNCATE: the source register class is
+// GPR##ASVTYPE##I64 and LLO_##VTYPE##i64 is applied to it before masking.
+// BITSHIFT is unused.
+#define _64BIT_TRUNCATE_VEC(TI, TO, ASTYPE, MASK, VTYPE, ASVTYPE, BITSHIFT) \
+def lto##TI##_##TO : Pat<(TO (trunc GPR##ASVTYPE##I64:$src)), newline; \
+(IL_AS##ASTYPE##_##VTYPE##i32 newline; \
+(BINARY_AND_##VTYPE##i32 newline; \
+(LLO_##VTYPE##i64 GPR##ASVTYPE##I64:$src), newline; \
+(VCREATE_##VTYPE##i32 (LOADCONST_i32 MASK))) newline; \
+)>; newline; newline;
+
+
+// Expands to three _32BIT_EXTENSION patterns for one FROM->TO pair: anyext
+// (name prefix "a", USHR), zext (prefix "u", USHR) and sext (prefix "s", SHR).
+// The last row ends in a backslash, so the definition continues onto the
+// blank line that follows; keep that line blank.
+#define GENERATE_32BIT_CONVERT(FI, TI, FROM, TO, FREGTYPE, ASTYPE, LSV, RSV) \
+_32BIT_EXTENSION(a##FI, TI, FROM, TO, FREGTYPE, anyext, ASTYPE, USHR, LSV, RSV) \
+_32BIT_EXTENSION(u##FI, TI, FROM, TO, FREGTYPE, zext, ASTYPE, USHR, LSV, RSV) \
+_32BIT_EXTENSION(s##FI, TI, FROM, TO, FREGTYPE, sext, ASTYPE, SHR, LSV, RSV) \
+
+// Expands to three patterns extending FROM to a 64-bit TO: anyext and zext
+// both use _64BIT_ZEXTENSION with USHR, sext uses _64BIT_SEXTENSION with SHR.
+// The last row ends in a backslash, so the definition continues onto the
+// blank line that follows; keep that line blank.
+#define GENERATE_64BIT_CONVERT(FI, TI, FROM, TO, FREGTYPE, ASTYPE, LSV, RSV, MASK) \
+_64BIT_ZEXTENSION(a##FI, TI, FROM, TO, FREGTYPE, anyext, ASTYPE, USHR, LSV, RSV, MASK) \
+_64BIT_ZEXTENSION(u##FI, TI, FROM, TO, FREGTYPE, zext, ASTYPE, USHR, LSV, RSV, MASK) \
+_64BIT_SEXTENSION(s##FI, TI, FROM, TO, FREGTYPE, sext, ASTYPE, SHR, LSV, RSV, MASK) \
+
+// Expands to eight patterns between the integer type FROM and f32/f64:
+//   sint_to_fp / uint_to_fp -> f32  (via the *_F2I_* macro, ITOF / UTOF)
+//   fp_to_sint / fp_to_uint <- f32  (via the *_I2F_* macro, FTOI / FTOU)
+//   sint_to_fp / uint_to_fp -> f64  (via the *_D2I_* macro)
+//   fp_to_sint / fp_to_uint <- f64  (via the *_I2D_* macro)
+// Note that the helper macro names read opposite to the DAG node each row
+// matches.
+#define GENERATE_32BIT_FP_CONVERSION(FI, FROM, IREGTYPE, ASTYPE, LSV, RSV, MASK) \
+_32BIT_F2I_CONVERSION(s##FI, f, FROM, f32, IREGTYPE, sint_to_fp, ITOF, SHR, LSV, RSV, MASK) \
+_32BIT_F2I_CONVERSION(u##FI, f, FROM, f32, IREGTYPE, uint_to_fp, UTOF, USHR, LSV, RSV, MASK) \
+_32BIT_I2F_CONVERSION(f, s##FI, f32, FROM, GPRF32, fp_to_sint, FTOI, MASK, ASTYPE) \
+_32BIT_I2F_CONVERSION(f, u##FI, f32, FROM, GPRF32, fp_to_uint, FTOU, MASK, ASTYPE) \
+_32BIT_D2I_CONVERSION(s##FI, d, FROM, f64, IREGTYPE, sint_to_fp, ITOF, SHR, LSV, RSV, MASK) \
+_32BIT_D2I_CONVERSION(u##FI, d, FROM, f64, IREGTYPE, uint_to_fp, UTOF, USHR, LSV, RSV, MASK) \
+_32BIT_I2D_CONVERSION(d, s##FI, f64, FROM, GPRF64, fp_to_sint, FTOI, MASK, ASTYPE) \
+_32BIT_I2D_CONVERSION(d, u##FI, f64, FROM, GPRF64, fp_to_uint, FTOU, MASK, ASTYPE)
+
+// Generate the scalar conversion routines.
+// Name letters: c = i8, s = i16, i = i32, l = i64. The two numeric arguments
+// are the left and right shift amounts (24 for i8, 16 for i16, 0 for i32).
+GENERATE_32BIT_CONVERT(c, s, i8, i16, GPRI8, SHORT, 24, 24)
+GENERATE_32BIT_CONVERT(c, i, i8, i32, GPRI8, INT, 24, 24)
+GENERATE_64BIT_CONVERT(c, l, i8, i64, GPRI8, LONG, 24, 24, 0x000000FF)
+GENERATE_32BIT_CONVERT(s, i, i16, i32, GPRI16, INT, 16, 16)
+GENERATE_64BIT_CONVERT(s, l, i16, i64, GPRI16, LONG, 16, 16, 0x0000FFFF)
+GENERATE_64BIT_CONVERT(i, l, i32, i64, GPRI32, LONG, 0, 0, 0xFFFFFFFF)
+
+GENERATE_32BIT_FP_CONVERSION(c, i8, GPRI8, CHAR, 24, 24, 0x000000FF)
+GENERATE_32BIT_FP_CONVERSION(s, i16, GPRI16, SHORT, 16, 16, 0x0000FFFF)
+
+
+// Scalar truncation routines. The hexadecimal argument is the mask that keeps
+// the destination's bits; the final numeric argument (BITSHIFT) is not used
+// by the truncate macros.
+_32BIT_TRUNCATE(s, c, i16, i8, GPRI16, CHAR, 0x000000FF, 24)
+_32BIT_TRUNCATE(i, c, i32, i8, GPRI32, CHAR, 0x000000FF, 24)
+_32BIT_TRUNCATE(i, s, i32, i16, GPRI32, SHORT, 0x0000FFFF, 16)
+_64BIT_TRUNCATE(c, i8, CHAR, 0x000000FF, 24)
+_64BIT_TRUNCATE(s, i16, SHORT, 0x0000FFFF, 16)
+_64BIT_TRUNCATE(i, i32, INT, 0xFFFFFFFF, 0)
+
+// Vector routines.
+// Vector counterpart of GENERATE_32BIT_CONVERT: FROM and TO are prefixed with
+// VTYPE (e.g. v2) and the shifts use USHRVEC / SHRVEC. The last row ends in a
+// backslash, so the definition continues onto the following blank line.
+#define GENERATE_32BIT_CONVERT_VEC(FI, TI, FROM, TO, FREGTYPE, ASTYPE, LSV, RSV, VTYPE, ASVTYPE) \
+_32BIT_EXTENSION_VEC(a##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE, anyext, ASTYPE, USHRVEC, LSV, RSV, VTYPE, ASVTYPE) \
+_32BIT_EXTENSION_VEC(u##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE, zext, ASTYPE, USHRVEC, LSV, RSV, VTYPE, ASVTYPE) \
+_32BIT_EXTENSION_VEC(s##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE, sext, ASTYPE, SHRVEC, LSV, RSV, VTYPE, ASVTYPE) \
+
+// Vector counterpart of GENERATE_64BIT_CONVERT: anyext/zext use
+// _64BIT_ZEXTENSION_VEC with USHRVEC, sext uses _64BIT_SEXTENSION_VEC with
+// SHRVEC. The last row ends in a backslash, so the definition continues onto
+// the following blank line.
+#define GENERATE_64BIT_CONVERT_VEC(FI, TI, FROM, TO, FREGTYPE, ASTYPE, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_64BIT_ZEXTENSION_VEC(a##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE, anyext, ASTYPE, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_64BIT_ZEXTENSION_VEC(u##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE, zext, ASTYPE, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_64BIT_SEXTENSION_VEC(s##FI, TI, VTYPE##FROM, VTYPE##TO, FREGTYPE, sext, ASTYPE, SHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+
+// Two-component vector counterpart of GENERATE_32BIT_FP_CONVERSION: eight
+// patterns between VTYPE##FROM and VTYPE##f32 / VTYPE##f64. The floating-point
+// source register classes are hard-coded to GPRV2F32 and GPRV2F64, which is
+// why this macro is specific to 2-component vectors.
+#define GENERATE_32BIT_FP_CONVERSION_VEC2(FI, FROM, IREGTYPE, ASTYPE, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_F2I_CONVERSION_VEC(s##FI, f, VTYPE##FROM, VTYPE##f32, IREGTYPE, sint_to_fp, ITOF_##VTYPE##f32, SHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_F2I_CONVERSION_VEC(u##FI, f, VTYPE##FROM, VTYPE##f32, IREGTYPE, uint_to_fp, UTOF_##VTYPE##f32, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_I2F_CONVERSION_VEC(f, s##FI, VTYPE##f32, VTYPE##FROM, GPRV2F32, fp_to_sint, FTOI_##VTYPE##i32, MASK, ASTYPE, VTYPE) \
+_32BIT_I2F_CONVERSION_VEC(f, u##FI, VTYPE##f32, VTYPE##FROM, GPRV2F32, fp_to_uint, FTOU_##VTYPE##i32, MASK, ASTYPE, VTYPE) \
+_32BIT_D2I_CONVERSION_VEC(s##FI, d, VTYPE##FROM, VTYPE##f64, IREGTYPE, sint_to_fp, ITOF_##VTYPE##f32, SHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_D2I_CONVERSION_VEC(u##FI, d, VTYPE##FROM, VTYPE##f64, IREGTYPE, uint_to_fp, UTOF_##VTYPE##f32, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_I2D_CONVERSION_VEC(d, s##FI, VTYPE##f64, VTYPE##FROM, GPRV2F64, fp_to_sint, FTOI_##VTYPE##i32, MASK, ASTYPE, VTYPE) \
+_32BIT_I2D_CONVERSION_VEC(d, u##FI, VTYPE##f64, VTYPE##FROM, GPRV2F64, fp_to_uint, FTOU_##VTYPE##i32, MASK, ASTYPE, VTYPE)
+
+// Four-component vector counterpart of GENERATE_32BIT_FP_CONVERSION. Only the
+// four f32 patterns are generated (no f64 rows), and the floating-point source
+// register class is hard-coded to GPRV4F32.
+#define GENERATE_32BIT_FP_CONVERSION_VEC4(FI, FROM, IREGTYPE, ASTYPE, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_F2I_CONVERSION_VEC(s##FI, f, VTYPE##FROM, VTYPE##f32, IREGTYPE, sint_to_fp, ITOF_##VTYPE##f32, SHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_F2I_CONVERSION_VEC(u##FI, f, VTYPE##FROM, VTYPE##f32, IREGTYPE, uint_to_fp, UTOF_##VTYPE##f32, USHRVEC, LSV, RSV, MASK, VTYPE, ASVTYPE) \
+_32BIT_I2F_CONVERSION_VEC(f, s##FI, VTYPE##f32, VTYPE##FROM, GPRV4F32, fp_to_sint, FTOI_##VTYPE##i32, MASK, ASTYPE, VTYPE) \
+_32BIT_I2F_CONVERSION_VEC(f, u##FI, VTYPE##f32, VTYPE##FROM, GPRV4F32, fp_to_uint, FTOU_##VTYPE##i32, MASK, ASTYPE, VTYPE)
+
+// Generate the conversion routines for 2-component vectors.
+// The trailing "v2, V2" arguments select the lower-case type prefix and the
+// upper-case prefix used in instruction names respectively.
+GENERATE_32BIT_CONVERT_VEC(c, s, i8, i16, GPRV2I8, V2SHORT, 24, 24, v2, V2)
+GENERATE_32BIT_CONVERT_VEC(c, i, i8, i32, GPRV2I8, V2INT, 24, 24, v2, V2)
+GENERATE_64BIT_CONVERT_VEC(c, l, i8, i64, GPRV2I8, V2LONG, 24, 24, 0x000000FF, v2, V2)
+GENERATE_32BIT_CONVERT_VEC(s, i, i16, i32, GPRV2I16, V2INT, 16, 16, v2, V2)
+GENERATE_64BIT_CONVERT_VEC(s, l, i16, i64, GPRV2I16, V2LONG, 16, 16, 0x0000FFFF, v2, V2)
+GENERATE_64BIT_CONVERT_VEC(i, l, i32, i64, GPRV2I32, V2LONG, 0, 0, 0xFFFFFFFF, v2, V2)
+
+GENERATE_32BIT_FP_CONVERSION_VEC2(c, i8, GPRV2I8, V2CHAR, 24, 24, 0x000000FF, v2, V2)
+GENERATE_32BIT_FP_CONVERSION_VEC2(s, i16, GPRV2I16, V2SHORT, 16, 16, 0x0000FFFF, v2, V2)
+
+
+// Truncation routines for 2-component vectors (32-bit and 64-bit sources).
+_32BIT_TRUNCATE_VEC(s, c, v2i16, v2i8, GPRV2I16, V2CHAR, 0x000000FF, v2, i16, 24, V2)
+_32BIT_TRUNCATE_VEC(i, c, v2i32, v2i8, GPRV2I32, V2CHAR, 0x000000FF, v2, i32, 24, V2)
+_32BIT_TRUNCATE_VEC(i, s, v2i32, v2i16, GPRV2I32, V2SHORT, 0x0000FFFF, v2, i32, 16, V2)
+_64BIT_TRUNCATE_VEC(c, v2i8, V2CHAR, 0x000000FF, v2, V2, 24)
+_64BIT_TRUNCATE_VEC(s, v2i16, V2SHORT, 0x0000FFFF, v2, V2, 16)
+_64BIT_TRUNCATE_VEC(i, v2i32, V2INT, 0xFFFFFFFF, v2, V2, 0)
+
+// Generate the conversion routines for 4-component vectors.
+// Only 32-bit destinations are instantiated here; there are no 64-bit
+// (i64 / f64) conversions for 4-component vectors in this file.
+GENERATE_32BIT_CONVERT_VEC(c, s, i8, i16, GPRV4I8, V4SHORT, 24, 24, v4, V4)
+GENERATE_32BIT_CONVERT_VEC(c, i, i8, i32, GPRV4I8, V4INT, 24, 24, v4, V4)
+GENERATE_32BIT_CONVERT_VEC(s, i, i16, i32, GPRV4I16, V4INT, 16, 16, v4, V4)
+
+GENERATE_32BIT_FP_CONVERSION_VEC4(c, i8, GPRV4I8, V4CHAR, 24, 24, 0x000000FF, v4, V4)
+GENERATE_32BIT_FP_CONVERSION_VEC4(s, i16, GPRV4I16, V4SHORT, 16, 16, 0x0000FFFF, v4, V4)
+
+
+// Truncation routines for 4-component vectors (32-bit sources only).
+_32BIT_TRUNCATE_VEC(s, c, v4i16, v4i8, GPRV4I16, V4CHAR, 0x000000FF, v4, i16, 24, V4)
+_32BIT_TRUNCATE_VEC(i, c, v4i32, v4i8, GPRV4I32, V4CHAR, 0x000000FF, v4, i32, 24, V4)
+_32BIT_TRUNCATE_VEC(i, s, v4i32, v4i16, GPRV4I32, V4SHORT, 0x0000FFFF, v4, i32, 16, V4)
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,923 @@
+//===-- AMDILInstrInfo.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "AMDILUtilityFunctions.h"
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_MC_DESC
+#include "AMDILGenInstrInfo.inc"
+#include "AMDILMachineFunctionInfo.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm)
+ : AMDILGenInstrInfo(AMDIL::ADJCALLSTACKDOWN, AMDIL::ADJCALLSTACKUP),
+ RI(tm, *this),
+ TM(tm)
+{
+}
+
+const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const
+{
+ return RI;
+}
+
+/// Recognize a plain register-to-register move.
+///
+/// Returns true when MI has a move opcode and its first two operands are both
+/// registers. In that case SrcReg / DstReg receive operand 1 / operand 0 and
+/// both sub-register indices are set to 0. When false is returned the
+/// out-parameters are left untouched.
+bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
+                                 unsigned int &DstReg, unsigned int &SrcSubIdx,
+                                 unsigned int &DstSubIdx) const
+{
+  // FIXME: we should also look for:
+  // add with 0
+  const bool isRegToRegMove = isMove(MI.getOpcode())
+                              && MI.getOperand(0).isReg()
+                              && MI.getOperand(1).isReg();
+  if (!isRegToRegMove) {
+    return false;
+  }
+
+  DstReg = MI.getOperand(0).getReg();
+  SrcReg = MI.getOperand(1).getReg();
+  SrcSubIdx = 0;
+  DstSubIdx = 0;
+  return true;
+}
+
+/// Report whether MI is a sub-register extract that could be coalesced.
+///
+/// The hook is currently switched off: it returns false before inspecting MI,
+/// so SrcReg, DstReg and SubIdx are never written. The classification logic
+/// below is retained for when the hook is re-enabled. It maps the
+/// high/low-half and vector-extract opcodes to the matching sub-register index
+/// and bails out conservatively if either register operand already carries a
+/// sub-register.
+bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+                                           unsigned &SrcReg, unsigned &DstReg,
+                                           unsigned &SubIdx) const
+{
+  // Deliberately disabled; remove this return to enable the analysis below.
+  return false;
+
+  unsigned opc = MI.getOpcode();
+  SubIdx = llvm::NoSubRegister;
+  switch (opc) {
+  default:
+    return false;
+  case AMDIL::DHI:
+  case AMDIL::LHI:
+    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+      // Be conservative.
+      return false;
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    SubIdx = llvm::sub_y_comp;
+    break;
+  case AMDIL::DLO:
+  case AMDIL::LLO:
+    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+      // Be conservative.
+      return false;
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    SubIdx = llvm::sub_x_comp;
+    break;
+  case AMDIL::VEXTRACT_v2f64:
+  case AMDIL::VEXTRACT_v2i64:
+    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+      // Be conservative.
+      return false;
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    assert(MI.getOperand(2).isImm()
+           && "Operand 2 must be an immediate value!");
+    switch (MI.getOperand(2).getImm()) {
+    case 0:
+      SubIdx = llvm::sub_xy_comp;
+      break;
+    case 1:
+      SubIdx = llvm::sub_zw_comp;
+      break;
+    default:
+      return false;
+    }
+    // Without this break the 64-bit element case fell through into the
+    // 32-bit-and-narrower case below and overwrote SubIdx with a
+    // single-component index.
+    break;
+  case AMDIL::VEXTRACT_v2f32:
+  case AMDIL::VEXTRACT_v2i32:
+  case AMDIL::VEXTRACT_v2i16:
+  case AMDIL::VEXTRACT_v2i8:
+  case AMDIL::VEXTRACT_v4f32:
+  case AMDIL::VEXTRACT_v4i32:
+  case AMDIL::VEXTRACT_v4i16:
+  case AMDIL::VEXTRACT_v4i8:
+    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+      // Be conservative.
+      return false;
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    assert(MI.getOperand(2).isImm()
+           && "Operand 2 must be an immediate value!");
+    switch (MI.getOperand(2).getImm()) {
+    case 0:
+      SubIdx = llvm::sub_x_comp;
+      break;
+    case 1:
+      SubIdx = llvm::sub_y_comp;
+      break;
+    case 2:
+      SubIdx = llvm::sub_z_comp;
+      break;
+    case 3:
+      SubIdx = llvm::sub_w_comp;
+      break;
+    default:
+      return false;
+    }
+    break;
+  }
+  return SubIdx != llvm::NoSubRegister;
+}
+
+/// If MI is a private-memory load whose operand 1 is a frame index, store that
+/// index in FrameIndex and return the register in operand 0; otherwise
+/// return 0 and leave FrameIndex untouched.
+unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                             int &FrameIndex) const
+{
+  if (!isPrivateInst(TM, MI) || !isLoadInst(TM, MI)) {
+    return 0;
+  }
+  const MachineOperand &Addr = MI->getOperand(1);
+  if (!Addr.isFI()) {
+    return 0;
+  }
+  FrameIndex = Addr.getIndex();
+  return MI->getOperand(0).getReg();
+}
+
+/// Post-frame-elimination variant of isLoadFromStackSlot.
+///
+/// For a private-memory load this first tries the direct frame-index check.
+/// If that fails it falls back to scanning MI's memory operands with
+/// hasLoadFromStackSlot(), mirroring isStoreToStackSlotPostFE(). Previously
+/// this function was a verbatim copy of isLoadFromStackSlot() and therefore
+/// could not recognise anything the pre-elimination check did not.
+///
+/// Returns the loaded register when the direct check succeeds, the boolean
+/// result of the memory-operand scan (0 or 1) otherwise, and 0 for
+/// instructions that are not private-memory loads.
+unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+                                                   int &FrameIndex) const
+{
+  if (isPrivateInst(TM, MI) && isLoadInst(TM, MI)) {
+    if (unsigned Reg = isLoadFromStackSlot(MI, FrameIndex)) {
+      return Reg;
+    }
+    // Operand 1 is no longer a frame index; fall back to the memory operands.
+    const MachineMemOperand *Dummy = NULL;
+    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+  }
+  return 0;
+}
+
+/// Scan MI's memory operands for a load whose source value is a fixed stack
+/// slot. On the first match, FrameIndex receives the slot's frame index, MMO
+/// the matching memory operand, and true is returned. Returns false (outputs
+/// untouched) when no operand matches.
+bool AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+                                          const MachineMemOperand *&MMO,
+                                          int &FrameIndex) const
+{
+  MachineInstr::mmo_iterator it = MI->memoperands_begin();
+  const MachineInstr::mmo_iterator end = MI->memoperands_end();
+  for (; it != end; ++it) {
+    if (!(*it)->isLoad() || !(*it)->getValue()) {
+      continue;
+    }
+    const FixedStackPseudoSourceValue *Slot =
+      dyn_cast<const FixedStackPseudoSourceValue>((*it)->getValue());
+    if (!Slot) {
+      continue;
+    }
+    FrameIndex = Slot->getFrameIndex();
+    MMO = *it;
+    return true;
+  }
+  return false;
+}
+/// If MI is a private-memory store whose operand 1 is a frame index, store
+/// that index in FrameIndex and return the register in operand 0; otherwise
+/// return 0 and leave FrameIndex untouched.
+unsigned AMDILInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                            int &FrameIndex) const
+{
+  if (!isPrivateInst(TM, MI) || !isStoreInst(TM, MI)) {
+    return 0;
+  }
+  const MachineOperand &Addr = MI->getOperand(1);
+  if (!Addr.isFI()) {
+    return 0;
+  }
+  FrameIndex = Addr.getIndex();
+  return MI->getOperand(0).getReg();
+}
+/// Post-frame-elimination variant of isStoreToStackSlot.
+///
+/// Only private-memory stores whose operand 1 is a frame index are
+/// considered. For those, the direct check is tried first and the
+/// memory-operand scan (hasStoreToStackSlot) is used as a fallback. Returns 0
+/// for every other instruction.
+unsigned AMDILInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+                                                  int &FrameIndex) const
+{
+  const bool isCandidate = isPrivateInst(TM, MI)
+                           && isStoreInst(TM, MI)
+                           && MI->getOperand(1).isFI();
+  if (!isCandidate) {
+    return 0;
+  }
+  if (unsigned Reg = isStoreToStackSlot(MI, FrameIndex)) {
+    return Reg;
+  }
+  const MachineMemOperand *Dummy = NULL;
+  return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+}
+/// Scan MI's memory operands for a store whose target value is a fixed stack
+/// slot. On the first match, FrameIndex receives the slot's frame index, MMO
+/// the matching memory operand, and true is returned. Returns false (outputs
+/// untouched) when no operand matches.
+bool AMDILInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+                                         const MachineMemOperand *&MMO,
+                                         int &FrameIndex) const
+{
+  MachineInstr::mmo_iterator it = MI->memoperands_begin();
+  const MachineInstr::mmo_iterator end = MI->memoperands_end();
+  for (; it != end; ++it) {
+    if (!(*it)->isStore() || !(*it)->getValue()) {
+      continue;
+    }
+    const FixedStackPseudoSourceValue *Slot =
+      dyn_cast<const FixedStackPseudoSourceValue>((*it)->getValue());
+    if (!Slot) {
+      continue;
+    }
+    FrameIndex = Slot->getFrameIndex();
+    MMO = *it;
+    return true;
+  }
+  return false;
+}
+
+// Rematerialization hook. Currently an empty stub: no instruction is created
+// or inserted into MBB and none of the arguments are read.
+void
+AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const
+{
+ // TODO: Implement this function
+}
+
+// Duplicate Orig inside MF. No target-specific handling is done yet; the
+// result is whatever MF.CloneMachineInstr(Orig) returns.
+MachineInstr*
+AMDILInstrInfo::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const
+{
+ // TODO: Implement this function
+ return MF.CloneMachineInstr(Orig);
+}
+
+// Three-address conversion hook. Not implemented: always returns NULL and
+// leaves MFI, MBBI and LV untouched.
+MachineInstr *
+AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const
+{
+ // TODO: Implement this function
+ return NULL;
+}
+
+// Operand-commutation hook. Not implemented: always returns NULL without
+// modifying MI, regardless of NewMI.
+MachineInstr*
+AMDILInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const
+{
+ // TODO: Implement this function
+ return NULL;
+}
+// Not implemented: always returns false and never writes SrcOpIdx1 or
+// SrcOpIdx2.
+bool
+AMDILInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const
+{
+ // TODO: Implement this function
+ return false;
+}
+// Not implemented: always returns false, i.e. no two instructions are ever
+// reported as producing the same value. None of the arguments are inspected.
+bool
+AMDILInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const
+{
+ // TODO: Implement this function
+ return false;
+}
+
+/// Advance iter until it points at a branch instruction (any scalar
+/// BRANCH_COND variant or BRANCH). Returns true with iter left on that
+/// instruction, or false with iter == MBB.end() when no branch remains.
+bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
+                                        MachineBasicBlock &MBB) const
+{
+  for (; iter != MBB.end(); ++iter) {
+    switch (iter->getOpcode()) {
+    ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+    case AMDIL::BRANCH:
+      return true;
+    default:
+      // Not a branch; keep scanning.
+      break;
+    }
+  }
+  return false;
+}
+
+// Branch analysis hook.
+//
+// NOTE: analysis is currently switched off. retVal is initialised to true and
+// returned immediately, so TBB, FBB and Cond are never written and everything
+// after that first return is unreachable. (In the TargetInstrInfo contract a
+// true result tells the caller the block's branches could not be analysed.)
+//
+// The retained, unreachable body looks for up to two branch instructions in
+// MBB using getNextBranchInstr() and fills TBB/FBB/Cond from them; note that
+// it also erases unconditional branches from the block while doing so.
+bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+ bool retVal = true;
+ return retVal; // analysis disabled; code below is dead
+ MachineBasicBlock::iterator iter = MBB.begin();
+ if (!getNextBranchInstr(iter, MBB)) {
+ retVal = false;
+ } else {
+ MachineInstr *firstBranch = iter;
+ if (!getNextBranchInstr(++iter, MBB)) {
+ if (firstBranch->getOpcode() == AMDIL::BRANCH) {
+ TBB = firstBranch->getOperand(0).getMBB();
+ firstBranch->eraseFromParent();
+ retVal = false;
+ } else {
+ TBB = firstBranch->getOperand(0).getMBB();
+ FBB = *(MBB.succ_begin()+1);
+ if (FBB == TBB) {
+ FBB = *(MBB.succ_begin());
+ }
+ Cond.push_back(firstBranch->getOperand(1));
+ retVal = false;
+ }
+ } else {
+ MachineInstr *secondBranch = iter;
+ if (!getNextBranchInstr(++iter, MBB)) {
+ if (secondBranch->getOpcode() == AMDIL::BRANCH) {
+ TBB = firstBranch->getOperand(0).getMBB();
+ Cond.push_back(firstBranch->getOperand(1));
+ FBB = secondBranch->getOperand(0).getMBB();
+ secondBranch->eraseFromParent();
+ retVal = false;
+ } else {
+ assert(0 && "Should not have two consecutive conditional branches");
+ }
+ } else {
+ MBB.getParent()->viewCFG();
+ assert(0 && "Should not have three branch instructions in"
+ " a single basic block");
+ retVal = false;
+ }
+ }
+ }
+ return retVal;
+}
+
+/// Pick the conditional-branch opcode for a condition operand.
+///
+/// The choice is keyed on the register class recorded in the first OpInfo
+/// entry of the descriptor of the instruction that owns op. Unrecognised
+/// classes share the i8 variant.
+unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const
+{
+  switch (op.getParent()->getDesc().OpInfo->RegClass) {
+  case AMDIL::GPRI16RegClassID:
+    return AMDIL::BRANCH_COND_i16;
+  case AMDIL::GPRI32RegClassID:
+    return AMDIL::BRANCH_COND_i32;
+  case AMDIL::GPRI64RegClassID:
+    return AMDIL::BRANCH_COND_i64;
+  case AMDIL::GPRF32RegClassID:
+    return AMDIL::BRANCH_COND_f32;
+  case AMDIL::GPRF64RegClassID:
+    return AMDIL::BRANCH_COND_f64;
+  case AMDIL::GPRI8RegClassID:
+  default: // FIXME: fallthrough??
+    return AMDIL::BRANCH_COND_i8;
+  }
+}
+
+/// Append branch instructions to the end of MBB.
+///
+/// \param TBB   Destination of the (first) branch; must not be null.
+/// \param FBB   Optional destination of a trailing unconditional branch.
+/// \param Cond  Empty for an unconditional branch; otherwise Cond[0] is the
+///              register operand the conditional branch tests.
+/// \returns the number of instructions appended (1 or 2).
+///
+/// Changes from the previous revision:
+///  - the leftover debugging loop that dump()ed the parent of every condition
+///    operand on each call has been removed;
+///  - the two-branch path now reports 2 inserted instructions. It used to
+///    return 0 in builds without assertions even though both branches had
+///    been emitted.
+unsigned int
+AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+                             MachineBasicBlock *TBB,
+                             MachineBasicBlock *FBB,
+                             const SmallVectorImpl<MachineOperand> &Cond,
+                             DebugLoc DL) const
+{
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  if (Cond.empty()) {
+    // An unconditional branch has exactly one destination.
+    assert(FBB == 0 && "Unconditional branch cannot have a false destination");
+    BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(TBB);
+    return 1;
+  }
+
+  // Conditional branch to TBB.
+  BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
+    .addMBB(TBB).addReg(Cond[0].getReg());
+  if (FBB == 0) {
+    return 1;
+  }
+
+  // Two-way branch: follow up with an unconditional branch to FBB.
+  BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(FBB);
+  // Kept from the original: assertion-enabled builds still flag this path.
+  assert(0 && "Inserting two branches not supported");
+  return 2;
+}
+
+/// Remove the branch instructions at the end of MBB and return how many were
+/// erased (0, 1 or 2).
+///
+/// The last instruction is erased if it is BRANCH or any scalar BRANCH_COND.
+/// The instruction that then becomes last is erased only if it is a scalar
+/// BRANCH_COND.
+unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+  MachineBasicBlock::iterator Last = MBB.end();
+  if (Last == MBB.begin()) {
+    return 0;
+  }
+  --Last;
+  switch (Last->getOpcode()) {
+  ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+  case AMDIL::BRANCH:
+    Last->eraseFromParent();
+    break;
+  default:
+    return 0;
+  }
+
+  // Re-fetch the end of the block; the previous iterator was just erased.
+  Last = MBB.end();
+  if (Last == MBB.begin()) {
+    return 1;
+  }
+  --Last;
+  switch (Last->getOpcode()) {
+  // FIXME: only one case??
+  ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+    Last->eraseFromParent();
+    break;
+  default:
+    return 1;
+  }
+  return 2;
+}
+
+/// Return the insertion point that precedes the trailing run of structured
+/// flow-control instructions (ENDLOOP, ENDIF, ELSE) at the end of MBB.
+///
+/// Walks backwards from the end of the block. The result is the iterator just
+/// after the last instruction that is not one of those opcodes, or
+/// MBB->begin() if the whole block consists of them. An empty block yields
+/// MBB->end().
+///
+/// Uses empty() rather than size(), which walks the whole intrusive
+/// instruction list, and no longer relies on the iterator's implicit
+/// conversion to a pointer as the loop condition.
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB)
+{
+  if (MBB->empty()) {
+    return MBB->end();
+  }
+  MachineBasicBlock::iterator tmp = MBB->end();
+  for (;;) {
+    --tmp;
+    switch (tmp->getOpcode()) {
+    case AMDIL::ENDLOOP:
+    case AMDIL::ENDIF:
+    case AMDIL::ELSE:
+      // Still inside the trailing flow-control run; stop at the block start.
+      if (tmp == MBB->begin()) {
+        return tmp;
+      }
+      break;
+    default:
+      // First non-flow-control instruction: insert right after it.
+      return ++tmp;
+    }
+  }
+}
+
+/// Emit a register-to-register move from SrcReg to DestReg and return true.
+///
+/// The move opcode is chosen from DestRC only. The previous revision compared
+/// the sizes of DestRC and SrcRC but emitted exactly the same instruction in
+/// every branch, so that comparison (and the dereference of SrcRC it
+/// required) has been dropped; conversions between register classes are not
+/// handled.
+bool
+AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator I,
+                             unsigned DestReg, unsigned SrcReg,
+                             const TargetRegisterClass *DestRC,
+                             const TargetRegisterClass *SrcRC,
+                             DebugLoc DL) const
+{
+  (void)SrcRC; // No longer consulted; see the function comment.
+
+  // NOTE(review): when either register number is below 1025 the regular move
+  // opcode is used, otherwise the PHI move opcode. The flag name `phi` reads
+  // inverted relative to the helper it selects, and the 1025 threshold is
+  // undocumented -- confirm the intent.
+  const bool phi = (DestReg < 1025) || (SrcReg < 1025);
+  const int movInst = phi ? getMoveInstFromID(DestRC->getID())
+                          : getPHIMoveInstFromID(DestRC->getID());
+
+  // If we are adding to the end of a basic block we can safely assume that the
+  // move is caused by a PHI node, since all non-PHI move instructions have
+  // already been inserted into the basic blocks. Therefore we skip backwards
+  // over the trailing flow control instructions and put the move before them.
+  MachineBasicBlock::iterator iTemp = (I == MBB.end()) ? skipFlowControl(&MBB)
+                                                       : I;
+  BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
+  return true;
+}
+/// Emit a copy from SrcReg to DestReg before MI. A MOVE_v4i32 is used for
+/// every register pair; SrcReg is marked killed when KillSrc is set.
+void
+AMDILInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI, DebugLoc DL,
+                            unsigned DestReg, unsigned SrcReg,
+                            bool KillSrc) const
+{
+  const unsigned SrcFlags = getKillRegState(KillSrc);
+  MachineInstrBuilder Copy =
+    BuildMI(MBB, MI, DL, get(AMDIL::MOVE_v4i32), DestReg);
+  Copy.addReg(SrcReg, SrcFlags);
+#if 0
+  DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+        << " to " << RI.getName(DestReg) << '\n');
+  llvm_unreachable("Cannot emit physreg copy instruction");
+#endif
+}
+/// Spill SrcReg to the stack slot FrameIndex by inserting a PRIVATESTORE
+/// instruction before MI.
+///
+/// The store opcode is selected from the register class RC (unknown classes
+/// use the v4i32 variant). The function is marked as using scratch memory and
+/// the new instruction is tagged with the scratch resource ID.
+///
+/// Fixes relative to the previous revision:
+///  - the memory operand is now flagged MOStore. It was flagged MOLoad, so
+///    hasStoreToStackSlot() could never recognise these spills;
+///  - the memory operand is allocated through the MachineFunction
+///    (getMachineMemOperand) instead of a raw `new` that nothing ever freed;
+///  - the duplicated DebugLoc assignment has been removed.
+void
+AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    unsigned SrcReg, bool isKill,
+                                    int FrameIndex,
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const
+{
+  MachineFunction &MF = *(MBB.getParent());
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+  unsigned int Opc = 0;
+  switch (RC->getID()) {
+  default:                         Opc = AMDIL::PRIVATESTORE_v4i32; break;
+  case AMDIL::GPRF32RegClassID:    Opc = AMDIL::PRIVATESTORE_f32;   break;
+  case AMDIL::GPRF64RegClassID:    Opc = AMDIL::PRIVATESTORE_f64;   break;
+  case AMDIL::GPRI16RegClassID:    Opc = AMDIL::PRIVATESTORE_i16;   break;
+  case AMDIL::GPRI32RegClassID:    Opc = AMDIL::PRIVATESTORE_i32;   break;
+  case AMDIL::GPRI8RegClassID:     Opc = AMDIL::PRIVATESTORE_i8;    break;
+  case AMDIL::GPRI64RegClassID:    Opc = AMDIL::PRIVATESTORE_i64;   break;
+  case AMDIL::GPRV2F32RegClassID:  Opc = AMDIL::PRIVATESTORE_v2f32; break;
+  case AMDIL::GPRV2F64RegClassID:  Opc = AMDIL::PRIVATESTORE_v2f64; break;
+  case AMDIL::GPRV2I16RegClassID:  Opc = AMDIL::PRIVATESTORE_v2i16; break;
+  case AMDIL::GPRV2I32RegClassID:  Opc = AMDIL::PRIVATESTORE_v2i32; break;
+  case AMDIL::GPRV2I8RegClassID:   Opc = AMDIL::PRIVATESTORE_v2i8;  break;
+  case AMDIL::GPRV2I64RegClassID:  Opc = AMDIL::PRIVATESTORE_v2i64; break;
+  case AMDIL::GPRV4F32RegClassID:  Opc = AMDIL::PRIVATESTORE_v4f32; break;
+  case AMDIL::GPRV4I16RegClassID:  Opc = AMDIL::PRIVATESTORE_v4i16; break;
+  case AMDIL::GPRV4I32RegClassID:  Opc = AMDIL::PRIVATESTORE_v4i32; break;
+  case AMDIL::GPRV4I8RegClassID:   Opc = AMDIL::PRIVATESTORE_v4i8;  break;
+  }
+
+  DebugLoc DL;
+  if (MI != MBB.end()) {
+    DL = MI->getDebugLoc();
+  }
+
+  // Memory operands are owned by the MachineFunction.
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex),
+                            MachineMemOperand::MOStore,
+                            MFI.getObjectSize(FrameIndex),
+                            MFI.getObjectAlignment(FrameIndex));
+
+  MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc))
+                      .addReg(SrcReg, getKillRegState(isKill))
+                      .addFrameIndex(FrameIndex)
+                      .addMemOperand(MMO)
+                      .addImm(0);
+
+  AMDILMachineFunctionInfo *mfinfo = MF.getInfo<AMDILMachineFunctionInfo>();
+  mfinfo->setUsesScratch();
+
+  // NOTE(review): only ResourceID is assigned before curRes is handed to
+  // setAsmPrinterFlags; confirm the remaining fields do not need initialising.
+  AMDILAS::InstrResEnc curRes;
+  curRes.bits.ResourceID
+    = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+  setAsmPrinterFlags(nMI, curRes);
+}
+
+/// Reload DestReg from the stack slot FrameIndex by inserting a PRIVATELOAD
+/// instruction before MI.
+///
+/// The load opcode is selected from the register class RC (unknown classes
+/// use the v4i32 variant). The function is marked as using scratch memory and
+/// the new instruction is tagged with the scratch resource ID.
+///
+/// Fix relative to the previous revision: the memory operand is allocated
+/// through the MachineFunction (getMachineMemOperand) instead of a raw `new`
+/// that nothing ever freed.
+void
+AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MI,
+                                     unsigned DestReg, int FrameIndex,
+                                     const TargetRegisterClass *RC,
+                                     const TargetRegisterInfo *TRI) const
+{
+  MachineFunction &MF = *(MBB.getParent());
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+  unsigned int Opc = 0;
+  switch (RC->getID()) {
+  default:                         Opc = AMDIL::PRIVATELOAD_v4i32; break;
+  case AMDIL::GPRF32RegClassID:    Opc = AMDIL::PRIVATELOAD_f32;   break;
+  case AMDIL::GPRF64RegClassID:    Opc = AMDIL::PRIVATELOAD_f64;   break;
+  case AMDIL::GPRI16RegClassID:    Opc = AMDIL::PRIVATELOAD_i16;   break;
+  case AMDIL::GPRI32RegClassID:    Opc = AMDIL::PRIVATELOAD_i32;   break;
+  case AMDIL::GPRI8RegClassID:     Opc = AMDIL::PRIVATELOAD_i8;    break;
+  case AMDIL::GPRI64RegClassID:    Opc = AMDIL::PRIVATELOAD_i64;   break;
+  case AMDIL::GPRV2F32RegClassID:  Opc = AMDIL::PRIVATELOAD_v2f32; break;
+  case AMDIL::GPRV2F64RegClassID:  Opc = AMDIL::PRIVATELOAD_v2f64; break;
+  case AMDIL::GPRV2I16RegClassID:  Opc = AMDIL::PRIVATELOAD_v2i16; break;
+  case AMDIL::GPRV2I32RegClassID:  Opc = AMDIL::PRIVATELOAD_v2i32; break;
+  case AMDIL::GPRV2I8RegClassID:   Opc = AMDIL::PRIVATELOAD_v2i8;  break;
+  case AMDIL::GPRV2I64RegClassID:  Opc = AMDIL::PRIVATELOAD_v2i64; break;
+  case AMDIL::GPRV4F32RegClassID:  Opc = AMDIL::PRIVATELOAD_v4f32; break;
+  case AMDIL::GPRV4I16RegClassID:  Opc = AMDIL::PRIVATELOAD_v4i16; break;
+  case AMDIL::GPRV4I32RegClassID:  Opc = AMDIL::PRIVATELOAD_v4i32; break;
+  case AMDIL::GPRV4I8RegClassID:   Opc = AMDIL::PRIVATELOAD_v4i8;  break;
+  }
+
+  // Memory operands are owned by the MachineFunction.
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex),
+                            MachineMemOperand::MOLoad,
+                            MFI.getObjectSize(FrameIndex),
+                            MFI.getObjectAlignment(FrameIndex));
+
+  DebugLoc DL;
+  if (MI != MBB.end()) {
+    DL = MI->getDebugLoc();
+  }
+
+  AMDILMachineFunctionInfo *mfinfo = MF.getInfo<AMDILMachineFunctionInfo>();
+  mfinfo->setUsesScratch();
+
+  MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc))
+                      .addReg(DestReg, RegState::Define)
+                      .addFrameIndex(FrameIndex)
+                      .addMemOperand(MMO)
+                      .addImm(0);
+
+  // NOTE(review): only ResourceID is assigned before curRes is handed to
+  // setAsmPrinterFlags; confirm the remaining fields do not need initialising.
+  AMDILAS::InstrResEnc curRes;
+  curRes.bits.ResourceID
+    = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+  setAsmPrinterFlags(nMI, curRes);
+}
+#if 0
+MachineInstr *
+AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const
+{
+ // TODO: Implement this function
+ return 0;
+}
+MachineInstr*
+AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const
+{
+ // TODO: Implement this function
+ return 0;
+}
+#endif
+
+#if 0
+bool
+AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const
+{
+ // TODO: Implement this function
+ return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
+}
+bool
+AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad,
+ bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ // TODO: Implement this function
+ return false;
+}
+
+bool
+AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const
+{
+ // TODO: Implement this function
+ return false;
+}
+
+unsigned
+AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const
+{
+ // TODO: Implement this function
+ return 0;
+}
+#endif
+/// Decide whether two selected load nodes read from the same base pointer
+/// and, if so, report their trailing constant GEP indices.
+///
+/// Returns true only when both nodes are machine nodes whose first memory
+/// operand is a load, both underlying IR values are GetElementPtrInsts with
+/// the same base (per getBasePointerValue), all GEP operands except the last
+/// are identical, and the last operand of each is a ConstantInt. In that case
+/// \p Offset1 and \p Offset2 receive the sign-extended last operands; note
+/// these are GEP index values, not byte offsets. In every other case the
+/// function returns false and leaves the offsets untouched.
+bool
+AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                                        int64_t &Offset1,
+                                        int64_t &Offset2) const
+{
+  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) {
+    return false;
+  }
+  const MachineSDNode *mload1 = dyn_cast<MachineSDNode>(Load1);
+  const MachineSDNode *mload2 = dyn_cast<MachineSDNode>(Load2);
+  if (!mload1 || !mload2) {
+    return false;
+  }
+  if (mload1->memoperands_empty() ||
+      mload2->memoperands_empty()) {
+    return false;
+  }
+  MachineMemOperand *memOp1 = (*mload1->memoperands_begin());
+  MachineMemOperand *memOp2 = (*mload2->memoperands_begin());
+  if (!memOp1->isLoad() || !memOp2->isLoad()) {
+    return false;
+  }
+  const Value *mv1 = memOp1->getValue();
+  const Value *mv2 = memOp2->getValue();
+  // A memory operand is not guaranteed to carry an IR value, and the casting
+  // helpers below must never be handed a null pointer.
+  if (!mv1 || !mv2) {
+    return false;
+  }
+  if (getBasePointerValue(mv1) != getBasePointerValue(mv2)) {
+    return false;
+  }
+  // Only the GEP/GEP case is analysed; arguments, globals and any other
+  // combination are conservatively rejected.
+  const GetElementPtrInst *gep1 = dyn_cast<GetElementPtrInst>(mv1);
+  const GetElementPtrInst *gep2 = dyn_cast<GetElementPtrInst>(mv2);
+  if (!gep1 || !gep2) {
+    return false;
+  }
+  if (gep1->getNumOperands() != gep2->getNumOperands()) {
+    return false;
+  }
+  const unsigned last = gep1->getNumOperands() - 1;
+  for (unsigned i = 0; i < last; ++i) {
+    // If any operand except the last one is different, return false.
+    if (gep1->getOperand(i) != gep2->getOperand(i)) {
+      return false;
+    }
+  }
+  const ConstantInt *idx1 = dyn_cast<ConstantInt>(gep1->getOperand(last));
+  const ConstantInt *idx2 = dyn_cast<ConstantInt>(gep2->getOperand(last));
+  if (!idx1 || !idx2) {
+    return false;
+  }
+  Offset1 = idx1->getSExtValue();
+  Offset2 = idx2->getSExtValue();
+  return true;
+}
+
+/// Tell the scheduler whether two loads should be kept close together.
+///
+/// Only LoadSDNodes whose IR source value is a pointer into the global
+/// address space are considered; anything else (including loads without a
+/// source value or with a non-pointer source type) returns false.
+/// \p Offset2 is expected to be larger than \p Offset1.
+/// \returns true when fewer than 16 loads are already clustered and the two
+/// offsets differ by less than 16.
+bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+                                             int64_t Offset1, int64_t Offset2,
+                                             unsigned NumLoads) const
+{
+  LoadSDNode *LoadSD1 = dyn_cast<LoadSDNode>(Load1);
+  LoadSDNode *LoadSD2 = dyn_cast<LoadSDNode>(Load2);
+  if (!LoadSD1 || !LoadSD2) {
+    return false;
+  }
+  // The source value may be absent; without it the address space is unknown.
+  const Value *src1 = LoadSD1->getSrcValue();
+  const Value *src2 = LoadSD2->getSrcValue();
+  if (!src1 || !src2) {
+    return false;
+  }
+  const PointerType *ptrTy1 = dyn_cast<PointerType>(src1->getType());
+  const PointerType *ptrTy2 = dyn_cast<PointerType>(src2->getType());
+  if (!ptrTy1 || !ptrTy2) {
+    return false;
+  }
+  // We only care about scheduling loads near for global address space.
+  if (ptrTy1->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS ||
+      ptrTy2->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) {
+    return false;
+  }
+  assert(Offset2 > Offset1
+         && "Second offset should be larger than first offset!");
+  // If we have less than 16 loads in a row, and the offsets are within 16,
+  // then schedule together.
+  // TODO: Make the loads schedule near if it fits in a cacheline
+  return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+}
+
+/// Return true for selected barrier and fence machine opcodes, false for
+/// every other node (including nodes that are not machine opcodes).
+bool AMDILInstrInfo::shouldScheduleWithNormalPriority(SDNode* instruction) const
+{
+  // Nodes that have not been selected to a machine opcode never qualify.
+  if (!instruction->isMachineOpcode()) {
+    return false;
+  }
+  switch (instruction->getMachineOpcode()) {
+  case AMDIL::BARRIER_7XX:
+  case AMDIL::BARRIER_EGNI:
+  case AMDIL::BARRIER_LOCAL:
+  case AMDIL::BARRIER_GLOBAL:
+  case AMDIL::BARRIER_REGION:
+  case AMDIL::FENCE:
+  case AMDIL::FENCE_LOCAL:
+  case AMDIL::FENCE_GLOBAL:
+  case AMDIL::FENCE_REGION:
+  case AMDIL::FENCE_READ_ONLY:
+  case AMDIL::FENCE_READ_ONLY_LOCAL:
+  case AMDIL::FENCE_READ_ONLY_GLOBAL:
+  case AMDIL::FENCE_READ_ONLY_REGION:
+  case AMDIL::FENCE_WRITE_ONLY:
+  case AMDIL::FENCE_WRITE_ONLY_LOCAL:
+  case AMDIL::FENCE_WRITE_ONLY_GLOBAL:
+  case AMDIL::FENCE_WRITE_ONLY_REGION:
+    // Maybe other instructions will need to be added to this?
+    return true;
+  default:
+    break;
+  }
+  return false;
+}
+
+// Placeholder: does not inspect or modify Cond and always returns true.
+// NOTE(review): TargetInstrInfo documents a true return as "the condition
+// cannot be reversed" - confirm that is the intended answer for AMDIL.
+bool
+AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+const
+{
+ // TODO: Implement this function
+ return true;
+}
+// Placeholder: currently inserts nothing into MBB.
+void AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const
+{
+ // TODO: Implement this function
+}
+
+// Placeholder: reports every instruction as not predicated.
+bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const
+{
+ // TODO: Implement this function
+ return false;
+}
+// Placeholder: reports that no instruction is an unpredicated terminator.
+bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const
+{
+ // TODO: Implement this function
+ return false;
+}
+
+// Placeholder: leaves MI untouched and returns false.
+bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const
+{
+ // TODO: Implement this function
+ return false;
+}
+
+// Placeholder: never reports that Pred1 subsumes Pred2.
+bool
+AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2)
+const
+{
+ // TODO: Implement this function
+ return false;
+}
+
+// Placeholder: leaves Pred untouched and returns false.
+bool AMDILInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const
+{
+ // TODO: Implement this function
+ return false;
+}
+
+// Forwards to the predicable flag of MI's instruction description; there is
+// no target-specific refinement yet.
+bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const
+{
+ // TODO: Implement this function
+ return MI->getDesc().isPredicable();
+}
+
+// Placeholder: answers true for every register class.
+bool
+AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const
+{
+ // TODO: Implement this function
+ return true;
+}
+
+// Placeholder: reports a size of 0 for every instruction.
+unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const
+{
+ // TODO: Implement this function
+ return 0;
+}
+
+
+// Placeholder: reports a size of 0 for every function.
+unsigned
+AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const
+{
+ // TODO: Implement this function
+ return 0;
+}
+
+// Placeholder: reports a length of 0 for every inline-asm string.
+unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const
+{
+ // TODO: Implement this function
+ return 0;
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,195 @@
+//===-- AMDILInstrInfo.h --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILINSTRUCTIONINFO_H_
+#define AMDILINSTRUCTIONINFO_H_
+
+#include "AMDIL.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include "AMDILRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AMDILGenInstrInfo.inc"
+
+namespace llvm
+{
+// AMDIL - This namespace holds all of the target specific flags that
+// instruction info tracks.
+//
+//class AMDILTargetMachine;
+// AMDILInstrInfo - AMDIL instruction information, layered on top of the
+// TableGen-generated AMDILGenInstrInfo. Owns the AMDIL register info and
+// keeps a reference to the target machine it was constructed with.
+class AMDILInstrInfo : public AMDILGenInstrInfo
+{
+private:
+ // Register info handed out by getRegisterInfo().
+ const AMDILRegisterInfo RI;
+ // Target machine passed to the constructor.
+ AMDILTargetMachine &TM;
+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const;
+ unsigned int getBranchInstr(const MachineOperand &op) const;
+public:
+ explicit AMDILInstrInfo(AMDILTargetMachine &tm);
+
+ // getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ // such, whenever a client has an instance of instruction info, it should
+ // always be able to get register info as well (through this method).
+ const AMDILRegisterInfo &getRegisterInfo() const;
+
+ // Return true if the instruction is a register to register move and leave the
+ // source and dest operands in the passed parameters.
+ bool isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
+ unsigned int &DstReg, unsigned int &SrcSubIdx,
+ unsigned int &DstSubIdx) const;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const;
+
+ // Stack-slot load/store recognition queries.
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+
+
+ void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const;
+
+ MachineInstr *duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const;
+
+ MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ MachineInstr *commuteInstruction(MachineInstr *MI,
+ bool NewMI = false) const;
+ bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const;
+ bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI = 0) const;
+
+
+
+ // Branch analysis and manipulation.
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ unsigned
+ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ // Register copies.
+ bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ // Spill and reload of a register to/from a stack slot.
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+protected:
+ // Memory-operand folding hooks; currently compiled out.
+#if 0
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const;
+#endif
+public:
+ // Memory-operand fold/unfold queries; currently compiled out.
+#if 0
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode *> &NewNodes) const;
+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+#endif
+ // Load clustering hints for the scheduler.
+ bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2) const;
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ /// Schedule BARRIER instructions differently.
+ /// Schedule this instruction based entirely on its Sethi-Ullman number,
+ /// without raising or lowering its priority based on use or def numbers.
+ /// What this really says is that the instruction has some effect on execution
+ /// that is not modeled in the DAG. (For instance, a multi-thread execution
+ /// barrier.) On the GPU AMDIL backend, moving these instructions too far up
+ /// or down in the execution can artificially constrain the scheduling in the
+ /// shared compiler.
+ bool shouldScheduleWithNormalPriority(SDNode* instruction) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ // Predication queries.
+ bool isPredicated(const MachineInstr *MI) const;
+
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ bool isPredicable(MachineInstr *MI) const;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ // Size queries.
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
+ unsigned getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const;
+
+};
+
+}
+
+#endif // AMDILINSTRUCTIONINFO_H_
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,132 @@
+//===-- AMDILInstrInfo.td -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the AMDIL instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// Predicate that is set to true if double precision divide is done in
+// hardware: the device is newer than HD4XXX, uses hardware DoubleOps, and
+// either is no newer than HD6XXX or the CAL version is below SC_155.
+def HasHWDDiv : Predicate<"Subtarget->device()"
+ "->getGeneration() > AMDILDeviceInfo::HD4XXX && "
+ "Subtarget->device()->usesHardware(AMDILDeviceInfo::DoubleOps) && "
+ "(Subtarget->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX ||"
+ "Subtarget->calVersion() < CAL_VERSION_SC_155)">;
+
+// Predicate that is set to true if the hardware supports double, but not double
+// precision divide in hardware
+def HasSWDDiv : Predicate<"Subtarget->device()"
+ "->getGeneration() == AMDILDeviceInfo::HD4XXX &&"
+ "Subtarget->device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports 24bit signed
+// math ops. Otherwise a software expansion to 32bit math ops is used instead.
+def HasHWSign24Bit : Predicate<"Subtarget->device()"
+ "->getGeneration() > AMDILDeviceInfo::HD5XXX">;
+
+// Predicates that select between hardware (HasHW64Bit) and software
+// (HasSW64Bit) handling of 64bit operations.
+def HasHW64Bit : Predicate<"Subtarget->device()"
+ "->usesHardware(AMDILDeviceInfo::LongOps)">;
+def HasSW64Bit : Predicate<"Subtarget->device()"
+ "->usesSoftware(AMDILDeviceInfo::LongOps)">;
+
+// Predicate that is set to true if the timer register is supported
+def HasTmrRegister : Predicate<"Subtarget->device()"
+ "->isSupported(AMDILDeviceInfo::TmrReg)">;
+
+// Predicate that is true if we have region address space.
+def hasRegionAS : Predicate<"Subtarget->device()"
+ "->usesHardware(AMDILDeviceInfo::RegionMem)">;
+
+// Predicate that is true if we don't have region address space.
+def noRegionAS : Predicate<"!Subtarget->device()"
+ "->isSupported(AMDILDeviceInfo::RegionMem)">;
+
+
+// Predicates that select between hardware (HasHW64Mul) and software
+// (HasSW64Mul) 64bit Mul in the IL.
+// NOTE(review): the two are not exact complements - a pre-HD5XXX device with
+// calVersion() >= CAL_VERSION_SC_139 satisfies neither; confirm intended.
+def HasHW64Mul : Predicate<"Subtarget->calVersion()"
+ ">= CAL_VERSION_SC_139"
+ "&& Subtarget->device()"
+ "->getGeneration() >="
+ "AMDILDeviceInfo::HD5XXX">;
+def HasSW64Mul : Predicate<"Subtarget->calVersion()"
+ "< CAL_VERSION_SC_139">;
+// Predicates that select between hardware (HasHW64DivMod) and software
+// (HasSW64DivMod) 64bit Div/Mod in the IL.
+def HasHW64DivMod : Predicate<"Subtarget->device()"
+ "->usesHardware(AMDILDeviceInfo::HW64BitDivMod)">;
+def HasSW64DivMod : Predicate<"Subtarget->device()"
+ "->usesSoftware(AMDILDeviceInfo::HW64BitDivMod)">;
+
+
+// Predicate that is set to true if BFI/BFM are supported.
+def HasHWBitFieldInst : Predicate<"Subtarget->calVersion()"
+ ">= CAL_VERSION_SC_151"
+ "&& Subtarget->device()"
+ "->getGeneration() >="
+ "AMDILDeviceInfo::HD5XXX">;
+
+// CAL-version gates for double-precision abs and conversions.
+def HasHWDoubleAbs : Predicate<"Subtarget->calVersion()"
+ ">= CAL_VERSION_SC_153">;
+def HasSWDoubleAbs : Predicate<"Subtarget->calVersion()"
+ "< CAL_VERSION_SC_153">;
+def HasHWDoubleConv : Predicate<"Subtarget->calVersion()"
+ ">= CAL_VERSION_SC_155">;
+
+// Device-generation gates.
+def IsEGOrLaterDevice : Predicate<"Subtarget->device()->getGeneration()"
+ " >= AMDILDeviceInfo::HD5XXX">;
+def HasByteShortUAV : Predicate<"Subtarget->device()"
+ "->getGeneration() >= AMDILDeviceInfo::HD7XXX">;
+
+// Pointer-width gates.
+def Has64BitPtr : Predicate<"Subtarget->is64bit()">;
+def Has32BitPtr : Predicate<"!Subtarget->is64bit()">;
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+include "AMDILOperands.td"
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Type Profiles
+//===--------------------------------------------------------------------===//
+include "AMDILProfiles.td"
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Nodes
+//===--------------------------------------------------------------------===//
+include "AMDILNodes.td"
+
+//===--------------------------------------------------------------------===//
+// Custom Pattern DAG Nodes
+//===--------------------------------------------------------------------===//
+include "AMDILPatterns.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction format classes
+//===----------------------------------------------------------------------===//
+include "AMDILFormats.td"
+
+//===--------------------------------------------------------------------===//
+// Multiclass Instruction formats
+//===--------------------------------------------------------------------===//
+include "AMDILMultiClass.td"
+
+//===--------------------------------------------------------------------===//
+// Intrinsics support
+//===--------------------------------------------------------------------===//
+include "AMDILIntrinsics.td"
+
+//===--------------------------------------------------------------------===//
+// Instructions support
+//===--------------------------------------------------------------------===//
+include "AMDILInstructions.td"
+include "AMDILMem64.td"
+include "AMDILMem32.td"
+//===--------------------------------------------------------------------===//
+// Instruction Pattern support - This Must be the last include in the file
+// as it requires items defined in other files
+//===--------------------------------------------------------------------===//
+include "AMDILInstrPatterns.td"
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,401 @@
+//===-- AMDILInstrPatterns.td ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Select a plain i32 'or' to BINARY_OR_i32.
+def : Pat<(i32 (or GPRI32:$src0, GPRI32:$src1)),
+ (i32 (BINARY_OR_i32 GPRI32:$src0, GPRI32:$src1))>;
+
+// integer subtraction
+// a - b ==> a + (-b)
+// One pattern per 8/16/32bit scalar and 2-/4-element vector register class.
+def SUB_i8 : Pat<(sub GPRI8:$src0, GPRI8:$src1),
+ (ADD_i8 GPRI8:$src0, (NEGATE_i8 GPRI8:$src1))>;
+def SUB_v2i8 : Pat<(sub GPRV2I8:$src0, GPRV2I8:$src1),
+ (ADD_v2i8 GPRV2I8:$src0, (NEGATE_v2i8 GPRV2I8:$src1))>;
+def SUB_v4i8 : Pat<(sub GPRV4I8:$src0, GPRV4I8:$src1),
+ (ADD_v4i8 GPRV4I8:$src0, (NEGATE_v4i8 GPRV4I8:$src1))>;
+def SUB_i16 : Pat<(sub GPRI16:$src0, GPRI16:$src1),
+ (ADD_i16 GPRI16:$src0, (NEGATE_i16 GPRI16:$src1))>;
+def SUB_v2i16 : Pat<(sub GPRV2I16:$src0, GPRV2I16:$src1),
+ (ADD_v2i16 GPRV2I16:$src0, (NEGATE_v2i16 GPRV2I16:$src1))>;
+def SUB_v4i16 : Pat<(sub GPRV4I16:$src0, GPRV4I16:$src1),
+ (ADD_v4i16 GPRV4I16:$src0, (NEGATE_v4i16 GPRV4I16:$src1))>;
+def SUB_i32 : Pat<(sub GPRI32:$src0, GPRI32:$src1),
+ (ADD_i32 GPRI32:$src0, (NEGATE_i32 GPRI32:$src1))>;
+def SUB_v2i32 : Pat<(sub GPRV2I32:$src0, GPRV2I32:$src1),
+ (ADD_v2i32 GPRV2I32:$src0, (NEGATE_v2i32 GPRV2I32:$src1))>;
+def SUB_v4i32 : Pat<(sub GPRV4I32:$src0, GPRV4I32:$src1),
+ (ADD_v4i32 GPRV4I32:$src0, (NEGATE_v4i32 GPRV4I32:$src1))>;
+
+// Convert float -> ulong efficiently
+// static ulong
+// cf2ul(float f)
+// {
+// float fh = f * 0x1.0p-32f;
+// uint uh = (uint)fh;
+// float fuh = (float)uh;
+// float fl = mad(-0x1.0p+32f, fuh, f);
+// uint ul = (uint)fl;
+// return as_ulong((uint2)(ul, uh));
+// }
+// The integer constants are IEEE-754 single precision bit patterns:
+// 0x2f800000 is 0x1.0p-32f and 0xcf800000 is -0x1.0p+32f.
+// The first LCREATE operand is 'ul', the second is 'uh'; the 'uh'
+// sub-expression is written out twice in the result pattern.
+def FTOUL_i64 : Pat<(i64 (fp_to_uint GPRF32:$src0)),
+ (LCREATE
+ (FTOU
+ (FMAD_f32
+ (IL_ASFLOAT_i32 (LOADCONST_i32 0xcf800000)),
+ (UTOF
+ (FTOU
+ (MUL_IEEE_f32 GPRF32:$src0,
+ (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000))
+ )
+ )
+ ),
+ GPRF32:$src0)
+ ),
+ (FTOU
+ (MUL_IEEE_f32 GPRF32:$src0,
+ (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000))
+ )
+ )
+ )>;
+
+// Two-element vector form of the float -> ulong conversion:
+// static ulong2
+// cf22ul2(float2 f)
+// {
+// float2 fh = f * 0x1.0p-32f;
+// uint2 uh = convert_uint2(fh);
+// float2 fuh = convert_float2(uh);
+// float2 fl = mad(-0x1.0p+32f, fuh, f);
+// uint2 ul = convert_uint2(fl);
+// return as_ulong2((uint4)(ul, uh));
+// }
+// 0x2f800000 is the bit pattern of 0x1.0p-32f and 0xcf800000 that of
+// -0x1.0p+32f; VCREATE_v2f32 turns each scalar constant into a v2f32 operand.
+def FTOUL_v2i64 : Pat<(v2i64 (fp_to_uint GPRV2F32:$src0)),
+ (LCREATE_v2i64
+ (FTOU_v2i32
+ (FMAD_v2f32
+ (VCREATE_v2f32
+ (IL_ASFLOAT_i32 (LOADCONST_i32 0xcf800000))),
+ (UTOF_v2f32 (FTOU_v2i32
+ (MUL_IEEE_v2f32 GPRV2F32:$src0,
+ (VCREATE_v2f32
+ (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000)))))),
+ GPRV2F32:$src0)),
+ (FTOU_v2i32 (MUL_IEEE_v2f32 GPRV2F32:$src0,
+ (VCREATE_v2f32
+ (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000))))))>;
+
+// LLVM isn't lowering this correctly, so writing a pattern that
+// matches it instead.
+// Each pattern turns a single-immediate build_vector into a LOADCONST of the
+// element followed by a VCREATE of the vector type.
+// NOTE(review): the f32/i32/i16/i8 source patterns appear twice (once with a
+// 4-element and once with a 2-element result) and carry no explicit result
+// type; presumably type inference from the VCREATE result tells them apart -
+// confirm.
+def : Pat<(build_vector (f32 fpimm:$src)),
+ (VCREATE_v4f32 (LOADCONST_f32 fpimm:$src))>;
+def : Pat<(build_vector (i32 imm:$src)),
+ (VCREATE_v4i32 (LOADCONST_i32 imm:$src))>;
+def : Pat<(build_vector (i16 imm:$src)),
+ (VCREATE_v4i16 (LOADCONST_i16 imm:$src))>;
+def : Pat<(build_vector (i8 imm:$src)),
+ (VCREATE_v4i8 (LOADCONST_i8 imm:$src))>;
+def : Pat<(build_vector (f64 fpimm:$src)),
+ (VCREATE_v2f64 (LOADCONST_f64 fpimm:$src))>;
+def : Pat<(build_vector (f32 fpimm:$src)),
+ (VCREATE_v2f32 (LOADCONST_f32 fpimm:$src))>;
+def : Pat<(build_vector (i64 imm:$src)),
+ (VCREATE_v2i64 (LOADCONST_i64 imm:$src))>;
+def : Pat<(build_vector (i32 imm:$src)),
+ (VCREATE_v2i32 (LOADCONST_i32 imm:$src))>;
+def : Pat<(build_vector (i16 imm:$src)),
+ (VCREATE_v2i16 (LOADCONST_i16 imm:$src))>;
+def : Pat<(build_vector (i8 imm:$src)),
+ (VCREATE_v2i8 (LOADCONST_i8 imm:$src))>;
+
+// Correctly lower 32bit shifts whose shift amount is 64bit. The amount is
+// narrowed with LLO and the 32bit shift matching the source node is used:
+//   shl -> SHL_i32, sra -> SHR_i32 (arithmetic), srl -> USHR_i32 (logical).
+// Previously sra and srl were also lowered to SHL_i32, i.e. a right shift
+// was emitted as a left shift.
+// NOTE(review): SHR_i32 / USHR_i32 are expected to be the sra / srl
+// instruction defs from AMDILInstructions.td - confirm the names.
+//
+// 32bit left hand side immediate
+def : Pat<(i32 (shl imm:$src, GPRI64:$shift)),
+          (SHL_i32 (LOADCONST_i32 imm:$src), (LLO GPRI64:$shift))>;
+def : Pat<(i32 (sra imm:$src, GPRI64:$shift)),
+          (SHR_i32 (LOADCONST_i32 imm:$src), (LLO GPRI64:$shift))>;
+def : Pat<(i32 (srl imm:$src, GPRI64:$shift)),
+          (USHR_i32 (LOADCONST_i32 imm:$src), (LLO GPRI64:$shift))>;
+// 32bit left hand side register
+def : Pat<(i32 (shl GPRI32:$src, GPRI64:$shift)),
+          (SHL_i32 GPRI32:$src, (LLO GPRI64:$shift))>;
+def : Pat<(i32 (sra GPRI32:$src, GPRI64:$shift)),
+          (SHR_i32 GPRI32:$src, (LLO GPRI64:$shift))>;
+def : Pat<(i32 (srl GPRI32:$src, GPRI64:$shift)),
+          (USHR_i32 GPRI32:$src, (LLO GPRI64:$shift))>;
+// 64bit right hand side immediate
+def : Pat<(i32 (shl GPRI32:$src, (i64 imm:$shift))),
+          (SHL_i32 GPRI32:$src, (LLO (LOADCONST_i64 imm:$shift)))>;
+def : Pat<(i32 (sra GPRI32:$src, (i64 imm:$shift))),
+          (SHR_i32 GPRI32:$src, (LLO (LOADCONST_i64 imm:$shift)))>;
+def : Pat<(i32 (srl GPRI32:$src, (i64 imm:$shift))),
+          (USHR_i32 GPRI32:$src, (LLO (LOADCONST_i64 imm:$shift)))>;
+
+// Calls:
+// IL_call on a target global address, external symbol or constant pool
+// entry is selected to CALL on the same operand.
+def : Pat<(IL_call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(IL_call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+def : Pat<(IL_call tconstpool:$dst),
+ (CALL tconstpool:$dst)>;
+
+include "AMDILConversions.td"
+
+/// Bitfield Insert pattern fragments
+/// Leaf that matches a target immediate whose bits are all ones.
+def isLoadConstantAllOnes : PatLeaf<(timm),
+ [{
+ return N->isAllOnesValue();
+ }]>;
+
+/// Pattern 1: (lhs & bitpat) | (rhs & ~bitpat)
+def bfi_pat1 : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+ (or
+ (and node:$lhs, node:$bitpat),
+ (and node:$rhs, (not node:$bitpat)))>;
+
+/// Pattern 1b: (lhs & bitpat) | (rhs & ~bitpat), rooted at IL_or instead of or
+/// FIXME: This pattern needs to be removed, but requires cleanup of IL_or
+def bfi_pat1b : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+ (IL_or
+ (and node:$lhs, node:$bitpat),
+ (and node:$rhs, (not node:$bitpat)))>;
+
+/// Pattern 2: (lhs & bitpat) | (rhs & (bitpat ^ -1))
+def bfi_pat2 : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+ (or
+ (and node:$lhs, node:$bitpat),
+ (and node:$rhs,
+ (xor node:$bitpat, isLoadConstantAllOnes) ))>;
+
+/// Pattern 2b: (lhs & bitpat) | (rhs & (bitpat ^ -1)), rooted at IL_or
+/// instead of or
+/// FIXME: This pattern needs to be removed, but requires cleanup of IL_or
+def bfi_pat2b : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+ (IL_or
+ (and node:$lhs, node:$bitpat),
+ (and node:$rhs,
+ (xor node:$bitpat, isLoadConstantAllOnes) ))>;
+
+/// Pattern 3: (rhs ^ ((rhs ^ lhs) & bitpat))
+def bfi_pat3 : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+ (xor node:$rhs,
+ (and (xor node:$rhs, node:$lhs),
+ node:$bitpat))>;
+
+/// Constant-matching pattern leaves
+/// Each leaf matches a target immediate with the value named in the def.
+def isLoadConstantOne : PatLeaf<(timm),
+ [{
+ return N->isOne();
+ }]>;
+
+def is0x1FConstant : PatLeaf<(timm),
+ [{
+ return N->getZExtValue() == 0x1F;
+ }]>;
+
+def is0x3EConstant : PatLeaf<(timm),
+ [{
+ return N->getZExtValue() == 0x3E;
+ }]>;
+
+def is0x1FConstantOrLess : PatLeaf<(timm),
+ [{
+ return N->getZExtValue() <= 0x1F;
+ }]>;
+
+/// Keeps the low five bits of a value: mask & 0x1F.
+def bitmask_5bits : PatFrag<(ops node:$mask),
+ (and node:$mask, (i32 0x1f))>;
+
+/// Bitfield mask instruction patterns.
+/// Pattern 1: ((1 << (width & 0x1F)) + 0xFFFFFFFF) << (offset & 0x1F)
+def bfm_pat1 : PatFrag<(ops node:$width, node:$offset),
+ (shl (add (shl (i32 1), (bitmask_5bits node:$width)), (i32 0xFFFFFFFF)),
+ (bitmask_5bits node:$offset))>;
+
+/// Pattern 1b: ((1 << (width & 0x1F)) + 0xFFFFFFFF) << (offset & 0x1F),
+/// with the addition expressed as IL_add instead of add
+/// FIXME: Need to remove this pattern, but requires clean up of IL_add pattern.
+def bfm_pat1b : PatFrag<(ops node:$width, node:$offset),
+ (shl (IL_add (shl (i32 1), (bitmask_5bits node:$width)), (i32 0xFFFFFFFF)),
+ (bitmask_5bits node:$offset))>;
+
+// Instantiate the BFI/BFM i32 patterns only when the HasHWBitFieldInst
+// predicate holds.
+let Predicates = [HasHWBitFieldInst] in {
+defm BFI_PAT1A : TernaryPatFragI32<IL_OP_BFI, bfi_pat1>;
+defm BFI_PAT1B : TernaryPatFragI32<IL_OP_BFI, bfi_pat1b>;
+defm BFI_PAT2A : TernaryPatFragI32<IL_OP_BFI, bfi_pat2>;
+defm BFI_PAT2B : TernaryPatFragI32<IL_OP_BFI, bfi_pat2b>;
+defm BFI_PAT3 : TernaryPatFragI32<IL_OP_BFI, bfi_pat3>;
+defm BFM_PAT1A : BinaryPatFragI32<IL_OP_BFM, bfm_pat1>;
+defm BFM_PAT1B : BinaryPatFragI32<IL_OP_BFM, bfm_pat1b>;
+}
+
+//
+// bitalign
+// dst = (src0 << (32 - src2[4:0])) | (src1 >> src2[4:0])
+
+// A. src2 is constant
+// Matches (src0 << src3) | (src1 >> src2), accepted only when both shift
+// amounts are constants and src3 == 32 - src2 (compared as 32bit unsigned).
+def bitalign_1 : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
+ (or (shl node:$src0, node:$src3), (srl node:$src1, node:$src2)),
+ [{
+ // Operand 1 of the root is the srl; its operand 1 is the src2 amount.
+ SDNode *N_or1 = N->getOperand(1).getNode();
+ SDNode *N_src2 = N_or1->getOperand(1).getNode();
+ ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
+ if (!CN_src2) {
+ return false;
+ }
+
+ // Operand 0 of the root is the shl; its operand 1 is the src3 amount.
+ SDNode *N_or0 = N->getOperand(0).getNode();
+ SDNode *N_src3 = N_or0->getOperand(1).getNode();
+ ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
+ if (!CN_src3) {
+ return false;
+ }
+
+ uint32_t csrc2 = CN_src2->getZExtValue();
+ uint32_t csrc3 = CN_src3->getZExtValue();
+ return (csrc3 == (32 - csrc2));
+ }]>;
+
+// IL_or-rooted form of (src0 << src3) | (src1 >> src2), accepted only when
+// both shift amounts are constants and src3 == 32 - src2.
+def bitalign_1b : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
+ (IL_or (shl node:$src0, node:$src3), (srl node:$src1, node:$src2)),
+ [{
+ // Operand 1 of the root is the srl; its operand 1 is the src2 amount.
+ SDNode *N_or1 = N->getOperand(1).getNode();
+ SDNode *N_src2 = N_or1->getOperand(1).getNode();
+ ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
+ if (!CN_src2) {
+ return false;
+ }
+
+ // Operand 0 of the root is the shl; its operand 1 is the src3 amount.
+ SDNode *N_or0 = N->getOperand(0).getNode();
+ SDNode *N_src3 = N_or0->getOperand(1).getNode();
+ ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
+ if (!CN_src3) {
+ return false;
+ }
+
+ uint32_t csrc2 = CN_src2->getZExtValue();
+ uint32_t csrc3 = CN_src3->getZExtValue();
+ return (csrc3 == (32 - csrc2));
+ }]>;
+
+// Commuted form: (src1 >> src2) | (src0 << src3), accepted only when both
+// shift amounts are constants and src3 == 32 - src2.
+def bitalign_2 : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
+ (or (srl node:$src1, node:$src2), (shl node:$src0, node:$src3)),
+ [{
+ // Operand 0 of the root is the srl; its operand 1 is the src2 amount.
+ SDNode *N_or0 = N->getOperand(0).getNode();
+ SDNode *N_src2 = N_or0->getOperand(1).getNode();
+ ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
+ if (!CN_src2) {
+ return false;
+ }
+
+ // Operand 1 of the root is the shl; its operand 1 is the src3 amount.
+ SDNode *N_or1 = N->getOperand(1).getNode();
+ SDNode *N_src3 = N_or1->getOperand(1).getNode();
+ ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
+ if (!CN_src3) {
+ return false;
+ }
+
+ uint32_t csrc2 = CN_src2->getZExtValue();
+ uint32_t csrc3 = CN_src3->getZExtValue();
+ return (csrc3 == (32 - csrc2));
+ }]>;
+
+def bitalign_2b : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3), // bitalign_2 shape rooted at IL_or instead of or
+ (IL_or (srl node:$src1, node:$src2), (shl node:$src0, node:$src3)),
+ [{
+ SDNode *N_or0 = N->getOperand(0).getNode(); // first operand: the srl in the pattern above
+ SDNode *N_src2 = N_or0->getOperand(1).getNode(); // shift amount of the srl
+ ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
+ if (!CN_src2) {
+ return false;
+ }
+
+ SDNode *N_or1 = N->getOperand(1).getNode(); // second operand: the shl in the pattern above
+ SDNode *N_src3 = N_or1->getOperand(1).getNode(); // shift amount of the shl
+ ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
+ if (!CN_src3) {
+ return false;
+ }
+
+ uint32_t csrc2 = CN_src2->getZExtValue();
+ uint32_t csrc3 = CN_src3->getZExtValue();
+ return (csrc3 == (32 - csrc2)); // match only when the two shift amounts are complementary
+ }]>;
+
+// B. src2 is a variable: no C++ predicate, the complementary amount is spelled as (0 - src2).
+
+def bitalign_3 : PatFrag<(ops node:$src0, node:$src1, node:$src2),
+ (or (shl node:$src0,
+ (bitmask_5bits (sub (i32 0), node:$src2))), // shl amount: (0 - src2) passed through bitmask_5bits
+ (srl node:$src1, (bitmask_5bits node:$src2)))>; // srl amount: src2 passed through bitmask_5bits
+
+def bitalign_3b : PatFrag<(ops node:$src0, node:$src1, node:$src2), // bitalign_3 shape rooted at IL_or instead of or
+ (IL_or (shl node:$src0,
+ (bitmask_5bits (sub (i32 0), node:$src2))), // shl amount: (0 - src2) passed through bitmask_5bits
+ (srl node:$src1, (bitmask_5bits node:$src2)))>; // srl amount: src2 passed through bitmask_5bits
+
+// TODO: Using FourInOneOut requires four inputs, but bitalign actually takes
+// three inputs: $src3 is bound by the pattern but never printed. Need to improve this.
+multiclass BitAlignPatFragCI32<ILOpCode opc, PatFrag node> {
+ def _i32 : FourInOneOut<opc, (outs GPRI32:$dst),
+ (ins GPRI32:$src0, GPRI32:$src1, GPRI32:$src2, GPRI32:$src3),
+ !strconcat(opc.Text, " $dst, $src0, $src1, $src2"), // asm string omits $src3
+ [(set GPRI32:$dst, (node GPRI32:$src0, GPRI32:$src1, GPRI32:$src2, GPRI32:$src3))]>;
+}
+
+multiclass BitAlignPatFragSI32<ILOpCode opc, PatFrag node> { // three-operand form used with the variable-shift fragments
+ def _i32 : ThreeInOneOut<opc, (outs GPRI32:$dst),
+ (ins GPRI32:$src0, GPRI32:$src1, GPRI32:$src2),
+ !strconcat(opc.Text, " $dst, $src0, $src1, $src2"),
+ [(set GPRI32:$dst, (node GPRI32:$src0, GPRI32:$src1, GPRI32:$src2))]>;
+}
+
+// Do bitalign pattern recognition if the device is EG or later.
+let Predicates = [IsEGOrLaterDevice] in {
+defm BITALIGN_PAT_1 : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_1>; // constant-shift forms (four bound operands)
+defm BITALIGN_PAT_1B : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_1b>;
+defm BITALIGN_PAT_2 : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_2>;
+defm BITALIGN_PAT_2B : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_2b>;
+defm BITALIGN_PAT_3 : BitAlignPatFragSI32<IL_OP_BIT_ALIGN, bitalign_3>; // variable-shift forms (three operands)
+defm BITALIGN_PAT_3B : BitAlignPatFragSI32<IL_OP_BIT_ALIGN, bitalign_3b>;
+}
+
+// unpack[0-3] dst, src
+// unpackN matches uint_to_fp of byte N of src; each _1 form matches the int_AMDIL_bit_extract_u32 spelling.
+def unpack0 : PatFrag<(ops node:$src),
+ (uint_to_fp (and node:$src, (i32 0xFF)))>; // bits 7:0
+def unpack0_1 : PatFrag<(ops node:$src),
+ (uint_to_fp (i32 (int_AMDIL_bit_extract_u32 (i32 8), (i32 0), node:$src)))>; // presumably (width, offset, src) - confirm
+def unpack1 : PatFrag<(ops node:$src),
+ (uint_to_fp (and (srl node:$src, (i32 8)), (i32 0xFF)))>; // bits 15:8
+def unpack1_1 : PatFrag<(ops node:$src),
+ (uint_to_fp (i32 (int_AMDIL_bit_extract_u32 (i32 8), (i32 8), node:$src)))>;
+def unpack2 : PatFrag<(ops node:$src),
+ (uint_to_fp (and (srl node:$src, (i32 16)), (i32 0xFF)))>; // bits 23:16
+def unpack2_1 : PatFrag<(ops node:$src),
+ (uint_to_fp (i32 (int_AMDIL_bit_extract_u32 (i32 8), (i32 16), node:$src)))>;
+def unpack3 : PatFrag<(ops node:$src), (uint_to_fp (srl node:$src, (i32 24)))>; // bits 31:24; no mask is written after the shift by 24
+def unpack3_1 : PatFrag<(ops node:$src),
+ (uint_to_fp (i32 (int_AMDIL_bit_extract_u32 (i32 8), (i32 24), node:$src)))>;
+
+multiclass UnpackPatFrag<ILOpCode opc, PatFrag node> { // one i32 source register in, one f32 register out
+ def _i32 : OneInOneOut<opc, (outs GPRF32:$dst),
+ (ins GPRI32:$src),
+ !strconcat(opc.Text, " $dst, $src"),
+ [(set GPRF32:$dst, (node GPRI32:$src))]>;
+}
+
+let Predicates = [IsEGOrLaterDevice] in { // unpack pattern recognition is gated the same way as bitalign
+defm UNPACK_PAT0 : UnpackPatFrag<IL_OP_UNPACK_0, unpack0>;
+defm UNPACK_PAT0_1 : UnpackPatFrag<IL_OP_UNPACK_0, unpack0_1>;
+defm UNPACK_PAT1 : UnpackPatFrag<IL_OP_UNPACK_1, unpack1>;
+defm UNPACK_PAT1_1 : UnpackPatFrag<IL_OP_UNPACK_1, unpack1_1>;
+defm UNPACK_PAT2 : UnpackPatFrag<IL_OP_UNPACK_2, unpack2>;
+defm UNPACK_PAT2_1 : UnpackPatFrag<IL_OP_UNPACK_2, unpack2_1>;
+defm UNPACK_PAT3 : UnpackPatFrag<IL_OP_UNPACK_3, unpack3>;
+defm UNPACK_PAT3_1 : UnpackPatFrag<IL_OP_UNPACK_3, unpack3_1>;
+}
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1310 @@
+//===-- AMDILInstructions.td ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+let isReMaterializable = 0, isAsCheapAsAMove = 1 in { // both flags apply to every def in this scope
+ defm LOADCONST : ILConstant<"mov $dst, $val">;
+ defm MOVE : UnaryOpMC<IL_OP_MOV, IL_mov>;
+ defm PHIMOVE : UnaryOpMC<IL_OP_MOV, IL_phimov>; // same IL_OP_MOV opcode as MOVE, selected from IL_phimov
+}
+defm BINARY_NOT : UnaryOpMC<IL_OP_I_NOT, IL_not>;
+defm BINARY_OR : BinaryOpMC<IL_OP_I_OR, IL_or>;
+defm BINARY_AND : BinaryOpMC<IL_OP_AND, IL_and>;
+defm BINARY_XOR : BinaryOpMC<IL_OP_I_XOR, IL_xor>;
+defm AND : BinaryOpMCInt<IL_OP_AND, and>; // matches the 'and' node; BINARY_AND above matches IL_and
+defm CMOV : BinaryOpMC<IL_OP_CMOV, IL_cmov>;
+defm DIV_INF : BinaryOpMC<IL_OP_DIV_INF, IL_div_inf>;
+defm SMAX : BinaryOpMCInt<IL_OP_I_MAX, IL_smax>;
+// This opcode has a custom swizzle pattern encoded in the Swizzle Encoder for
+// 64bit instructions.
+defm CMOVLOG : TernaryOpMC<IL_OP_CMOV_LOGICAL, IL_cmov_logical>;
+defm SELECTBIN : TernaryOpMCScalar<IL_OP_CMOV_LOGICAL, select>; // 'select' is also emitted as IL_OP_CMOV_LOGICAL
+//===---------------------------------------------------------------------===//
+// Signed 8bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+def INTTOANY_i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins GPRI32:$src0), // IL_inttoany from a GPRI32 source into GPRI8
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRI8:$dst, (IL_inttoany GPRI32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// Signed 16bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+def INTTOANY_i16: OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins GPRI32:$src0), // IL_inttoany from a GPRI32 source into GPRI16
+ !strconcat(IL_OP_MOV.Text," $dst, $src0"),
+ [(set GPRI16:$dst, (IL_inttoany GPRI32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// Signed 32bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+defm NEGATE : UnaryOpMCi32<IL_OP_I_NEGATE, IL_inegate>;
+defm SMUL : BinaryOpMCi32<IL_OP_I_MUL, mul>;
+defm SMULHI : BinaryOpMCi32<IL_OP_I_MUL_HIGH, mulhs>;
+defm SHL : BinaryOpMCi32Const<IL_OP_I_SHL, shl>;
+defm SHR : BinaryOpMCi32Const<IL_OP_I_SHR, sra>; // SHR is the arithmetic shift (sra)
+let Predicates = [Has64BitPtr] in { // i64 'Const' shift variants are gated on Has64BitPtr
+defm SHL : BinaryOpMCi64Const<IL_OP_I_SHL, shl>;
+defm SHR : BinaryOpMCi64Const<IL_OP_I_SHR, sra>;
+defm USHR : BinaryOpMCi64Const<IL_OP_U_SHR, srl>;
+}
+defm SHLVEC : BinaryOpMCi32<IL_OP_I_SHL, shl>;
+defm SHRVEC : BinaryOpMCi32<IL_OP_I_SHR, sra>;
+defm ADD : BinaryOpMCi32<IL_OP_I_ADD, add>;
+defm CUSTOM_XOR : BinaryOpMCInt<IL_OP_I_XOR, xor>;
+// Get rid of the addri via TableGen instead of a custom-lowered instruction.
+defm CUSTOM_ADD : BinaryOpMCi32<IL_OP_I_ADD, IL_add>;
+defm EADD : BinaryOpMCi32<IL_OP_I_ADD, adde>; // adde is emitted with the same IL_OP_I_ADD opcode as add
+def INTTOANY_i32: OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins GPRI32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRI32:$dst, (IL_inttoany GPRI32:$src0))]>;
+// Integer offsets for addressing: ADDir takes the address operand first, then the offset.
+def ADDir : TwoInOneOut<IL_OP_I_ADD, (outs GPRI32:$dst),
+ (ins MEM3232:$ptr, GPRI32:$offset),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $ptr, $offset"),
+ [(set GPRI32:$dst,
+ (IL_addaddrri ADDR:$ptr,
+ (i32 GPRI32:$offset)))]>;
+def ADDri : TwoInOneOut<IL_OP_I_ADD, (outs GPRI32:$dst), // operand order reversed relative to ADDir: offset first, then address
+ (ins GPRI32:$offset, MEM3232:$ptr),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $offset, $ptr"),
+ [(set GPRI32:$dst,
+ (IL_addaddrir
+ (i32 GPRI32:$offset), ADDR:$ptr))]>;
+
+defm IFFB_HI : UnaryOpMCi32<IL_OP_I_FFB_HI, IL_ffb_hi>; // IL_ffb_* node forms; the intrinsic forms are FFB_HI/FFB_LO below
+defm IFFB_LO : UnaryOpMCi32<IL_OP_I_FFB_LO, IL_ffb_lo>;
+let mayLoad = 0, mayStore = 0 in { // the intrinsic-selected defs in this scope are marked as not accessing memory
+defm ABS : UnaryIntrinsicInt<IL_OP_ABS, int_AMDIL_abs>;
+defm BITCOUNT : UnaryIntrinsicInt<IL_OP_IBIT_COUNT, int_AMDIL_bit_count_i32>;
+defm FFB_LO : UnaryIntrinsicInt<IL_OP_I_FFB_LO, int_AMDIL_bit_find_first_lo>;
+defm FFB_HI : UnaryIntrinsicInt<IL_OP_I_FFB_HI, int_AMDIL_bit_find_first_hi>;
+defm FFB_SGN : UnaryIntrinsicInt<IL_OP_I_FFB_SGN,
+ int_AMDIL_bit_find_first_sgn>;
+defm IMULHI : BinaryIntrinsicInt<IL_OP_I_MUL_HIGH, int_AMDIL_mulhi_i32>;
+let Predicates = [HasHWSign24Bit] in { // signed 24-bit ops are gated on HasHWSign24Bit
+defm IMUL24 : BinaryIntrinsicInt<IL_OP_I_MUL24, int_AMDIL_mul24_i32>;
+defm IMULHI24 : BinaryIntrinsicInt<IL_OP_I_MULHI24, int_AMDIL_mulhi24_i32>;
+defm IMAD24 : TernaryIntrinsicInt<IL_OP_I_MAD24, int_AMDIL_mad24_i32>;
+}
+defm CARRY : BinaryIntrinsicInt<IL_OP_I_CARRY, int_AMDIL_carry_i32>;
+defm BORROW : BinaryIntrinsicInt<IL_OP_I_BORROW, int_AMDIL_borrow_i32>;
+defm IMIN : BinaryIntrinsicInt<IL_OP_I_MIN, int_AMDIL_min_i32>;
+defm IMAX : BinaryIntrinsicInt<IL_OP_I_MAX, int_AMDIL_max_i32>;
+defm CMOV_LOG : TernaryIntrinsicInt<IL_OP_CMOV_LOGICAL,
+ int_AMDIL_cmov_logical>;
+defm IBIT_EXTRACT : TernaryIntrinsicInt<IL_OP_IBIT_EXTRACT,
+ int_AMDIL_bit_extract_i32>;
+defm IMAD : TernaryIntrinsicInt<IL_OP_I_MAD, int_AMDIL_mad_i32>;
+defm SAD : TernaryIntrinsicInt<IL_OP_SAD, int_AMDIL_media_sad>;
+defm SADHI : TernaryIntrinsicInt<IL_OP_SAD_HI,
+ int_AMDIL_media_sad_hi>;
+}
+def SAD4_i32 : ThreeInOneOut<IL_OP_SAD4, (outs GPRI32:$dst), // two v4i32 sources plus one scalar i32 source, scalar i32 result
+ (ins GPRV4I32:$src, GPRV4I32:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_SAD4.Text, " $dst, $src, $src1, $src2"),
+ [(set GPRI32:$dst,
+ (int_AMDIL_media_sad4 GPRV4I32:$src, GPRV4I32:$src1,
+ GPRI32:$src2))]>;
+def FTOV4U8_i32 : OneInOneOut<IL_OP_F2U4, (outs GPRI32:$dst), // v4f32 source, single i32 result
+ (ins GPRV4F32:$src),
+ !strconcat(IL_OP_F2U4.Text, " $dst, $src"),
+ [(set GPRI32:$dst,
+ (int_AMDIL_media_convert_f2v4u8 GPRV4F32:$src))]>;
+//===---------------------------------------------------------------------===//
+// Unsigned 32bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+defm UMUL : BinaryOpMCi32<IL_OP_U_MUL, IL_umul>;
+defm UMULHI : BinaryOpMCi32<IL_OP_U_MUL_HIGH, mulhu>;
+defm USHR : BinaryOpMCi32Const<IL_OP_U_SHR, srl>; // USHR is the logical shift (srl)
+defm USHRVEC : BinaryOpMCi32<IL_OP_U_SHR, srl>;
+defm UDIV : BinaryOpMCi32<IL_OP_U_DIV, udiv>;
+defm NATIVE_UDIV : BinaryIntrinsicInt<IL_OP_U_DIV, int_AMDIL_udiv>; // same IL_OP_U_DIV opcode, selected from the intrinsic
+let mayLoad=0, mayStore=0 in { // the intrinsic-selected defs in this scope are marked as not accessing memory
+defm UBIT_REVERSE : UnaryIntrinsicInt<IL_OP_UBIT_REVERSE,
+ int_AMDIL_bit_reverse_u32>;
+defm UMULHI_INT : BinaryIntrinsicInt<IL_OP_U_MUL_HIGH, int_AMDIL_mulhi_u32>;
+defm UMULHI24 : BinaryIntrinsicInt<IL_OP_U_MULHI24, int_AMDIL_mulhi24_u32>; // NOTE(review): unsigned 24-bit defs carry no predicate, unlike the signed ones under HasHWSign24Bit - confirm intended
+defm UMUL24 : BinaryIntrinsicInt<IL_OP_U_MUL24, int_AMDIL_mul24_u32>;
+defm UMIN : BinaryIntrinsicInt<IL_OP_U_MIN, int_AMDIL_min_u32>;
+defm UMAX : BinaryIntrinsicInt<IL_OP_U_MAX, int_AMDIL_max_u32>;
+defm UBIT_EXTRACT : TernaryIntrinsicInt<IL_OP_UBIT_EXTRACT,
+ int_AMDIL_bit_extract_u32>;
+defm UBIT_INSERT : QuaternaryIntrinsicInt<IL_OP_UBIT_INSERT,
+ int_AMDIL_bit_insert_u32>;
+defm BFI : TernaryIntrinsicInt<IL_OP_BFI, int_AMDIL_bfi>;
+defm BFM : BinaryIntrinsicInt<IL_OP_BFM, int_AMDIL_bfm>;
+defm UMAD : TernaryIntrinsicInt<IL_OP_U_MAD, int_AMDIL_mad_u32>;
+defm UMAD24 : TernaryIntrinsicInt<IL_OP_U_MAD24, int_AMDIL_mad24_u32>;
+defm U4LERP : TernaryIntrinsicInt<IL_OP_U4_LERP,
+ int_AMDIL_media_lerp_u4>;
+defm BITALIGN : TernaryIntrinsicInt<IL_OP_BIT_ALIGN, int_AMDIL_media_bitalign>; // intrinsic form, carries no device predicate here
+defm BYTEALIGN : TernaryIntrinsicInt<IL_OP_BYTE_ALIGN, int_AMDIL_media_bytealign>;
+}
+//===---------------------------------------------------------------------===//
+// Signed 64bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+def LNEGATE : OneInOneOut<IL_OP_I64_NEGATE, (outs GPRI64:$dst), (ins GPRI64:$src), // IL_inegate on i64; defined outside any predicate scope
+ !strconcat(IL_OP_I64_NEGATE.Text, " $dst, $src"),
+ [(set GPRI64:$dst, (IL_inegate GPRI64:$src))]>;
+def LNEGATE_v2i64: OneInOneOut<IL_OP_I64_NEGATE, (outs GPRV2I64:$dst), // v2i64 form of LNEGATE
+ (ins GPRV2I64:$src),
+ !strconcat(IL_OP_I64_NEGATE.Text, " $dst, $src"),
+ [(set GPRV2I64:$dst, (IL_inegate GPRV2I64:$src))]>;
+let Predicates = [HasHW64Bit] in { // 64-bit add and min/max are gated on HasHW64Bit
+def LADD_i64 : TwoInOneOut<IL_OP_I64_ADD, (outs GPRI64:$dst),
+ (ins GPRI64:$src1, GPRI64:$src2),
+ !strconcat(IL_OP_I64_ADD.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (IL_add GPRI64:$src1, GPRI64:$src2))]>;
+def LADD_v2i64 : TwoInOneOut<IL_OP_I64_ADD, (outs GPRV2I64:$dst),
+ (ins GPRV2I64:$src1, GPRV2I64:$src2),
+ !strconcat(IL_OP_I64_ADD.Text, " $dst, $src1, $src2"),
+ [(set GPRV2I64:$dst, (IL_add GPRV2I64:$src1, GPRV2I64:$src2))]>;
+defm IMIN64 : BinaryIntrinsicLong<IL_OP_I64_MIN, int_AMDIL_min_i32>; // NOTE(review): 64-bit opcodes reuse the _i32/_u32-named intrinsics - presumably overloaded; confirm
+defm UMIN64 : BinaryIntrinsicLong<IL_OP_U64_MIN, int_AMDIL_min_u32>;
+defm IMAX64 : BinaryIntrinsicLong<IL_OP_I64_MAX, int_AMDIL_max_i32>;
+defm UMAX64 : BinaryIntrinsicLong<IL_OP_U64_MAX, int_AMDIL_max_u32>;
+}
+let Predicates = [HasHW64Bit] in { // 64-bit sra/shl forms are gated on HasHW64Bit
+def LSHR : TwoInOneOut<IL_OP_I64_SHR, (outs GPRI64:$dst), // sra with an i32 shift amount
+ (ins GPRI64:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_I64_SHR.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (sra GPRI64:$src1, GPRI32:$src2))]>;
+def LSHL : TwoInOneOut<IL_OP_I64_SHL, (outs GPRI64:$dst), // shl with an i32 shift amount
+ (ins GPRI64:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_I64_SHL.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (shl GPRI64:$src1, GPRI32:$src2))]>;
+// Apple requires a separate pattern because they pass down the shift operand
+// as a 64bit value, although only the lower 6 bits are used.
+def LSHR_APPLE : TwoInOneOut<IL_OP_I64_SHR, (outs GPRI64:$dst),
+ (ins GPRI64:$src1, GPRI64:$src2),
+ !strconcat(IL_OP_I64_SHR.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (sra GPRI64:$src1, GPRI64:$src2))]>;
+def LSHL_APPLE : TwoInOneOut<IL_OP_I64_SHL, (outs GPRI64:$dst),
+ (ins GPRI64:$src1, GPRI64:$src2),
+ !strconcat(IL_OP_I64_SHL.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (shl GPRI64:$src1, GPRI64:$src2))]>;
+}
+
+
+//===---------------------------------------------------------------------===//
+// Unsigned 64bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+let Predicates = [HasTmrRegister] in {
+ def Tmr : ILFormat<IL_OP_MOV, (outs GPRXYI64:$tmr), // no inputs: prints a move from Tmr.xyxy for int_AMDIL_get_cycle_count
+ (ins), !strconcat(IL_OP_MOV.Text, " $tmr, Tmr.xyxy"),
+ [(set GPRXYI64:$tmr, (int_AMDIL_get_cycle_count))]>;
+}
+let Predicates = [IsEGOrLaterDevice] in { // both ID queries take no inputs and write one i32 register
+def CU_ID : ILFormat<IL_OP_CU_ID, (outs GPRI32:$id), (ins),
+ !strconcat(IL_OP_CU_ID.Text, " $id"),
+ [(set GPRI32:$id, (int_AMDIL_compute_unit_id))]>;
+def WAVE_ID : ILFormat<IL_OP_WAVE_ID, (outs GPRI32:$id), (ins),
+ !strconcat(IL_OP_WAVE_ID.Text, " $id"),
+ [(set GPRI32:$id, (int_AMDIL_wavefront_id))]>;
+}
+let Predicates = [HasHW64Bit] in { // 64-bit srl forms are gated on HasHW64Bit
+def LUSHR : TwoInOneOut<IL_OP_U64_SHR, (outs GPRI64:$dst), // srl with an i32 shift amount
+ (ins GPRI64:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_U64_SHR.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (srl GPRI64:$src1, GPRI32:$src2))]>;
+// Apple requires a separate pattern because they pass down the shift operand
+// as a 64bit value, although only the lower 6 bits are used.
+def LUSHR_APPLE : TwoInOneOut<IL_OP_U64_SHR, (outs GPRI64:$dst),
+ (ins GPRI64:$src1, GPRI64:$src2),
+ !strconcat(IL_OP_U64_SHR.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (srl GPRI64:$src1, GPRI64:$src2))]>;
+}
+
+
+//===---------------------------------------------------------------------===//
+// Generic Float Instructions
+//===---------------------------------------------------------------------===//
+let hasIEEEFlag = 1 in {
+defm MUL_IEEE : BinaryOpMCFloat<IL_OP_MUL_IEEE, IL_OP_D_MUL, fmul>; // two opcodes are passed: IL_OP_MUL_IEEE and the IL_OP_D_MUL double opcode
+}
+defm ADD : BinaryOpMCFloat<IL_OP_ADD, IL_OP_D_ADD, fadd>; // defined outside the hasIEEEFlag scope
+//===---------------------------------------------------------------------===//
+// float math instructions start here
+//===---------------------------------------------------------------------===//
+let mayLoad=0, mayStore=0 in { // the intrinsic-selected defs in this scope are marked as not accessing memory
+defm ABS : UnaryIntrinsicFloat<IL_OP_ABS, int_AMDIL_fabs>;
+defm FRAC : UnaryIntrinsicFloat<IL_OP_FRC, int_AMDIL_fraction>;
+defm PIREDUCE : UnaryIntrinsicFloat<IL_OP_PI_REDUCE, int_AMDIL_pireduce>;
+defm ROUND_NEAREST : UnaryIntrinsicFloat<IL_OP_ROUND_NEAR,
+ int_AMDIL_round_nearest>;
+defm ROUND_NEGINF : UnaryIntrinsicFloat<IL_OP_ROUND_NEG_INF,
+ int_AMDIL_round_neginf>;
+defm ROUND_POSINF : UnaryIntrinsicFloat<IL_OP_ROUND_POS_INF,
+ int_AMDIL_round_posinf>;
+defm ROUND_ZERO : UnaryIntrinsicFloat<IL_OP_ROUND_ZERO,
+ int_AMDIL_round_zero>;
+defm ACOS : UnaryIntrinsicFloatScalar<IL_OP_ACOS, int_AMDIL_acos>; // transcendental ops use the *Scalar multiclass
+defm ATAN : UnaryIntrinsicFloatScalar<IL_OP_ATAN, int_AMDIL_atan>;
+defm ASIN : UnaryIntrinsicFloatScalar<IL_OP_ASIN, int_AMDIL_asin>;
+defm TAN : UnaryIntrinsicFloatScalar<IL_OP_TAN, int_AMDIL_tan>;
+defm SIN : UnaryIntrinsicFloatScalar<IL_OP_SIN, int_AMDIL_sin>;
+defm COS : UnaryIntrinsicFloatScalar<IL_OP_COS, int_AMDIL_cos>;
+defm SQRT : UnaryIntrinsicFloatScalar<IL_OP_SQRT, int_AMDIL_sqrt>;
+defm EXP : UnaryIntrinsicFloatScalar<IL_OP_EXP, int_AMDIL_exp>;
+defm EXPVEC : UnaryIntrinsicFloat<IL_OP_EXP_VEC, int_AMDIL_exp_vec>; // *_VEC opcodes are paired with the *_vec intrinsics
+defm SQRTVEC : UnaryIntrinsicFloat<IL_OP_SQRT_VEC, int_AMDIL_sqrt_vec>;
+defm COSVEC : UnaryIntrinsicFloat<IL_OP_COS_VEC, int_AMDIL_cos_vec>;
+defm SINVEC : UnaryIntrinsicFloat<IL_OP_SIN_VEC, int_AMDIL_sin_vec>;
+defm LOGVEC : UnaryIntrinsicFloat<IL_OP_LOG_VEC, int_AMDIL_log_vec>;
+defm RSQVEC : UnaryIntrinsicFloat<IL_OP_RSQ_VEC, int_AMDIL_rsq_vec>;
+defm EXN : UnaryIntrinsicFloatScalar<IL_OP_EXN, int_AMDIL_exn>;
+defm SIGN : UnaryIntrinsicFloat<IL_OP_SGN, int_AMDIL_sign>;
+defm LENGTH : UnaryIntrinsicFloat<IL_OP_LEN, int_AMDIL_length>;
+defm POW : BinaryIntrinsicFloat<IL_OP_POW, int_AMDIL_pow>;
+}
+
+let hasIEEEFlag = 1 in { // every def in this scope gets hasIEEEFlag
+ let mayLoad = 0, mayStore=0 in {
+defm MIN : BinaryIntrinsicFloat<IL_OP_MIN, int_AMDIL_min>;
+defm MAX : BinaryIntrinsicFloat<IL_OP_MAX, int_AMDIL_max>;
+defm MAD : TernaryIntrinsicFloat<IL_OP_MAD, int_AMDIL_mad>;
+ }
+defm MOD : BinaryOpMCf32<IL_OP_MOD, frem>; // inside hasIEEEFlag but outside the mayLoad/mayStore scope
+}
+let hasZeroOpFlag = 1 in { // every def in this scope gets hasZeroOpFlag
+ let mayLoad = 0, mayStore=0 in {
+defm LN : UnaryIntrinsicFloatScalar<IL_OP_LN, int_AMDIL_ln>;
+defm LOG : UnaryIntrinsicFloatScalar<IL_OP_LOG, int_AMDIL_log>;
+defm RSQ : UnaryIntrinsicFloatScalar<IL_OP_RSQ, int_AMDIL_rsq>;
+defm DIV_INT : BinaryIntrinsicFloat<IL_OP_DIV, int_AMDIL_div>; // intrinsic form of IL_OP_DIV
+defm DIV : BinaryOpMCf32<IL_OP_DIV, fdiv>; // fdiv node form of the same opcode
+defm DIV_PRECISE : BinaryIntrinsicFloat<IL_OP_DIV_PRECISE, int_AMDIL_div_precise>;
+ }
+}
+ let mayLoad = 0, mayStore=0 in { // only the three intrinsic-selected defs are in this scope
+defm CLAMP : TernaryIntrinsicFloat<IL_OP_CLAMP, int_AMDIL_clamp>;
+defm FMA : TernaryIntrinsicFloat<IL_OP_FMA, int_AMDIL_fma>;
+defm LERP : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>;
+ }
+defm SUB : BinaryOpMCf32<IL_OP_SUB, fsub>;
+defm FABS : UnaryOpMCf32<IL_OP_ABS, fabs>;
+defm FMAD : TernaryOpMCf32<IL_OP_MAD, IL_mad>;
+defm NEARBY : UnaryOpMCf32<IL_OP_ROUND_NEAR, fnearbyint>; // rounding nodes map onto the IL_OP_ROUND_* opcodes
+defm TRUNC : UnaryOpMCf32<IL_OP_ROUND_ZERO, ftrunc>;
+defm CEIL : UnaryOpMCf32<IL_OP_ROUND_POS_INF, fceil>;
+defm FLOOR : UnaryOpMCf32<IL_OP_ROUND_NEG_INF, ffloor>;
+
+def NEG_f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), // fneg printed as a move with the _neg(xyzw) source modifier
+ (ins GPRF32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(xyzw)"),
+ [(set GPRF32:$dst, (fneg GPRF32:$src0))]>;
+def INTTOANY_f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), // IL_inttoany from a GPRI32 source into GPRF32
+ (ins GPRI32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRF32:$dst, (IL_inttoany GPRI32:$src0))]>;
+let hasIEEEFlag = 1 in { // dot-product and FTZ defs all get hasIEEEFlag
+def DP2ADD_f32 : ThreeInOneOut<IL_OP_DP2_ADD, (outs GPRF32:$dst), // two v2f32 sources plus one f32 source, f32 result
+ (ins GPRV2F32:$src0, GPRV2F32:$src1, GPRF32:$src2),
+ !strconcat(IL_OP_DP2_ADD.Text, " $dst, $src0, $src1, $src2"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_dp2_add GPRV2F32:$src0,
+ GPRV2F32:$src1, GPRF32:$src2))]>;
+def DP2_f32 : TwoInOneOut<IL_OP_DP2, (outs GPRF32:$dst),
+ (ins GPRV2F32:$src0, GPRV2F32:$src1),
+ !strconcat(IL_OP_DP2.Text, " $dst, $src0, $src1"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_dp2 GPRV2F32:$src0, GPRV2F32:$src1))]>;
+def DP3_f32 : TwoInOneOut<IL_OP_DP3, (outs GPRF32:$dst), // takes v4f32 registers, same as DP4_f32
+ (ins GPRV4F32:$src0, GPRV4F32:$src1),
+ !strconcat(IL_OP_DP3.Text, " $dst, $src0, $src1"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_dp3 GPRV4F32:$src0, GPRV4F32:$src1))]>;
+def DP4_f32 : TwoInOneOut<IL_OP_DP4, (outs GPRF32:$dst),
+ (ins GPRV4F32:$src0, GPRV4F32:$src1),
+ !strconcat(IL_OP_DP4.Text, " $dst, $src0, $src1"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_dp4 GPRV4F32:$src0, GPRV4F32:$src1))]>;
+def FTZ_f32 : OneInOneOut<IL_OP_MUL_IEEE, (outs GPRF32:$dst), // int_AMDIL_ftz printed as IL_OP_MUL_IEEE by the fixed operand r0.1 (presumably holds 1.0 - confirm)
+ (ins GPRF32:$src), !strconcat(IL_OP_MUL_IEEE.Text, " $dst, $src, r0.1"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_ftz GPRF32:$src))]>;
+def FTZ_v2f32 : OneInOneOut<IL_OP_MUL_IEEE, (outs GPRV2F32:$dst), // v2f32 form of FTZ_f32
+ (ins GPRV2F32:$src), !strconcat(IL_OP_MUL_IEEE.Text, " $dst, $src, r0.1"),
+ [(set GPRV2F32:$dst,
+ (int_AMDIL_ftz GPRV2F32:$src))]>;
+def FTZ_v4f32 : OneInOneOut<IL_OP_MUL_IEEE, (outs GPRV4F32:$dst), // v4f32 form of FTZ_f32
+ (ins GPRV4F32:$src), !strconcat(IL_OP_MUL_IEEE.Text, " $dst, $src, r0.1"),
+ [(set GPRV4F32:$dst,
+ (int_AMDIL_ftz GPRV4F32:$src))]>;
+}
+defm UNPACK_B0 : IntrConvertI32TOF32<IL_OP_UNPACK_0, int_AMDIL_media_unpack_byte_0>; // intrinsic-selected forms of the IL_OP_UNPACK_* opcodes
+defm UNPACK_B1 : IntrConvertI32TOF32<IL_OP_UNPACK_1, int_AMDIL_media_unpack_byte_1>;
+defm UNPACK_B2 : IntrConvertI32TOF32<IL_OP_UNPACK_2, int_AMDIL_media_unpack_byte_2>;
+defm UNPACK_B3 : IntrConvertI32TOF32<IL_OP_UNPACK_3, int_AMDIL_media_unpack_byte_3>;
+defm FTOI_FLR : IntrConvertF32TOI32<IL_OP_FTOI_FLR, int_AMDIL_convert_f32_i32_flr>;
+defm FTOI_RPI : IntrConvertF32TOI32<IL_OP_FTOI_RPI, int_AMDIL_convert_f32_i32_rpi>;
+defm HTOF : IntrConvertF16TOF32<IL_OP_F16_TO_F32, int_AMDIL_convert_f16_f32>;
+defm FTOH : IntrConvertF32TOF16<IL_OP_F32_TO_F16, int_AMDIL_convert_f32_f16>; // f32-to-f16 has four variants, each with its own opcode
+defm FTOH_NEAR : IntrConvertF32TOF16<IL_OP_F32_TO_F16_NEAR, int_AMDIL_convert_f32_f16_near>;
+defm FTOH_NEG_INF : IntrConvertF32TOF16<IL_OP_F32_TO_F16_NEG_INF, int_AMDIL_convert_f32_f16_neg_inf>;
+defm FTOH_PLUS_INF : IntrConvertF32TOF16<IL_OP_F32_TO_F16_PLUS_INF, int_AMDIL_convert_f32_f16_plus_inf>;
+//===---------------------------------------------------------------------===//
+// float math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// float2 math instructions start here
+//===---------------------------------------------------------------------===//
+def NEG_v2f32 : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst), // fneg printed as a move with the _neg(xyzw) source modifier
+ (ins GPRV2F32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(xyzw)"),
+ [(set GPRV2F32:$dst, (fneg GPRV2F32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// float2 math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// float4 math instructions start here
+//===---------------------------------------------------------------------===//
+def NEG_v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst), // fneg printed as a move with the _neg(xyzw) source modifier
+ (ins GPRV4F32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(xyzw)"),
+ [(set GPRV4F32:$dst, (fneg GPRV4F32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// float4 math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// double math instructions start here
+//===---------------------------------------------------------------------===//
+def SUB_f64 : TwoInOneOut<IL_OP_D_ADD, (outs GPRF64:$dst), // fsub printed as IL_OP_D_ADD with _neg(yw) on the second source
+ (ins GPRF64:$src0, GPRF64:$src1),
+ !strconcat(IL_OP_D_ADD.Text, " $dst, $src0, ${src1}_neg(yw)"),
+ [(set GPRF64:$dst, (fsub GPRF64:$src0, GPRF64:$src1))]>;
+def SUB_v2f64 : TwoInOneOut<IL_OP_D_ADD, (outs GPRV2F64:$dst), // v2f64 form of SUB_f64
+ (ins GPRV2F64:$src0, GPRV2F64:$src1),
+ !strconcat(IL_OP_D_ADD.Text, " $dst, $src0, ${src1}_neg(yw)"),
+ [(set GPRV2F64:$dst, (fsub GPRV2F64:$src0, GPRV2F64:$src1))]>;
+def NEG_f64 : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst), // fneg printed as a move with _neg(yw), where the float forms use _neg(xyzw)
+ (ins GPRF64:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(yw)"),
+ [(set GPRF64:$dst, (fneg GPRF64:$src0))]>;
+def NEG_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst), // v2f64 form of NEG_f64
+ (ins GPRV2F64:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(yw)"),
+ [(set GPRV2F64:$dst, (fneg GPRV2F64:$src0))]>;
+ let mayLoad = 0, mayStore=0 in { // double forms reuse the float intrinsic names with the IL_OP_D_* opcodes
+defm MIN : BinaryIntrinsicDouble<IL_OP_D_MIN, int_AMDIL_min>;
+defm MAX : BinaryIntrinsicDouble<IL_OP_D_MAX, int_AMDIL_max>;
+defm DIV : BinaryIntrinsicDouble<IL_OP_D_DIV, int_AMDIL_div>;
+defm MAD : TernaryIntrinsicDouble<IL_OP_D_MAD, int_AMDIL_mad>;
+defm DFMA : TernaryIntrinsicDouble<IL_OP_D_MAD, int_AMDIL_fma>; // int_AMDIL_fma on doubles is emitted as IL_OP_D_MAD, same as MAD
+defm FRAC : UnaryIntrinsicDouble<IL_OP_D_FRC, int_AMDIL_fraction>;
+defm SQRT : UnaryIntrinsicDouble<IL_OP_D_SQRT, int_AMDIL_sqrt>;
+defm RSQ : UnaryIntrinsicDoubleScalar<IL_OP_D_RSQ, int_AMDIL_rsq>;
+defm RCP : UnaryIntrinsicDoubleScalar<IL_OP_D_RCP, int_AMDIL_drcp>;
+defm DMAD : TernaryOpMCf64<IL_OP_D_MAD, IL_mad>; // IL_mad node form of IL_OP_D_MAD
+ }
+let Predicates = [HasHWDoubleAbs] in { // IL_OP_D_ABS is used directly when HasHWDoubleAbs holds
+defm DABS : UnaryOpMCf64<IL_OP_D_ABS, fabs>; // fabs node form
+ let mayLoad = 0, mayStore=0 in {
+defm ABS : UnaryIntrinsicDouble<IL_OP_D_ABS, int_AMDIL_fabs>; // intrinsic form
+ }
+}
+let Predicates = [HasSWDoubleAbs] in { // fallback forms: abs is expressed through the _abs(yw) source modifier
+def SWDABS_f64 : OneInOneOut<IL_OP_D_ABS, (outs GPRF64:$dst), // NOTE(review): opcode is IL_OP_D_ABS but the asm text below uses IL_OP_D_FREXP.Text - confirm intended
+ (ins GPRF64:$src),
+ !strconcat(IL_OP_D_FREXP.Text," $dst, ${src}_abs(yw)"),
+ [(set GPRF64:$dst, (fabs GPRF64:$src))]>;
+ let mayLoad = 0, mayStore=0 in {
+def SWABS_f64 : OneInOneOut<IL_OP_D_ABS, (outs GPRF64:$dst), // intrinsic form; same opcode/asm-text mismatch as SWDABS_f64
+ (ins GPRF64:$src),
+ !strconcat(IL_OP_D_FREXP.Text," $dst, ${src}_abs(yw)"),
+ [(set GPRF64:$dst, (int_AMDIL_fabs GPRF64:$src))]>;
+ }
+}
+def FREXP_f64 : OneInOneOut<IL_OP_D_FREXP, (outs GPRV2I64:$dst), // f64 source, v2i64 result register
+ (ins GPRF64:$src),
+ !strconcat(IL_OP_D_FREXP.Text," $dst, $src"),
+ [(set GPRV2I64:$dst,
+ (int_AMDIL_frexp_f64 GPRF64:$src))]>;
+def LDEXP_f64 : TwoInOneOut<IL_OP_D_LDEXP, (outs GPRF64:$dst), // f64 value with an i32 second operand
+ (ins GPRF64:$src, GPRI32:$src1),
+ !strconcat(IL_OP_D_LDEXP.Text, " $dst, $src, $src1"),
+ [(set GPRF64:$dst,
+ (int_AMDIL_ldexp GPRF64:$src, GPRI32:$src1))]>;
+def LDEXP_v2f64 : TwoInOneOut<IL_OP_D_LDEXP, (outs GPRV2F64:$dst), // v2f64 value with a v2i32 second operand
+ (ins GPRV2F64:$src, GPRV2I32:$src1),
+ !strconcat(IL_OP_D_LDEXP.Text, " $dst, $src, $src1"),
+ [(set GPRV2F64:$dst,
+ (int_AMDIL_ldexp GPRV2F64:$src, GPRV2I32:$src1))]>;
+//===---------------------------------------------------------------------===//
+// double math instructions end here
+//===---------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
+// Various Macros: each record lists the result register class, the source class(es), then the matched node
+//===---------------------------------------------------------------------===//
+def MACRO__sdiv_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, sdiv>;
+def MACRO__sdiv_i16 : BinaryMacro<GPRI16, GPRI16, GPRI16, sdiv>;
+def MACRO__sdiv_i32 : BinaryMacro<GPRI32, GPRI32, GPRI32, sdiv>;
+def MACRO__udiv_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, udiv>;
+def MACRO__udiv_i16 : BinaryMacro<GPRI16, GPRI16, GPRI16, udiv>;
+def MACRO__udiv_i32 : BinaryMacro<GPRI32, GPRI32, GPRI32, udiv>;
+def MACRO__smod_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, srem>; // the 'mod' macros match the srem/urem nodes
+def MACRO__smod_i16 : BinaryMacro<GPRI16, GPRI16, GPRI16, srem>;
+def MACRO__smod_i32 : BinaryMacro<GPRI32, GPRI32, GPRI32, srem>;
+def MACRO__umod_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, urem>;
+def MACRO__umod_i16 : BinaryMacro<GPRI16, GPRI16, GPRI16, urem>;
+def MACRO__umod_i32 : BinaryMacro<GPRI32, GPRI32, GPRI32, urem>;
+let Predicates = [HasSWDDiv] in { // f64 fdiv has two macros, chosen by HasSWDDiv vs HasHWDDiv
+ def MACRO__ddiv_f64: BinaryMacro<GPRF64, GPRF64, GPRF64, fdiv>;
+}
+let Predicates = [HasHWDDiv] in {
+ def MACRO__ddiv_f64_fma: BinaryMacro<GPRF64, GPRF64, GPRF64, fdiv>;
+}
+def MACRO__ftol_i64 : UnaryMacro<GPRI64, GPRF32, fp_to_sint>; // f32 -> i64
+def MACRO__ultof_f32 : UnaryMacro<GPRF32, GPRI64, uint_to_fp>; // i64 -> f32, unsigned
+def MACRO__ltof_f32 : UnaryMacro<GPRF32, GPRI64, sint_to_fp>; // i64 -> f32, signed
+let Predicates = [HasSW64Mul] in { // macro fallback; the HasHW64Mul defs use the IL_OP_*64_MUL opcodes instead
+def MACRO__mul_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, mul>;
+def MACRO__mul_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, mul>;
+}
+let Predicates = [HasSW64DivMod] in { // macro fallback; the HasHW64DivMod defs use the IL_OP_*64_DIV/MOD opcodes instead
+def MACRO__sdiv_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, sdiv>;
+def MACRO__udiv_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, udiv>;
+def MACRO__smod_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, srem>;
+def MACRO__umod_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, urem>;
+}
+
+let Predicates = [HasHW64DivMod] in { // direct 64-bit div/mod opcodes, the counterpart of the HasSW64DivMod macros
+ defm SDIV : BinaryOpMCi64<IL_OP_I64_DIV, sdiv>;
+ defm UDIV : BinaryOpMCi64<IL_OP_U64_DIV, udiv>;
+ defm SMOD : BinaryOpMCi64<IL_OP_I64_MOD, srem>;
+ defm UMOD : BinaryOpMCi64<IL_OP_U64_MOD, urem>;
+}
+let Predicates = [HasHW64Mul] in { // direct 64-bit multiply opcodes, the counterpart of the HasSW64Mul macros
+ defm SMUL : BinaryOpMCi64<IL_OP_I64_MUL, mul>;
+ defm UMUL : BinaryOpMCi64<IL_OP_U64_MUL, IL_umul>;
+}
+// Apple requires a separate pattern because they pass down the shift operand
+// as a 64bit value, although only the lower 6 bits are used.
+// The vector-2 forms use the software emulated mode since SC only supports
+// scalar 64bit ops.
+def MACRO__shr_apple_v2i64: BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, srl>; // 'shr' macros match srl; the 'sra' macros match sra
+def MACRO__shl_apple_v2i64: BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, shl>;
+def MACRO__sra_apple_v2i64: BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, sra>;
+def MACRO__shr_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, srl>; // non-Apple forms take a v2i32 shift amount
+def MACRO__shl_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, shl>;
+def MACRO__sra_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, sra>;
+
+let Predicates = [HasSW64Bit] in { // scalar i64 shift macros; the HasHW64Bit defs use the IL_OP_*64_SH* opcodes instead
+def MACRO__shr_apple_i64: BinaryMacro<GPRI64, GPRI64, GPRI64, srl>; // 'apple' forms take an i64 shift amount
+def MACRO__shl_apple_i64: BinaryMacro<GPRI64, GPRI64, GPRI64, shl>;
+def MACRO__sra_apple_i64: BinaryMacro<GPRI64, GPRI64, GPRI64, sra>;
+def MACRO__shr_i64 : BinaryMacro<GPRI64, GPRI64, GPRI32, srl>; // these take an i32 shift amount
+def MACRO__shl_i64 : BinaryMacro<GPRI64, GPRI64, GPRI32, shl>;
+def MACRO__sra_i64 : BinaryMacro<GPRI64, GPRI64, GPRI32, sra>;
+}
+
+//===---------------------------------------------------------------------===//
+// Comparison Instructions
+//===---------------------------------------------------------------------===//
+let usesCustomInserter = 1 in { // the CMP pseudo instructions are flagged for custom insertion
+ defm CMP : Compare<"Pseudo comparison instr">;
+}
+//===---------------------------------------------------------------------===//
+// 32-bit floating point comparisons (pattern lists are empty, so none is selected by a pattern)
+//===---------------------------------------------------------------------===//
+def FEQ : TwoInOneOut<IL_OP_EQ, (outs GPRF32:$dst),
+ (ins GPRF32:$lhs, GPRF32:$rhs),
+ !strconcat(IL_OP_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def FGE : TwoInOneOut<IL_OP_GE, (outs GPRF32:$dst),
+ (ins GPRF32:$lhs, GPRF32:$rhs),
+ !strconcat(IL_OP_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def FLT : TwoInOneOut<IL_OP_LT, (outs GPRF32:$dst),
+ (ins GPRF32:$lhs, GPRF32:$rhs),
+ !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def FLT_v2f32 : TwoInOneOut<IL_OP_LT, (outs GPRV2F32:$dst), // of the float compares here, only LT has v2f32/v4f32 forms
+ (ins GPRV2F32:$lhs, GPRV2F32:$rhs),
+ !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def FLT_v4f32 : TwoInOneOut<IL_OP_LT, (outs GPRV4F32:$dst),
+ (ins GPRV4F32:$lhs, GPRV4F32:$rhs),
+ !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def FNE : TwoInOneOut<IL_OP_NE, (outs GPRF32:$dst),
+ (ins GPRF32:$lhs, GPRF32:$rhs),
+ !strconcat(IL_OP_NE.Text, " $dst, $lhs, $rhs")
+ , []>;
+
+//===---------------------------------------------------------------------===//
+//TODO: need to correctly define comparison instructions
+//===---------------------------------------------------------------------===//
+// f64 comparison records (IL_OP_D_EQ / D_GE / D_LT / D_NE). Both sources and
+// the destination use the f64 register class, the asm string is the opcode
+// text plus " $dst, $lhs, $rhs", and the pattern list is empty.
+// Only the equality compare (DEQ) has a v2f64 variant here.
+def DEQ : TwoInOneOut<IL_OP_D_EQ, (outs GPRF64:$dst),
+ (ins GPRF64:$lhs, GPRF64:$rhs),
+ !strconcat(IL_OP_D_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def DEQ_v2f64 : TwoInOneOut<IL_OP_D_EQ, (outs GPRV2F64:$dst),
+ (ins GPRV2F64:$lhs, GPRV2F64:$rhs),
+ !strconcat(IL_OP_D_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def DGE : TwoInOneOut<IL_OP_D_GE, (outs GPRF64:$dst),
+ (ins GPRF64:$lhs, GPRF64:$rhs),
+ !strconcat(IL_OP_D_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def DLT : TwoInOneOut<IL_OP_D_LT, (outs GPRF64:$dst),
+ (ins GPRF64:$lhs, GPRF64:$rhs),
+ !strconcat(IL_OP_D_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def DNE : TwoInOneOut<IL_OP_D_NE, (outs GPRF64:$dst),
+ (ins GPRF64:$lhs, GPRF64:$rhs),
+ !strconcat(IL_OP_D_NE.Text, " $dst, $lhs, $rhs")
+ , []>;
+
+//===---------------------------------------------------------------------===//
+//TODO: need to correctly define comparison instructions
+//===---------------------------------------------------------------------===//
+// Signed 32-bit integer comparison records: EQ, GE, LT and NE, each in scalar
+// (GPRI32), v2i32 and v4i32 form. Every record prints its IL opcode text with
+// operands " $dst, $lhs, $rhs" and has an empty pattern list.
+// No separate less-or-equal / greater-than records are defined in this group.
+def IEQ : TwoInOneOut<IL_OP_I_EQ, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def IEQ_v2i32 : TwoInOneOut<IL_OP_I_EQ, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def IEQ_v4i32 : TwoInOneOut<IL_OP_I_EQ, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def IGE : TwoInOneOut<IL_OP_I_GE, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def IGE_v2i32 : TwoInOneOut<IL_OP_I_GE, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def IGE_v4i32 : TwoInOneOut<IL_OP_I_GE, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ILT : TwoInOneOut<IL_OP_I_LT, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ILT_v2i32 : TwoInOneOut<IL_OP_I_LT, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ILT_v4i32 : TwoInOneOut<IL_OP_I_LT, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def INE : TwoInOneOut<IL_OP_I_NE, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def INE_v2i32 : TwoInOneOut<IL_OP_I_NE, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def INE_v4i32 : TwoInOneOut<IL_OP_I_NE, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+ , []>;
+// Signed 64-bit integer comparison records, guarded by the HasHW64Bit
+// predicate. All six have empty pattern lists.
+// LLE and LGT have no opcode of their own: they reuse IL_OP_I64_GE and
+// IL_OP_I64_LT and print the sources swapped (" $dst, $rhs, $lhs"), so
+// "lhs <= rhs" is emitted as "rhs >= lhs" and "lhs > rhs" as "rhs < lhs".
+let Predicates = [HasHW64Bit] in {
+def LEQ : TwoInOneOut<IL_OP_I64_EQ, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_I64_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def LGE : TwoInOneOut<IL_OP_I64_GE, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_I64_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def LLE : TwoInOneOut<IL_OP_I64_GE, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_I64_GE.Text, " $dst, $rhs, $lhs")
+ , []>;
+def LGT : TwoInOneOut<IL_OP_I64_LT, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_I64_LT.Text, " $dst, $rhs, $lhs")
+ , []>;
+def LLT : TwoInOneOut<IL_OP_I64_LT, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_I64_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def LNE : TwoInOneOut<IL_OP_I64_NE, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_I64_NE.Text, " $dst, $lhs, $rhs")
+ , []>;
+}
+
+//===---------------------------------------------------------------------===//
+// Unsigned Integer Operations
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+//TODO: need to correctly define comparison instructions
+//===---------------------------------------------------------------------===//
+// Unsigned 32-bit integer comparison records in scalar, v2i32 and v4i32 form.
+// All have empty pattern lists. UEQ and UNE reuse the signed IL_OP_I_EQ /
+// IL_OP_I_NE opcodes; the ordering compares use IL_OP_U_GE / IL_OP_U_LT.
+//
+// NOTE(review): ULE and UGT print their operands as " $dst, $lhs, $rhs" on
+// IL_OP_U_GE / IL_OP_U_LT, which makes them textually identical to UGE and
+// ULT. The 64-bit ULLE / ULGT (and the signed LLE / LGT) swap the sources to
+// " $dst, $rhs, $lhs" instead. Confirm whether the code that builds ULE / UGT
+// already passes the operands swapped; if it does not, these records emit
+// >= and < rather than <= and >.
+def UEQ : TwoInOneOut<IL_OP_I_EQ, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UEQ_v2i32 : TwoInOneOut<IL_OP_I_EQ, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UEQ_v4i32 : TwoInOneOut<IL_OP_I_EQ, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ULE : TwoInOneOut<IL_OP_U_GE, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ULE_v2i32 : TwoInOneOut<IL_OP_U_GE, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ULE_v4i32 : TwoInOneOut<IL_OP_U_GE, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UGT : TwoInOneOut<IL_OP_U_LT, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UGT_v2i32 : TwoInOneOut<IL_OP_U_LT, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UGT_v4i32 : TwoInOneOut<IL_OP_U_LT, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UGE : TwoInOneOut<IL_OP_U_GE, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UGE_v2i32 : TwoInOneOut<IL_OP_U_GE, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UGE_v4i32 : TwoInOneOut<IL_OP_U_GE, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ULT : TwoInOneOut<IL_OP_U_LT, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ULT_v2i32 : TwoInOneOut<IL_OP_U_LT, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ULT_v4i32 : TwoInOneOut<IL_OP_U_LT, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UNE : TwoInOneOut<IL_OP_I_NE, (outs GPRI32:$dst),
+ (ins GPRI32:$lhs, GPRI32:$rhs),
+ !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UNE_v2i32 : TwoInOneOut<IL_OP_I_NE, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+ !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def UNE_v4i32 : TwoInOneOut<IL_OP_I_NE, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+ !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
+ , []>;
+// Unsigned 64-bit ordering compares, guarded by the HasHW64Bit predicate.
+// Only IL_OP_U64_GE and IL_OP_U64_LT are used: ULLE and ULGT print the
+// sources swapped (" $dst, $rhs, $lhs") to express <= and > with them, while
+// ULGE and ULLT print them in natural order. All pattern lists are empty.
+let Predicates = [HasHW64Bit] in {
+def ULLE : TwoInOneOut<IL_OP_U64_GE, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_U64_GE.Text, " $dst, $rhs, $lhs")
+ , []>;
+def ULGT : TwoInOneOut<IL_OP_U64_LT, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_U64_LT.Text, " $dst, $rhs, $lhs")
+ , []>;
+def ULGE : TwoInOneOut<IL_OP_U64_GE, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_U64_GE.Text, " $dst, $lhs, $rhs")
+ , []>;
+def ULLT : TwoInOneOut<IL_OP_U64_LT, (outs GPRI64:$dst),
+ (ins GPRI64:$lhs, GPRI64:$rhs),
+ !strconcat(IL_OP_U64_LT.Text, " $dst, $lhs, $rhs")
+ , []>;
+}
+//===---------------------------------------------------------------------===//
+// Scalar ==> Scalar conversion functions
+//===---------------------------------------------------------------------===//
+// f32 ==> f64
+// Each conversion is a UnaryOp<IL opcode, DAG node, result class, source
+// class> record; the per-record comments give the direction as "src ==> dst".
+// DTOF is matched on the target-specific IL_d2f node (defined elsewhere),
+// whereas the other records in this group use generic conversion nodes.
+def FTOD : UnaryOp<IL_OP_F_2_D, fextend, GPRF64, GPRF32>;
+// f64 ==> f32
+def DTOF : UnaryOp<IL_OP_D_2_F, IL_d2f, GPRF32, GPRF64>;
+// f32 ==> i32 signed
+def FTOI : UnaryOp<IL_OP_FTOI, fp_to_sint, GPRI32, GPRF32>;
+def FTOI_v2i32 : UnaryOp<IL_OP_FTOI, fp_to_sint, GPRV2I32, GPRV2F32>;
+def FTOI_v4i32 : UnaryOp<IL_OP_FTOI, fp_to_sint, GPRV4I32, GPRV4F32>;
+// i32 ==> f32 signed
+def ITOF : UnaryOp<IL_OP_ITOF, sint_to_fp, GPRF32, GPRI32>;
+def ITOF_v2f32 : UnaryOp<IL_OP_ITOF, sint_to_fp, GPRV2F32, GPRV2I32>;
+def ITOF_v4f32 : UnaryOp<IL_OP_ITOF, sint_to_fp, GPRV4F32, GPRV4I32>;
+// f32 ==> i32 unsigned
+def FTOU : UnaryOp<IL_OP_FTOU, fp_to_uint, GPRI32, GPRF32>;
+def FTOU_v2i32 : UnaryOp<IL_OP_FTOU, fp_to_uint, GPRV2I32, GPRV2F32>;
+def FTOU_v4i32 : UnaryOp<IL_OP_FTOU, fp_to_uint, GPRV4I32, GPRV4F32>;
+// i32 ==> f32 unsigned
+def UTOF : UnaryOp<IL_OP_UTOF, uint_to_fp, GPRF32, GPRI32>;
+def UTOF_v2f32 : UnaryOp<IL_OP_UTOF, uint_to_fp, GPRV2F32, GPRV2I32>;
+def UTOF_v4f32 : UnaryOp<IL_OP_UTOF, uint_to_fp, GPRV4F32, GPRV4I32>;
+// The f64 <-> i32 conversions are scalar only and are selected only when the
+// HasHWDoubleConv predicate holds.
+let Predicates = [HasHWDoubleConv] in {
+ // f64 ==> i32 signed
+ def DTOI : UnaryOp<IL_OP_DTOI, fp_to_sint, GPRI32, GPRF64>;
+ // i32 ==> f64 signed
+ def ITOD : UnaryOp<IL_OP_ITOD, sint_to_fp, GPRF64, GPRI32>;
+ // f64 ==> i32 unsigned
+ def DTOU : UnaryOp<IL_OP_DTOU, fp_to_uint, GPRI32, GPRF64>;
+ // i32 ==> f64 unsigned
+ def UTOD : UnaryOp<IL_OP_UTOD, uint_to_fp, GPRF64, GPRI32>;
+}
+// f64 <-> i32 component access. DHI / DLO (and their v2f64 forms) print a
+// plain IL_OP_MOV and differ only in the DAG node they match (IL_dcomphi*
+// vs IL_dcomplo*); the asm string itself does not select a half, which per
+// the comments below is left to the swizzle encoder. DCREATE prints
+// IL_OP_I_ADD on two i32 sources and matches IL_dcreate*.
+// Get upper 32 bits of f64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DHI : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst),
+ (ins GPRF64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRI32:$dst, (IL_dcomphi GPRF64:$src))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DHI_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst),
+ (ins GPRV2F64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRV2I32:$dst, (IL_dcomphi2 GPRV2F64:$src))]>;
+// Get lower 32 bits of f64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DLO : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst),
+ (ins GPRF64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRI32:$dst, (IL_dcomplo GPRF64:$src))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DLO_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst),
+ (ins GPRV2F64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRV2I32:$dst, (IL_dcomplo2 GPRV2F64:$src))]>;
+// Convert two 32 bit integers into a f64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DCREATE : TwoInOneOut<IL_OP_I_ADD, (outs GPRF64:$dst),
+ (ins GPRI32:$src0, GPRI32:$src1),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+ [(set GPRF64:$dst, (IL_dcreate GPRI32:$src0, GPRI32:$src1))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def DCREATE_v2f64 : TwoInOneOut<IL_OP_I_ADD, (outs GPRV2F64:$dst),
+ (ins GPRV2I32:$src0, GPRV2I32:$src1),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+ [(set GPRV2F64:$dst,
+ (IL_dcreate2 GPRV2I32:$src0, GPRV2I32:$src1))]>;
+// i64 <-> i32 component access, mirroring the f64 records: LHI / LLO print
+// IL_OP_MOV and match IL_lcomphi* / IL_lcomplo*, LCREATE prints IL_OP_I_ADD
+// and matches IL_lcreate*. The HILO_BITOR_* records print IL_OP_I_OR and have
+// empty pattern lists.
+// Get upper 32 bits of i64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LHI : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst),
+ (ins GPRI64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRI32:$dst, (IL_lcomphi GPRI64:$src))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LHI_v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst),
+ (ins GPRV2I64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRV2I32:$dst, (IL_lcomphi2 GPRV2I64:$src))]>;
+// Get lower 32 bits of i64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LLO : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst),
+ (ins GPRI64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRI32:$dst, (IL_lcomplo GPRI64:$src))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LLO_v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst),
+ (ins GPRV2I64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRV2I32:$dst, (IL_lcomplo2 GPRV2I64:$src))]>;
+// HILO_BITOR_v4i16 and HILO_BITOR_v2i32 are defined identically (GPRI32
+// operands, same asm string); only the record name tells them apart.
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def HILO_BITOR_v4i16 : TwoInOneOut<IL_OP_I_OR, (outs GPRI32:$dst),
+ (ins GPRI32:$src, GPRI32:$src2),
+ !strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def HILO_BITOR_v2i32 : TwoInOneOut<IL_OP_I_OR, (outs GPRI32:$dst),
+ (ins GPRI32:$src, GPRI32:$src2),
+ !strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def HILO_BITOR_v2i64 : TwoInOneOut<IL_OP_I_OR, (outs GPRI64:$dst),
+ (ins GPRI64:$src, GPRI64:$src2),
+ !strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
+// Convert two 32 bit integers into a i64
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LCREATE : TwoInOneOut<IL_OP_I_ADD, (outs GPRI64:$dst),
+ (ins GPRI32:$src0, GPRI32:$src1),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+ [(set GPRI64:$dst, (IL_lcreate GPRI32:$src0, GPRI32:$src1))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def LCREATE_v2i64 : TwoInOneOut<IL_OP_I_ADD, (outs GPRV2I64:$dst),
+ (ins GPRV2I32:$src0, GPRV2I32:$src1),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+ [(set GPRV2I64:$dst,
+ (IL_lcreate2 GPRV2I32:$src0, GPRV2I32:$src1))]>;
+//===---------------------------------------------------------------------===//
+// Scalar ==> Vector conversion functions
+//===---------------------------------------------------------------------===//
+// VCREATE instantiates UnaryOpMCVec with IL_OP_MOV and the IL_vbuild node.
+// The set of records it expands to depends on the multiclass, which is
+// defined elsewhere.
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VCREATE : UnaryOpMCVec<IL_OP_MOV, IL_vbuild>;
+
+//===---------------------------------------------------------------------===//
+// Vector ==> Scalar conversion functions
+//===---------------------------------------------------------------------===//
+
+// VEXTRACT instantiates the VectorExtract multiclass on the IL_vextract node.
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VEXTRACT : VectorExtract<IL_vextract>;
+
+//===---------------------------------------------------------------------===//
+// Vector ==> Vector conversion functions
+//===---------------------------------------------------------------------===//
+// VINSERT / VCONCAT instantiate VectorInsert / VectorConcat on the
+// IL_vinsert / IL_vconcat nodes; no IL opcode is passed in at this level.
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VINSERT : VectorInsert<IL_vinsert>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VCONCAT : VectorConcat<IL_vconcat>;
+
+//===---------------------------------------------------------------------===//
+// Bit conversion functions
+//===---------------------------------------------------------------------===//
+// One BitConversion instantiation per register class; every entry uses
+// IL_OP_MOV and the IL_bitconv node. Judging by the IL_AS<type> names the
+// register class is presumably the destination type -- TODO confirm against
+// the BitConversion multiclass.
+// The list covers scalar and 2-element forms of i8/i16/i32/f32/f64/i64, but
+// the 4-element forms stop at f32 (there is no v4 double or v4 long entry).
+defm IL_ASCHAR : BitConversion<IL_OP_MOV, GPRI8, IL_bitconv>;
+defm IL_ASSHORT : BitConversion<IL_OP_MOV, GPRI16, IL_bitconv>;
+defm IL_ASINT : BitConversion<IL_OP_MOV, GPRI32, IL_bitconv>;
+defm IL_ASFLOAT : BitConversion<IL_OP_MOV, GPRF32, IL_bitconv>;
+defm IL_ASDOUBLE : BitConversion<IL_OP_MOV, GPRF64, IL_bitconv>;
+defm IL_ASLONG : BitConversion<IL_OP_MOV, GPRI64, IL_bitconv>;
+defm IL_ASV2CHAR : BitConversion<IL_OP_MOV, GPRV2I8, IL_bitconv>;
+defm IL_ASV2SHORT : BitConversion<IL_OP_MOV, GPRV2I16, IL_bitconv>;
+defm IL_ASV2INT : BitConversion<IL_OP_MOV, GPRV2I32, IL_bitconv>;
+defm IL_ASV2FLOAT : BitConversion<IL_OP_MOV, GPRV2F32, IL_bitconv>;
+defm IL_ASV2DOUBLE : BitConversion<IL_OP_MOV, GPRV2F64, IL_bitconv>;
+defm IL_ASV2LONG : BitConversion<IL_OP_MOV, GPRV2I64, IL_bitconv>;
+defm IL_ASV4CHAR : BitConversion<IL_OP_MOV, GPRV4I8, IL_bitconv>;
+defm IL_ASV4SHORT : BitConversion<IL_OP_MOV, GPRV4I16, IL_bitconv>;
+defm IL_ASV4INT : BitConversion<IL_OP_MOV, GPRV4I32, IL_bitconv>;
+defm IL_ASV4FLOAT : BitConversion<IL_OP_MOV, GPRV4F32, IL_bitconv>;
+
+//===---------------------------------------------------------------------===//
+// Custom Inserter for Branches and returns, this eventually will be a
+// separate pass
+//===---------------------------------------------------------------------===//
+// BRANCH is a pseudo (IL_PSEUDO_INST) that matches the generic unconditional
+// `br` node; its asm string is just an IL comment line. BRANCH_COND expands
+// the BranchConditional multiclass on the IL_brcond node.
+// NOTE(review): only isTerminator is set here; whether isBranch / isBarrier
+// are set for BRANCH depends on ILFormat, which is not visible -- confirm.
+let isTerminator = 1 in {
+ def BRANCH : ILFormat<IL_PSEUDO_INST, (outs), (ins brtarget:$target),
+ "; Pseudo unconditional branch instruction",
+ [(br bb:$target)]>;
+ defm BRANCH_COND : BranchConditional<IL_brcond>;
+}
+//===---------------------------------------------------------------------===//
+// return instructions
+//===---------------------------------------------------------------------===//
+// Both returns take variable_ops and print only the opcode text. RETDYN is
+// the one matched from the DAG (IL_retflag); RETURN has an empty pattern.
+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def RETURN : ILFormat<IL_OP_RET,(outs), (ins variable_ops),
+ IL_OP_RET.Text, []>;
+ def RETDYN : ILFormat<IL_OP_RET_DYN, (outs), (ins variable_ops),
+ IL_OP_RET_DYN.Text, [(IL_retflag)]>;
+}
+//===---------------------------------------------------------------------===//
+// Lower and raise the stack x amount
+//===---------------------------------------------------------------------===//
+// Call-sequence markers. Both are pseudo instructions (IL_PSEUDO_INST) whose
+// asm strings are IL comment lines echoing their immediate operands; they
+// match the IL_callseq_start / IL_callseq_end nodes.
+def ADJCALLSTACKDOWN : ILFormat<IL_PSEUDO_INST, (outs), (ins i32imm:$amt),
+ "; begin of call sequence $amt",
+ [(IL_callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : ILFormat<IL_PSEUDO_INST, (outs), (ins i32imm:$amt1,
+ i32imm:$amt2),
+ "; end of call sequence $amt1 $amt2",
+ [(IL_callseq_end timm:$amt1, timm:$amt2)]>;
+
+//===---------------------------------------------------------------------===//
+// Handle a function call
+//===---------------------------------------------------------------------===//
+// CALL prints the IL call opcode followed by the call target. It has no
+// selection pattern, accepts variable_ops after the target, and is declared
+// as implicitly using registers R1 through R32.
+let isCall = 1 in {
+ let Uses = [
+ R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32
+ ] in {
+ def CALL : UnaryOpNoRet<IL_OP_CALL, (outs),
+ (ins calltarget:$dst, variable_ops),
+ !strconcat(IL_OP_CALL.Text, " $dst"), []>;
+ }
+}
+
+
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+// Structured control-flow records. None of the plain defs has a selection
+// pattern (all pattern lists are empty), so they are not matched from the DAG
+// by these definitions. SWITCH and CASE take one GPRI32 operand; the
+// remaining plain defs take no operands and print only their opcode text.
+// Everything in this group, including FUNC / ENDFUNC / END, is flagged
+// isTerminator.
+let isTerminator=1 in {
+ def SWITCH : ILFormat<IL_OP_SWITCH, (outs), (ins GPRI32:$src),
+ !strconcat(IL_OP_SWITCH.Text, " $src"), []>;
+ def CASE : ILFormat<IL_OP_CASE, (outs), (ins GPRI32:$src),
+ !strconcat(IL_OP_CASE.Text, " $src"), []>;
+ def BREAK : ILFormat<IL_OP_BREAK, (outs), (ins),
+ IL_OP_BREAK.Text, []>;
+ def CONTINUE : ILFormat<IL_OP_CONTINUE, (outs), (ins),
+ IL_OP_CONTINUE.Text, []>;
+ def DEFAULT : ILFormat<IL_OP_DEFAULT, (outs), (ins),
+ IL_OP_DEFAULT.Text, []>;
+ def ELSE : ILFormat<IL_OP_ELSE, (outs), (ins),
+ IL_OP_ELSE.Text, []>;
+ def ENDSWITCH : ILFormat<IL_OP_ENDSWITCH, (outs), (ins),
+ IL_OP_ENDSWITCH.Text, []>;
+ def ENDMAIN : ILFormat<IL_OP_ENDMAIN, (outs), (ins),
+ IL_OP_ENDMAIN.Text, []>;
+ def END : ILFormat<IL_OP_END, (outs), (ins),
+ IL_OP_END.Text, []>;
+ def ENDFUNC : ILFormat<IL_OP_ENDFUNC, (outs), (ins),
+ IL_OP_ENDFUNC.Text, []>;
+ def ENDIF : ILFormat<IL_OP_ENDIF, (outs), (ins),
+ IL_OP_ENDIF.Text, []>;
+ def WHILELOOP : ILFormat<IL_OP_WHILE, (outs), (ins),
+ IL_OP_WHILE.Text, []>;
+ def ENDLOOP : ILFormat<IL_OP_ENDLOOP, (outs), (ins),
+ IL_OP_ENDLOOP.Text, []>;
+ def FUNC : ILFormat<IL_OP_FUNC, (outs), (ins),
+ IL_OP_FUNC.Text, []>;
+ // Conditional forms: the *_LOGICALNZ / *_LOGICALZ records come from the
+ // BranchInstr multiclass, the IFC / BREAKC / CONTINUEC records from
+ // BranchInstr2 (both defined elsewhere).
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALNZ : BranchInstr<IL_OP_IF_LOGICALNZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALZ : BranchInstr<IL_OP_IF_LOGICALZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALNZ : BranchInstr<IL_OP_BREAK_LOGICALNZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALZ : BranchInstr<IL_OP_BREAK_LOGICALZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALNZ : BranchInstr<IL_OP_CONTINUE_LOGICALNZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALZ : BranchInstr<IL_OP_CONTINUE_LOGICALZ>;
+ defm IFC : BranchInstr2<IL_OP_IFC>;
+ defm BREAKC : BranchInstr2<IL_OP_BREAKC>;
+ defm CONTINUEC : BranchInstr2<IL_OP_CONTINUEC>;
+}
+// TRAP matches the generic `trap` node but is built on IL_OP_NOP and prints
+// the NOP opcode text.
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def TRAP : ILFormat<IL_OP_NOP, (outs), (ins),
+ IL_OP_NOP.Text, [(trap)]>;
+}
+
+//===---------------------------------------------------------------------===//
+//----------------- Work Item Functions - OpenCL 6.11.1 ---------------------//
+//===---------------------------------------------------------------------===//
+// Work-item query intrinsics. Each record takes no inputs and prints an
+// IL_OP_MOV from a source operand that is hard-coded in the asm string:
+//   work dim     <- cb0[0].w
+//   global id    <- r1021.xyz0      local id   <- r1022.xyz0
+//   group id     <- r1023.xyz0      global size<- cb0[0].xyz0
+//   local size   <- cb0[1].xyz0     num groups <- cb0[2].xyz0
+// All but GET_WORK_DIM (GPRXI32) return a v4i32 value.
+let isCall=1, isAsCheapAsAMove = 1 in {
+ def GET_WORK_DIM : ILFormat<IL_OP_MOV, (outs GPRXI32:$dst), (ins),
+ !strconcat(IL_OP_MOV.Text, " $dst, cb0[0].w"),
+ [(set GPRXI32:$dst, (int_AMDIL_get_work_dim))]>;
+
+ def GET_GLOBAL_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1021.xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_global_id))]>;
+
+ def GET_LOCAL_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1022.xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_local_id))]>;
+
+ def GET_GROUP_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1023.xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_group_id))]>;
+
+ def GET_GLOBAL_SIZE : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[0].xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_global_size))]>;
+
+ def GET_LOCAL_SIZE : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[1].xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_local_size))]>;
+
+ def GET_NUM_GROUPS : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[2].xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_num_groups))]>;
+
+ // The two global-offset records match the same intrinsic and are told
+ // apart only by predicate: the 32-bit-pointer form reads cb0[9].xyz0, the
+ // 64-bit-pointer form reads cb0[9].yzw0.
+ let Predicates = [Has32BitPtr] in {
+ def GET_GLOBAL_OFFSET : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[9].xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_global_offset))]>;
+ }
+
+ let Predicates = [Has64BitPtr] in {
+ def GET_GLOBAL_OFFSET64 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[9].yzw0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_global_offset))]>;
+ }
+}
+//===---------------------------------------------------------------------===//
+//------------- Synchronization Functions - OpenCL 6.11.9 -------------------//
+//===---------------------------------------------------------------------===//
+// Memory fence records. Each one maps a single int_AMDIL_*fence* intrinsic to
+// a fixed asm string; the scope suffixes (_lds, _memory, _gds) and the
+// _mem_read_only / _mem_write_only qualifiers are spelled out literally in
+// the string. The GPRI32 $flag operand is required by the intrinsic pattern
+// but is not referenced by any of the asm strings.
+// Three families share this layout: plain fences (IL_OP_FENCE), read-only
+// fences (IL_OP_FENCE_READ_ONLY) and write-only fences
+// (IL_OP_FENCE_WRITE_ONLY), each in 7 scope combinations.
+let isCall=1 in {
+ def FENCE : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_lds_memory_gds",
+ [(int_AMDIL_fence GPRI32:$flag)]>;
+
+ def FENCE_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_lds",
+ [(int_AMDIL_fence_local GPRI32:$flag)]>;
+
+ def FENCE_GLOBAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_memory",
+ [(int_AMDIL_fence_global GPRI32:$flag)]>;
+
+ def FENCE_GLOBAL_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_memory_lds",
+ [(int_AMDIL_fence_global_local GPRI32:$flag)]>;
+
+ def FENCE_REGION : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_gds",
+ [(int_AMDIL_fence_region GPRI32:$flag)]>;
+
+ def FENCE_REGION_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_gds_lds",
+ [(int_AMDIL_fence_region_local GPRI32:$flag)]>;
+
+ def FENCE_REGION_GLOBAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_gds_memory",
+ [(int_AMDIL_fence_region_global GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_lds_memory_gds_mem_read_only",
+ [(int_AMDIL_read_fence GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_lds_mem_read_only",
+ [(int_AMDIL_read_fence_local GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_memory_mem_read_only",
+ [(int_AMDIL_read_fence_global GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY_GLOBAL_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_memory_lds_mem_read_only",
+ [(int_AMDIL_read_fence_global_local GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY_REGION : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_gds_mem_read_only",
+ [(int_AMDIL_read_fence_region GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY_REGION_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_gds_lds_mem_read_only",
+ [(int_AMDIL_read_fence_region_local GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY_REGION_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_gds_memory_mem_read_only",
+ [(int_AMDIL_read_fence_region_global GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_lds_memory_gds_mem_write_only",
+ [(int_AMDIL_write_fence GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_lds_mem_write_only",
+ [(int_AMDIL_write_fence_local GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_memory_mem_write_only",
+ [(int_AMDIL_write_fence_global GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY_GLOBAL_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_memory_lds_mem_write_only",
+ [(int_AMDIL_write_fence_global_local GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY_REGION : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_gds_mem_write_only",
+ [(int_AMDIL_write_fence_region GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY_REGION_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_gds_lds_mem_write_only",
+ [(int_AMDIL_write_fence_region_local GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY_REGION_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
+ "fence_gds_memory_mem_write_only",
+ [(int_AMDIL_write_fence_region_global GPRI32:$flag)]>;
+
+}
+
+// EARLY_EXIT maps int_AMDIL_early_exit onto IL_OP_RET_LOGICALNZ, printing the
+// opcode text followed by the $flag register. It is flagged isReturn only
+// (no isTerminator / isBarrier in this let).
+let isReturn = 1 in {
+ def EARLY_EXIT : UnaryOpNoRet<IL_OP_RET_LOGICALNZ, (outs),
+ (ins GPRI32:$flag),
+ !strconcat(IL_OP_RET_LOGICALNZ.Text, " $flag"),
+ [(int_AMDIL_early_exit GPRI32:$flag)]>;
+}
+// MEDIA_UNPACK_0..3: one record per int_AMDIL_media_unpack_byte_N intrinsic,
+// each using the matching IL_OP_UNPACK_N opcode. All four take a v4i32
+// source and produce a v4f32 result.
+def MEDIA_UNPACK_0 : OneInOneOut<IL_OP_UNPACK_0, (outs GPRV4F32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(IL_OP_UNPACK_0.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst,
+ (v4f32 (int_AMDIL_media_unpack_byte_0 GPRV4I32:$src)))]>;
+def MEDIA_UNPACK_1 : OneInOneOut<IL_OP_UNPACK_1, (outs GPRV4F32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(IL_OP_UNPACK_1.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst,
+ (v4f32 (int_AMDIL_media_unpack_byte_1 GPRV4I32:$src)))]>;
+def MEDIA_UNPACK_2 : OneInOneOut<IL_OP_UNPACK_2, (outs GPRV4F32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(IL_OP_UNPACK_2.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst,
+ (v4f32 (int_AMDIL_media_unpack_byte_2 GPRV4I32:$src)))]>;
+def MEDIA_UNPACK_3 : OneInOneOut<IL_OP_UNPACK_3, (outs GPRV4F32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(IL_OP_UNPACK_3.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst,
+ (v4f32 (int_AMDIL_media_unpack_byte_3 GPRV4I32:$src)))]>;
+
+// Semaphore intrinsics. None produces a result; the operands are embedded in
+// the opcode text as "_id($ptr)" (plus "_value($val)" for init) rather than
+// printed as a comma-separated operand list. The pointer is a MEM3232
+// operand matched through the ADDR complex pattern.
+def SEMAPHORE_INIT : BinaryOpNoRet<IL_OP_SEMAPHORE_INIT, (outs),
+ (ins MEM3232:$ptr, GPRI32:$val),
+ !strconcat(IL_OP_SEMAPHORE_INIT.Text, "_id($ptr)_value($val)"),
+ [(int_AMDIL_semaphore_init ADDR:$ptr, GPRI32:$val)]>;
+
+def SEMAPHORE_WAIT : UnaryOpNoRet<IL_OP_SEMAPHORE_WAIT, (outs),
+ (ins MEM3232:$ptr),
+ !strconcat(IL_OP_SEMAPHORE_WAIT.Text, "_id($ptr)"),
+ [(int_AMDIL_semaphore_wait ADDR:$ptr)]>;
+
+def SEMAPHORE_SIGNAL : UnaryOpNoRet<IL_OP_SEMAPHORE_SIGNAL, (outs),
+ (ins MEM3232:$ptr),
+ !strconcat(IL_OP_SEMAPHORE_SIGNAL.Text, "_id($ptr)"),
+ [(int_AMDIL_semaphore_signal ADDR:$ptr)]>;
+
+// Three-operand min / median / max intrinsics in float, signed-int (I*) and
+// unsigned-int (U*) flavours, instantiated from the TernaryIntrinsicFloat /
+// TernaryIntrinsicInt multiclasses.
+// NOTE(review): hasIEEEFlag = 1 is applied to the integer records as well as
+// the float ones; confirm that is intended.
+let hasIEEEFlag = 1 in {
+ defm MIN3 : TernaryIntrinsicFloat<IL_OP_MIN3, int_AMDIL_min3>;
+ defm MED3 : TernaryIntrinsicFloat<IL_OP_MED3, int_AMDIL_med3>;
+ defm MAX3 : TernaryIntrinsicFloat<IL_OP_MAX3, int_AMDIL_max3>;
+ defm IMIN3 : TernaryIntrinsicInt<IL_OP_I_MIN3, int_AMDIL_min3_i32>;
+ defm IMED3 : TernaryIntrinsicInt<IL_OP_I_MED3, int_AMDIL_med3_i32>;
+ defm IMAX3 : TernaryIntrinsicInt<IL_OP_I_MAX3, int_AMDIL_max3_i32>;
+ defm UMIN3 : TernaryIntrinsicInt<IL_OP_U_MIN3, int_AMDIL_min3_u32>;
+ defm UMED3 : TernaryIntrinsicInt<IL_OP_U_MED3, int_AMDIL_med3_u32>;
+ defm UMAX3 : TernaryIntrinsicInt<IL_OP_U_MAX3, int_AMDIL_max3_u32>;
+}
+
+// int_AMDIL_class records: a floating-point source plus an integer $flag
+// operand yield an integer result of the same element count. The f32 forms
+// (scalar, v2, v4) use IL_OP_CLASS; the f64 forms (scalar, v2) use
+// IL_OP_D_CLASS. All five match the same overloaded intrinsic.
+def CLASS_f32 : TwoInOneOut<IL_OP_CLASS, (outs GPRI32:$dst),
+ (ins GPRF32:$src, GPRI32:$flag),
+ !strconcat(IL_OP_CLASS.Text, " $dst, $src, $flag"),
+ [(set GPRI32:$dst,
+ (int_AMDIL_class GPRF32:$src, GPRI32:$flag))]>;
+def CLASS_v2f32 : TwoInOneOut<IL_OP_CLASS, (outs GPRV2I32:$dst),
+ (ins GPRV2F32:$src, GPRV2I32:$flag),
+ !strconcat(IL_OP_CLASS.Text, " $dst, $src, $flag"),
+ [(set GPRV2I32:$dst,
+ (int_AMDIL_class GPRV2F32:$src, GPRV2I32:$flag))]>;
+def CLASS_v4f32 : TwoInOneOut<IL_OP_CLASS, (outs GPRV4I32:$dst),
+ (ins GPRV4F32:$src, GPRV4I32:$flag),
+ !strconcat(IL_OP_CLASS.Text, " $dst, $src, $flag"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_class GPRV4F32:$src, GPRV4I32:$flag))]>;
+def CLASS_f64 : TwoInOneOut<IL_OP_D_CLASS, (outs GPRI32:$dst),
+ (ins GPRF64:$src, GPRI32:$flag),
+ !strconcat(IL_OP_D_CLASS.Text, " $dst, $src, $flag"),
+ [(set GPRI32:$dst,
+ (int_AMDIL_class GPRF64:$src, GPRI32:$flag))]>;
+def CLASS_v2f64 : TwoInOneOut<IL_OP_D_CLASS, (outs GPRV2I32:$dst),
+ (ins GPRV2F64:$src, GPRV2I32:$flag),
+ !strconcat(IL_OP_D_CLASS.Text, " $dst, $src, $flag"),
+ [(set GPRV2I32:$dst,
+ (int_AMDIL_class GPRV2F64:$src, GPRV2I32:$flag))]>;
+
+
+// int_AMDIL_frexp_exp: the f32 forms come from the IntrConvertF32TOI32
+// multiclass, while the f64 and v2f64 forms are written out by hand below
+// using IL_OP_D_FREXP_EXP and returning i32 / v2i32.
+defm FREXP_EXP : IntrConvertF32TOI32<IL_OP_FREXP_EXP, int_AMDIL_frexp_exp>;
+def FREXP_EXP_f64 : OneInOneOut<IL_OP_D_FREXP_EXP, (outs GPRI32:$dst),
+ (ins GPRF64:$src),
+ !strconcat(IL_OP_D_FREXP_EXP.Text, " $dst, $src"),
+ [(set GPRI32:$dst,
+ (int_AMDIL_frexp_exp GPRF64:$src))]>;
+def FREXP_EXP_v2f64 : OneInOneOut<IL_OP_D_FREXP_EXP, (outs GPRV2I32:$dst),
+ (ins GPRV2F64:$src),
+ !strconcat(IL_OP_D_FREXP_EXP.Text, " $dst, $src"),
+ [(set GPRV2I32:$dst,
+ (int_AMDIL_frexp_exp GPRV2F64:$src))]>;
+
+// Float form of int_AMDIL_frexp_mant, followed by the 16- and 32-bit media
+// SAD intrinsics.
+defm FREXP_MANT : UnaryIntrinsicFloat<IL_OP_FREXP_MANT, int_AMDIL_frexp_mant>;
+defm SAD16 : TernaryIntrinsicInt<IL_OP_SAD_U16, int_AMDIL_media_sad16>;
+defm SAD32 : TernaryIntrinsicInt<IL_OP_SAD_U32, int_AMDIL_media_sad32>;
+
+// Double-precision divide, reachable both through the int_AMDIL_div intrinsic
+// (DDIV_INT) and the generic fdiv node (DDIV). Both use IL_OP_D_DIV, carry
+// hasZeroOpFlag and are explicitly marked as neither loading nor storing.
+let hasZeroOpFlag = 1 in {
+ let mayLoad = 0, mayStore=0 in {
+defm DDIV_INT : BinaryIntrinsicDouble<IL_OP_D_DIV, int_AMDIL_div>;
+defm DDIV : BinaryOpMCf64<IL_OP_D_DIV, fdiv>;
+ }
+}
+
+// Double form of int_AMDIL_frexp_mant. This reuses the FREXP_MANT defm prefix
+// of the float instantiation, so the two multiclasses presumably produce
+// distinct record suffixes -- TODO confirm in their definitions.
+defm FREXP_MANT : UnaryIntrinsicDouble<IL_OP_D_FREXP_MANT, int_AMDIL_frexp_mant>;
+
+// int_AMDIL_trig_preop_f64: f64 first source, f32 second source, f64 result.
+def DTRIG_PREOP : TwoInOneOut<IL_OP_D_TRIG_PREOP, (outs GPRF64:$dst),
+ (ins GPRF64:$src0, GPRF32:$src1),
+ !strconcat(IL_OP_D_TRIG_PREOP.Text, " $dst, $src0, $src1"),
+ [(set GPRF64:$dst,
+ (int_AMDIL_trig_preop_f64 GPRF64:$src0, GPRF32:$src1))]>;
+
+
+// int_AMDIL_ldexp in scalar, v2 and v4 f32 form: a float $src and an integer
+// $src1 of matching element count, producing a float result (IL_OP_LDEXP).
+// No f64 variant is defined in this group.
+def LDEXP_f32 : TwoInOneOut<IL_OP_LDEXP, (outs GPRF32:$dst),
+ (ins GPRF32:$src, GPRI32:$src1),
+ !strconcat(IL_OP_LDEXP.Text, " $dst, $src, $src1"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_ldexp GPRF32:$src, GPRI32:$src1))]>;
+
+def LDEXP_v2f32 : TwoInOneOut<IL_OP_LDEXP, (outs GPRV2F32:$dst),
+ (ins GPRV2F32:$src, GPRV2I32:$src1),
+ !strconcat(IL_OP_LDEXP.Text, " $dst, $src, $src1"),
+ [(set GPRV2F32:$dst,
+ (int_AMDIL_ldexp GPRV2F32:$src, GPRV2I32:$src1))]>;
+
+def LDEXP_v4f32 : TwoInOneOut<IL_OP_LDEXP, (outs GPRV4F32:$dst),
+ (ins GPRV4F32:$src, GPRV4I32:$src1),
+ !strconcat(IL_OP_LDEXP.Text, " $dst, $src, $src1"),
+ [(set GPRV4F32:$dst,
+ (int_AMDIL_ldexp GPRV4F32:$src, GPRV4I32:$src1))]>;
+// Media MSAD intrinsic, instantiated from TernaryIntrinsicInt.
+defm MSAD : TernaryIntrinsicInt<IL_OP_MSAD, int_AMDIL_media_msad>;
+
+// QSAD / MQSAD media intrinsics. Both take (i64, i32, i64) sources and return
+// an i64; they differ only in opcode (IL_OP_QSAD vs IL_OP_MQSAD) and in the
+// intrinsic they match.
+def QSAD_i64 : ThreeInOneOut<IL_OP_QSAD, (outs GPRI64:$dst),
+ (ins GPRI64:$src0, GPRI32:$src1, GPRI64:$src2),
+ !strconcat(IL_OP_QSAD.Text, " $dst, $src0, $src1, $src2"),
+ [(set GPRI64:$dst,
+ (int_AMDIL_media_qsad GPRI64:$src0, GPRI32:$src1, GPRI64:$src2))]>;
+
+def MQSAD_i64 : ThreeInOneOut<IL_OP_MQSAD, (outs GPRI64:$dst),
+ (ins GPRI64:$src0, GPRI32:$src1, GPRI64:$src2),
+ !strconcat(IL_OP_MQSAD.Text, " $dst, $src0, $src1, $src2"),
+ [(set GPRI64:$dst,
+ (int_AMDIL_media_mqsad GPRI64:$src0, GPRI32:$src1, GPRI64:$src2))]>;
+
+// Arithmetic with an explicit rounding-mode suffix (_RTE, _RTP, _RTN, _RTZ),
+// one record group per int_AMDIL_<op>_<mode> intrinsic.
+// Float forms: ADD, SUB, MUL (binary) and MAD, FMA (ternary).
+defm ADD_RTE : BinaryIntrinsicFloat<IL_OP_ADD_RTE, int_AMDIL_add_rte>;
+defm ADD_RTP : BinaryIntrinsicFloat<IL_OP_ADD_RTP, int_AMDIL_add_rtp>;
+defm ADD_RTN : BinaryIntrinsicFloat<IL_OP_ADD_RTN, int_AMDIL_add_rtn>;
+defm ADD_RTZ : BinaryIntrinsicFloat<IL_OP_ADD_RTZ, int_AMDIL_add_rtz>;
+defm SUB_RTE : BinaryIntrinsicFloat<IL_OP_SUB_RTE, int_AMDIL_sub_rte>;
+defm SUB_RTP : BinaryIntrinsicFloat<IL_OP_SUB_RTP, int_AMDIL_sub_rtp>;
+defm SUB_RTN : BinaryIntrinsicFloat<IL_OP_SUB_RTN, int_AMDIL_sub_rtn>;
+defm SUB_RTZ : BinaryIntrinsicFloat<IL_OP_SUB_RTZ, int_AMDIL_sub_rtz>;
+defm MUL_RTE : BinaryIntrinsicFloat<IL_OP_MUL_RTE, int_AMDIL_mul_rte>;
+defm MUL_RTP : BinaryIntrinsicFloat<IL_OP_MUL_RTP, int_AMDIL_mul_rtp>;
+defm MUL_RTN : BinaryIntrinsicFloat<IL_OP_MUL_RTN, int_AMDIL_mul_rtn>;
+defm MUL_RTZ : BinaryIntrinsicFloat<IL_OP_MUL_RTZ, int_AMDIL_mul_rtz>;
+defm MAD_RTE : TernaryIntrinsicFloat<IL_OP_MAD_RTE, int_AMDIL_mad_rte>;
+defm MAD_RTP : TernaryIntrinsicFloat<IL_OP_MAD_RTP, int_AMDIL_mad_rtp>;
+defm MAD_RTN : TernaryIntrinsicFloat<IL_OP_MAD_RTN, int_AMDIL_mad_rtn>;
+defm MAD_RTZ : TernaryIntrinsicFloat<IL_OP_MAD_RTZ, int_AMDIL_mad_rtz>;
+defm FMA_RTE : TernaryIntrinsicFloat<IL_OP_FMA_RTE, int_AMDIL_fma_rte>;
+defm FMA_RTP : TernaryIntrinsicFloat<IL_OP_FMA_RTP, int_AMDIL_fma_rtp>;
+defm FMA_RTN : TernaryIntrinsicFloat<IL_OP_FMA_RTN, int_AMDIL_fma_rtn>;
+defm FMA_RTZ : TernaryIntrinsicFloat<IL_OP_FMA_RTZ, int_AMDIL_fma_rtz>;
+// Scalar double forms of the same intrinsics, using the IL_OP_D_* opcodes.
+// These repeat the defm prefixes used above, so the *DoubleScalar
+// multiclasses presumably emit different record suffixes from the Float
+// ones -- TODO confirm. There is no double FMA_* group in this list.
+defm ADD_RTE : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTE, int_AMDIL_add_rte>;
+defm ADD_RTP : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTP, int_AMDIL_add_rtp>;
+defm ADD_RTN : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTN, int_AMDIL_add_rtn>;
+defm ADD_RTZ : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTZ, int_AMDIL_add_rtz>;
+defm SUB_RTE : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTE, int_AMDIL_sub_rte>;
+defm SUB_RTP : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTP, int_AMDIL_sub_rtp>;
+defm SUB_RTN : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTN, int_AMDIL_sub_rtn>;
+defm SUB_RTZ : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTZ, int_AMDIL_sub_rtz>;
+defm MUL_RTE : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTE, int_AMDIL_mul_rte>;
+defm MUL_RTP : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTP, int_AMDIL_mul_rtp>;
+defm MUL_RTN : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTN, int_AMDIL_mul_rtn>;
+defm MUL_RTZ : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTZ, int_AMDIL_mul_rtz>;
+defm MAD_RTE : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTE, int_AMDIL_mad_rte>;
+defm MAD_RTP : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTP, int_AMDIL_mad_rtp>;
+defm MAD_RTN : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTN, int_AMDIL_mad_rtn>;
+defm MAD_RTZ : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTZ, int_AMDIL_mad_rtz>;
+
+
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,193 @@
+//===-- AMDILIntrinsicInfo.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL Implementation of the IntrinsicInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDIL.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include <cstring>
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDILGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+/// Constructs the AMDIL intrinsic-info object and stores the target
+/// machine pointer in mTM; isValidIntrinsic() later reads the subtarget
+/// through it.
+AMDILIntrinsicInfo::AMDILIntrinsicInfo(AMDILTargetMachine *tm)
+  : TargetIntrinsicInfo(),
+    mTM(tm)
+{
+}
+
+/// Returns the textual name of an AMDIL intrinsic.
+///
+/// IntrID values below Intrinsic::num_intrinsics are not AMDIL intrinsics
+/// and produce an empty string. Any other ID is looked up in the name
+/// table pulled in from AMDILGenIntrinsics.inc, indexed relative to
+/// Intrinsic::num_intrinsics. Tys and numTys are not used by this
+/// implementation.
+std::string
+AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
+                            unsigned int numTys) const
+{
+  static const char* const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+  };
+
+  // An assertion that the ID is not overloaded used to sit here in
+  // commented-out form; it stays disabled.
+
+  // Target-independent IDs have no entry in the AMDIL table.
+  if (IntrID < Intrinsic::num_intrinsics) {
+    return std::string();
+  }
+  assert(IntrID < AMDILIntrinsic::num_AMDIL_intrinsics
+         && "Invalid intrinsic ID");
+
+  // The table starts at the first AMDIL-specific ID.
+  const unsigned int tableIndex = IntrID - Intrinsic::num_intrinsics;
+  return std::string(names[tableIndex]);
+}
+
+// Decides whether lookupName must take its "truncate / translate" path
+// for this name and, when a type suffix is present, shortens Len so the
+// suffix is cut off.
+//
+// Name - candidate builtin name. It must be NUL-terminated because the
+//        suffix search uses strstr.
+// Len  - in: number of characters of Name to consider.
+//        out: offset at which the backward scan stopped (the last '_',
+//        or 0 when there is none) if a type tag was found from that
+//        point on; otherwise unchanged.
+//
+// Returns true when Name starts with "__atom" (Len is left untouched) or
+// when one of the tags i32/u32/i64/u64/f32/f64/i16/u16/i8/u8 occurs at or
+// after the stop point. Returns false otherwise, including for an empty
+// name.
+static bool
+checkTruncation(const char *Name, unsigned int& Len)
+{
+  // An empty name has no last character: Name + (Len - 1) would wrap the
+  // unsigned length and start the scan far outside the string.
+  if (Len == 0) {
+    return false;
+  }
+  // We don't want to truncate on atomic instructions
+  // but we do want to enter the check Truncation
+  // section so that we can translate the atomic
+  // instructions if we need to.
+  if (!strncmp(Name, "__atom", 6)) {
+    return true;
+  }
+  // Walk backwards from the last character to the final '_', stopping at
+  // the start of the name if there is none.
+  const char *ptr = Name + (Len - 1);
+  while (ptr != Name && *ptr != '_') {
+    --ptr;
+  }
+  // Type tags that mark a suffix to strip.
+  static const char *const typeTags[] = {
+    "i32", "u32", "i64", "u64", "f32", "f64", "i16", "u16", "i8", "u8"
+  };
+  const unsigned int numTags = sizeof(typeTags) / sizeof(typeTags[0]);
+  for (unsigned int i = 0; i < numTags; ++i) {
+    if (strstr(ptr, typeTags[i])) {
+      Len = (unsigned int)(ptr - Name);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Only the OpenCL 1.1 atomic names are supported, so a name using the
+// 1.0 spelling ("__atom_...") is rewritten to the 1.1 spelling
+// ("__atomic_...") here. Any other name is copied unchanged.
+//
+// Name - source name; the first Len characters are used.
+// Len  - number of characters of Name to copy or translate.
+//
+// Returns a NUL-terminated buffer allocated with new[]; the caller owns
+// it and must release it with delete[].
+static char*
+atomTranslateIfNeeded(const char *Name, unsigned int Len)
+{
+  const bool isOldStyleAtomic = (strncmp(Name, "__atom_", 7) == 0);
+
+  if (!isOldStyleAtomic) {
+    // Plain copy of the first Len characters plus the terminator.
+    char *copy = new char[Len + 1];
+    memcpy(copy, Name, Len);
+    copy[Len] = '\0';
+    return copy;
+  }
+
+  // "__atomic_" is two characters longer than "__atom_", so the result
+  // needs Len + 2 characters plus the terminator.
+  const unsigned int translatedLen = Len + 2;
+  char *translated = new char[translatedLen + 1];
+  memcpy(translated, "__atomic_", 9);
+  memcpy(translated + 9, Name + 7, Len - 7);
+  translated[translatedLen] = '\0';
+  return translated;
+}
+
+/// Maps a function name to an AMDIL intrinsic ID.
+///
+/// Name / Len describe the candidate name. Returns the intrinsic ID on a
+/// match, or 0 when no builtin matches or when isValidIntrinsic() rejects
+/// the match.
+unsigned int
+AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
+{
+  // NOTE(review): this fragment is expanded before any other statement
+  // and presumably inspects Name/Len itself -- confirm against
+  // AMDILGenIntrinsics.inc before reordering anything.
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDILGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+  AMDILIntrinsic::ID IntrinsicID
+    = (AMDILIntrinsic::ID)Intrinsic::not_intrinsic;
+
+  if (!checkTruncation(Name, Len)) {
+    // No type suffix and not an atomic: look the name up as given.
+    IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);
+  } else {
+    // Look up a private copy cut to Len, with the old atomic spelling
+    // translated if necessary.
+    char *translated = atomTranslateIfNeeded(Name, Len);
+    IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", translated);
+    delete [] translated;
+  }
+
+  // Report the ID only when it passes validation and is a real match.
+  if (isValidIntrinsic(IntrinsicID)
+      && IntrinsicID != (AMDILIntrinsic::ID)Intrinsic::not_intrinsic) {
+    return IntrinsicID;
+  }
+  return 0;
+}
+
+/// Reports whether the intrinsic IntrID is overloaded.
+///
+/// ID 0 is answered directly with false. For every other ID the result
+/// comes from the code expanded under GET_INTRINSIC_OVERLOAD_TABLE.
+/// NOTE(review): there is no return statement after the include, so the
+/// generated fragment presumably returns on every path -- confirm against
+/// AMDILGenIntrinsics.inc.
+bool
+AMDILIntrinsicInfo::isOverloaded(unsigned IntrID) const
+{
+ if (IntrID == 0)
+ return false;
+ // Overload Table
+ // Rebase the ID relative to Intrinsic::num_intrinsics (plus one).
+ // NOTE(review): `id` is not read by any visible code; presumably the
+ // included table refers to it by this exact name -- do not rename.
+ unsigned id = IntrID - Intrinsic::num_intrinsics + 1;
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+/// This defines the "getAttributes(ID id)" method by expanding the
+/// GET_INTRINSIC_ATTRIBUTES section of AMDILGenIntrinsics.inc.
+/// getDeclaration() calls getAttributes() to obtain the attribute list
+/// for the function it declares.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+/// Returns the declaration of intrinsic IntrID in module M, inserting it
+/// if the module does not already contain it.
+///
+/// M       - module that receives (or already holds) the declaration.
+/// IntrID  - intrinsic to declare; asserted not to be overloaded.
+/// Tys, numTys - not used by the visible code.
+///
+/// The function is named via getName(IntrID) and carries the attribute
+/// list from getAttributes(). Its type is built from ResultTy, ArgTys and
+/// IsVarArg.
+Function*
+AMDILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const
+{
+ assert(!isOverloaded(IntrID) && "AMDIL intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((AMDILIntrinsic::ID) IntrID);
+ LLVMContext& Context = M->getContext();
+ // NOTE(review): the locals below are initialised to empty defaults and
+ // are presumably filled in by the GET_INTRINSIC_GENERATOR fragment, which
+ // would then depend on these exact names (id, ResultTy, ArgTys, IsVarArg,
+ // Context) -- confirm before renaming.
+ unsigned int id = IntrID;
+ Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+ // We need to add the resource ID argument for atomics.
+ // The check is a plain ID range, so it relies on every atomic intrinsic
+ // being numbered between AMDIL_atomic_add_gi32 and
+ // AMDIL_atomic_xor_ru64_noret. The extra argument is a 32-bit integer.
+ if (id >= AMDILIntrinsic::AMDIL_atomic_add_gi32
+ && id <= AMDILIntrinsic::AMDIL_atomic_xor_ru64_noret) {
+ ArgTys.push_back(IntegerType::get(Context, 32));
+ }
+
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ FunctionType::get(ResultTy, ArgTys, IsVarArg),
+ AList));
+}
+
+/// The code generator has to support several SC versions, so an
+/// intrinsic is checked here before it is used. When this returns false
+/// the call is not translated into an intrinsic, and the software
+/// emulated fall-back path is expected to handle it instead.
+///
+/// Only the convert_f32_i32_{rpi,flr} and convert_f32_f16_{near,neg_inf,
+/// plus_inf} intrinsics are version-gated: they need a CAL version of at
+/// least CAL_VERSION_SC_139. Every other ID is reported as valid.
+bool
+AMDILIntrinsicInfo::isValidIntrinsic(unsigned int IntrID) const
+{
+  const AMDILSubtarget *subtarget = mTM->getSubtargetImpl();
+  switch (IntrID) {
+  case AMDILIntrinsic::AMDIL_convert_f32_i32_rpi:
+  case AMDILIntrinsic::AMDIL_convert_f32_i32_flr:
+  case AMDILIntrinsic::AMDIL_convert_f32_f16_near:
+  case AMDILIntrinsic::AMDIL_convert_f32_f16_neg_inf:
+  case AMDILIntrinsic::AMDIL_convert_f32_f16_plus_inf:
+    return subtarget->calVersion() >= CAL_VERSION_SC_139;
+  default:
+    break;
+  }
+  // Anything without a version requirement is always accepted.
+  return true;
+}
Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,53 @@
+//===-- AMDILIntrinsicInfo.h ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the AMDIL Implementation of the Intrinsic Info class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_INTRINSICS_H_
+#define _AMDIL_INTRINSICS_H_
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Intrinsics.h"
+
+namespace llvm
+{
+class AMDILTargetMachine;
+namespace AMDILIntrinsic
+{
+/// IDs of the AMDIL-specific intrinsics. The enumerators themselves come
+/// from the GET_INTRINSIC_ENUM_VALUES section of AMDILGenIntrinsics.inc.
+enum ID {
+ // Anchors the numbering just below Intrinsic::num_intrinsics.
+ // NOTE(review): presumably this makes the first generated enumerator
+ // equal to Intrinsic::num_intrinsics -- holds only if the generated list
+ // assigns no explicit values; confirm.
+ last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ // Follows the last generated enumerator; used as the upper bound for
+ // valid AMDIL intrinsic IDs.
+ , num_AMDIL_intrinsics
+};
+
+}
+
+
+/// AMDIL implementation of the TargetIntrinsicInfo interface.
+class AMDILIntrinsicInfo : public TargetIntrinsicInfo
+{
+ // Target machine supplied at construction; not owned by this class as far
+ // as this header shows.
+ AMDILTargetMachine *mTM;
+public:
+ AMDILIntrinsicInfo(AMDILTargetMachine *tm);
+ /// Returns the name of intrinsic IntrId.
+ std::string getName(unsigned int IntrId, Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ /// Looks up the intrinsic ID for a function name of length Len.
+ unsigned int lookupName(const char *Name, unsigned int Len) const;
+ /// Returns true if intrinsic IID is overloaded.
+ bool isOverloaded(unsigned int IID) const;
+ /// Returns the declaration of intrinsic ID in module M.
+ Function *getDeclaration(Module *M, unsigned int ID,
+ Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ /// Returns true if the given intrinsic ID may be used on the current
+ /// subtarget.
+ bool isValidIntrinsic(unsigned int) const;
+}; // AMDILIntrinsicInfo
+}
+
+#endif // _AMDIL_INTRINSICS_H_
+
More information about the llvm-branch-commits
mailing list